Clean up failed cluster creation (#3044)

Signed-off-by: Modular Magician <magic-modules@google.com>
This commit is contained in:
The Magician 2019-02-14 10:29:34 -08:00 committed by emily
parent 4631fa5651
commit b2f501754f
2 changed files with 144 additions and 10 deletions

View File

@ -730,6 +730,11 @@ func resourceContainerClusterCreate(d *schema.ResourceData, meta interface{}) er
timeoutInMinutes := int(d.Timeout(schema.TimeoutCreate).Minutes())
waitErr := containerOperationWait(config, op, project, location, "creating GKE cluster", timeoutInMinutes)
if waitErr != nil {
if deleteErr := cleanFailedContainerCluster(d, meta); deleteErr != nil {
log.Printf("[WARN] Unable to clean up cluster from failed creation: %s", deleteErr)
} else {
log.Printf("[WARN] Verified failed creation of cluster %s was cleaned up", d.Id())
}
// The resource didn't actually create
d.SetId("")
return waitErr
@ -1308,16 +1313,9 @@ func resourceContainerClusterDelete(d *schema.ResourceData, meta interface{}) er
return err
}
var location string
locations := []string{}
if regionName, isRegionalCluster := d.GetOk("region"); !isRegionalCluster {
location, err = getZone(d, config)
if err != nil {
return err
}
locations = append(locations, location)
} else {
location = regionName.(string)
location, err := getLocation(d, config)
if err != nil {
return err
}
clusterName := d.Get("name").(string)
@ -1363,6 +1361,44 @@ func resourceContainerClusterDelete(d *schema.ResourceData, meta interface{}) er
return nil
}
// cleanFailedContainerCluster deletes clusters that failed but were
// created in an error state. Similar to resourceContainerClusterDelete
// but implemented in separate function as it doesn't try to lock already
// locked cluster state, does different error handling, and doesn't do retries.
func cleanFailedContainerCluster(d *schema.ResourceData, meta interface{}) error {
config := meta.(*Config)
project, err := getProject(d, config)
if err != nil {
return err
}
location, err := getLocation(d, config)
if err != nil {
return err
}
clusterName := d.Get("name").(string)
fullName := containerClusterFullName(project, location, clusterName)
log.Printf("[DEBUG] Cleaning up failed GKE cluster %s", d.Get("name").(string))
op, err := config.clientContainerBeta.Projects.Locations.Clusters.Delete(fullName).Do()
if err != nil {
return handleNotFoundError(err, d, fmt.Sprintf("Container Cluster %q", d.Get("name").(string)))
}
// Wait until it's deleted
timeoutInMinutes := int(d.Timeout(schema.TimeoutDelete).Minutes())
waitErr := containerOperationWait(config, op, project, location, "deleting GKE cluster", timeoutInMinutes)
if waitErr != nil {
return waitErr
}
log.Printf("[INFO] GKE cluster %s has been deleted", d.Id())
d.SetId("")
return nil
}
// container engine's API currently mistakenly returns the instance group manager's
// URL instead of the instance group's URL in its responses. This shim detects that
// error, and corrects it, by fetching the instance group manager URL and retrieving

View File

@ -1214,6 +1214,60 @@ func TestAccContainerCluster_withResourceLabelsUpdate(t *testing.T) {
})
}
func TestAccContainerCluster_errorCleanDanglingCluster(t *testing.T) {
t.Parallel()
prefix := acctest.RandString(10)
clusterName := fmt.Sprintf("cluster-test-%s", prefix)
clusterNameError := fmt.Sprintf("cluster-test-err-%s", prefix)
initConfig := testAccContainerCluster_withInitialCIDR(clusterName)
overlapConfig := testAccContainerCluster_withCIDROverlap(initConfig, clusterNameError)
resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckContainerClusterDestroy,
Steps: []resource.TestStep{
{
Config: initConfig,
},
{
ResourceName: "google_container_cluster.cidr_error_preempt",
ImportStateIdPrefix: "us-central1-a/",
ImportState: true,
ImportStateVerify: true,
},
{
Config: overlapConfig,
ExpectError: regexp.MustCompile("Error waiting for creating GKE cluster"),
},
// If dangling cluster wasn't deleted, this plan will return an error
{
Config: overlapConfig,
PlanOnly: true,
ExpectNonEmptyPlan: true,
},
},
})
}
func TestAccContainerCluster_errorNoClusterCreated(t *testing.T) {
t.Parallel()
resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckContainerClusterDestroy,
Steps: []resource.TestStep{
{
Config: testAccContainerCluster_withInvalidLocation("wonderland"),
ExpectError: regexp.MustCompile(`Location "wonderland" does not exist`),
},
},
})
}
func testAccCheckContainerClusterDestroy(s *terraform.State) error {
config := testAccProvider.Meta().(*Config)
@ -2296,3 +2350,47 @@ resource "google_container_cluster" "with_resource_labels" {
}
`, clusterName)
}
func testAccContainerCluster_withInitialCIDR(clusterName string) string {
return fmt.Sprintf(`
resource "google_container_cluster" "cidr_error_preempt" {
name = "%s"
zone = "us-central1-a"
initial_node_count = 1
ip_allocation_policy {
cluster_ipv4_cidr_block = "10.3.0.0/19"
services_ipv4_cidr_block = "10.4.0.0/19"
}
}
`, clusterName)
}
func testAccContainerCluster_withCIDROverlap(initConfig, secondCluster string) string {
return fmt.Sprintf(`
%s
resource "google_container_cluster" "cidr_error_overlap" {
name = "%s"
zone = "us-central1-a"
initial_node_count = 1
ip_allocation_policy {
cluster_ipv4_cidr_block = "10.3.0.0/19"
services_ipv4_cidr_block = "10.4.0.0/19"
}
}
`, initConfig, secondCluster)
}
func testAccContainerCluster_withInvalidLocation(location string) string {
return fmt.Sprintf(`
resource "google_container_cluster" "with_resource_labels" {
name = "invalid-gke-cluster"
zone = "%s"
initial_node_count = 1
}
`, location)
}