diff --git a/google/resource_container_cluster.go b/google/resource_container_cluster.go index 765e83ba..01def4ae 100644 --- a/google/resource_container_cluster.go +++ b/google/resource_container_cluster.go @@ -730,6 +730,11 @@ func resourceContainerClusterCreate(d *schema.ResourceData, meta interface{}) er timeoutInMinutes := int(d.Timeout(schema.TimeoutCreate).Minutes()) waitErr := containerOperationWait(config, op, project, location, "creating GKE cluster", timeoutInMinutes) if waitErr != nil { + if deleteErr := cleanFailedContainerCluster(d, meta); deleteErr != nil { + log.Printf("[WARN] Unable to clean up cluster from failed creation: %s", deleteErr) + } else { + log.Printf("[WARN] Verified failed creation of cluster %s was cleaned up", d.Id()) + } // The resource didn't actually create d.SetId("") return waitErr @@ -1308,16 +1313,9 @@ func resourceContainerClusterDelete(d *schema.ResourceData, meta interface{}) er return err } - var location string - locations := []string{} - if regionName, isRegionalCluster := d.GetOk("region"); !isRegionalCluster { - location, err = getZone(d, config) - if err != nil { - return err - } - locations = append(locations, location) - } else { - location = regionName.(string) + location, err := getLocation(d, config) + if err != nil { + return err } clusterName := d.Get("name").(string) @@ -1363,6 +1361,44 @@ func resourceContainerClusterDelete(d *schema.ResourceData, meta interface{}) er return nil } +// cleanFailedContainerCluster deletes clusters that failed but were +// created in an error state. Similar to resourceContainerClusterDelete +// but implemented in separate function as it doesn't try to lock already +// locked cluster state, does different error handling, and doesn't do retries. +func cleanFailedContainerCluster(d *schema.ResourceData, meta interface{}) error { + config := meta.(*Config) + + project, err := getProject(d, config) + if err != nil { + return err + } + + location, err := getLocation(d, config) + if err != nil { + return err + } + + clusterName := d.Get("name").(string) + fullName := containerClusterFullName(project, location, clusterName) + + log.Printf("[DEBUG] Cleaning up failed GKE cluster %s", d.Get("name").(string)) + op, err := config.clientContainerBeta.Projects.Locations.Clusters.Delete(fullName).Do() + if err != nil { + return handleNotFoundError(err, d, fmt.Sprintf("Container Cluster %q", d.Get("name").(string))) + } + + // Wait until it's deleted + timeoutInMinutes := int(d.Timeout(schema.TimeoutDelete).Minutes()) + waitErr := containerOperationWait(config, op, project, location, "deleting GKE cluster", timeoutInMinutes) + if waitErr != nil { + return waitErr + } + + log.Printf("[INFO] GKE cluster %s has been deleted", d.Id()) + d.SetId("") + return nil +} + // container engine's API currently mistakenly returns the instance group manager's // URL instead of the instance group's URL in its responses. This shim detects that // error, and corrects it, by fetching the instance group manager URL and retrieving diff --git a/google/resource_container_cluster_test.go b/google/resource_container_cluster_test.go index 679f2290..1f996ae3 100644 --- a/google/resource_container_cluster_test.go +++ b/google/resource_container_cluster_test.go @@ -1214,6 +1214,60 @@ func TestAccContainerCluster_withResourceLabelsUpdate(t *testing.T) { }) } +func TestAccContainerCluster_errorCleanDanglingCluster(t *testing.T) { + t.Parallel() + + prefix := acctest.RandString(10) + clusterName := fmt.Sprintf("cluster-test-%s", prefix) + clusterNameError := fmt.Sprintf("cluster-test-err-%s", prefix) + + initConfig := testAccContainerCluster_withInitialCIDR(clusterName) + overlapConfig := testAccContainerCluster_withCIDROverlap(initConfig, clusterNameError) + + resource.Test(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckContainerClusterDestroy, + Steps: []resource.TestStep{ + { + Config: initConfig, + }, + { + ResourceName: "google_container_cluster.cidr_error_preempt", + ImportStateIdPrefix: "us-central1-a/", + ImportState: true, + ImportStateVerify: true, + }, + { + Config: overlapConfig, + ExpectError: regexp.MustCompile("Error waiting for creating GKE cluster"), + }, + // If dangling cluster wasn't deleted, this plan will return an error + { + Config: overlapConfig, + PlanOnly: true, + ExpectNonEmptyPlan: true, + }, + }, + }) +} + +func TestAccContainerCluster_errorNoClusterCreated(t *testing.T) { + t.Parallel() + + resource.Test(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckContainerClusterDestroy, + Steps: []resource.TestStep{ + { + Config: testAccContainerCluster_withInvalidLocation("wonderland"), + ExpectError: regexp.MustCompile(`Location "wonderland" does not exist`), + }, + }, + }) +} + func testAccCheckContainerClusterDestroy(s *terraform.State) error { config := testAccProvider.Meta().(*Config) @@ -2296,3 +2350,47 @@ resource "google_container_cluster" "with_resource_labels" { } `, clusterName) } + +func testAccContainerCluster_withInitialCIDR(clusterName string) string { + return fmt.Sprintf(` +resource "google_container_cluster" "cidr_error_preempt" { + name = "%s" + zone = "us-central1-a" + + initial_node_count = 1 + + ip_allocation_policy { + cluster_ipv4_cidr_block = "10.3.0.0/19" + services_ipv4_cidr_block = "10.4.0.0/19" + } +} +`, clusterName) +} + +func testAccContainerCluster_withCIDROverlap(initConfig, secondCluster string) string { + return fmt.Sprintf(` +%s + +resource "google_container_cluster" "cidr_error_overlap" { + name = "%s" + zone = "us-central1-a" + + initial_node_count = 1 + + ip_allocation_policy { + cluster_ipv4_cidr_block = "10.3.0.0/19" + services_ipv4_cidr_block = "10.4.0.0/19" + } +} +`, initConfig, secondCluster) +} + +func testAccContainerCluster_withInvalidLocation(location string) string { + return fmt.Sprintf(` +resource "google_container_cluster" "with_resource_labels" { + name = "invalid-gke-cluster" + zone = "%s" + initial_node_count = 1 +} +`, location) +}