diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index 7a5344ca25b..e8b71df616e 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -368,6 +368,16 @@ type LoadBalancerSpec struct { BackendPool BackendPool `json:"backendPool,omitempty"` LoadBalancerClassSpec `json:",inline"` + + // AvailabilityZones is a list of availability zones for the load balancer. + // When specified for an internal load balancer, the frontend IP configuration + // will be zone-redundant across the specified zones. + // For public load balancers, this should be set on the associated public IP addresses instead. + // +optional + // +listType=set + // +kubebuilder:validation:MaxItems=3 + // +kubebuilder:validation:items:Pattern=`^[1-3]$` + AvailabilityZones []string `json:"availabilityZones,omitempty"` } // SKU defines an Azure load balancer SKU. diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 3ab3aa92d1f..60f3f0cfcb2 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -3455,6 +3455,11 @@ func (in *LoadBalancerSpec) DeepCopyInto(out *LoadBalancerSpec) { } out.BackendPool = in.BackendPool in.LoadBalancerClassSpec.DeepCopyInto(&out.LoadBalancerClassSpec) + if in.AvailabilityZones != nil { + in, out := &in.AvailabilityZones, &out.AvailabilityZones + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LoadBalancerSpec. 
diff --git a/azure/scope/cluster.go b/azure/scope/cluster.go index d057c254d1e..22a818217aa 100644 --- a/azure/scope/cluster.go +++ b/azure/scope/cluster.go @@ -158,6 +158,7 @@ func (s *ClusterScope) PublicIPSpecs() []azure.ResourceSpecGetter { if s.IsAPIServerPrivate() { // Public IP specs for control plane outbound lb if s.ControlPlaneOutboundLB() != nil { + failureDomains := s.getPublicIPFailureDomains(s.ControlPlaneOutboundLB().AvailabilityZones) for _, ip := range s.ControlPlaneOutboundLB().FrontendIPs { controlPlaneOutboundIPSpecs = append(controlPlaneOutboundIPSpecs, &publicips.PublicIPSpec{ Name: ip.PublicIP.Name, @@ -167,13 +168,14 @@ func (s *ClusterScope) PublicIPSpecs() []azure.ResourceSpecGetter { IsIPv6: false, // Set to default value Location: s.Location(), ExtendedLocation: s.ExtendedLocation(), - FailureDomains: s.FailureDomains(), + FailureDomains: failureDomains, AdditionalTags: s.AdditionalTags(), }) } } } else { if s.ControlPlaneEnabled() { + failureDomains := s.getPublicIPFailureDomains(s.APIServerLB().AvailabilityZones) controlPlaneOutboundIPSpecs = []azure.ResourceSpecGetter{ &publicips.PublicIPSpec{ Name: s.APIServerPublicIP().Name, @@ -183,7 +185,7 @@ func (s *ClusterScope) PublicIPSpecs() []azure.ResourceSpecGetter { ClusterName: s.ClusterName(), Location: s.Location(), ExtendedLocation: s.ExtendedLocation(), - FailureDomains: s.FailureDomains(), + FailureDomains: failureDomains, AdditionalTags: s.AdditionalTags(), IPTags: s.APIServerPublicIP().IPTags, }, @@ -194,6 +196,7 @@ func (s *ClusterScope) PublicIPSpecs() []azure.ResourceSpecGetter { // Public IP specs for node outbound lb if s.NodeOutboundLB() != nil { + failureDomains := s.getPublicIPFailureDomains(s.NodeOutboundLB().AvailabilityZones) for _, ip := range s.NodeOutboundLB().FrontendIPs { publicIPSpecs = append(publicIPSpecs, &publicips.PublicIPSpec{ Name: ip.PublicIP.Name, @@ -203,7 +206,7 @@ func (s *ClusterScope) PublicIPSpecs() []azure.ResourceSpecGetter { IsIPv6: false, // 
Set to default value Location: s.Location(), ExtendedLocation: s.ExtendedLocation(), - FailureDomains: s.FailureDomains(), + FailureDomains: failureDomains, AdditionalTags: s.AdditionalTags(), }) } @@ -270,6 +273,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } if s.APIServerLB().FrontendIPs != nil { @@ -304,6 +308,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } privateIPFound := false @@ -351,6 +356,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.NodeOutboundLB().IdleTimeoutInMinutes, Role: infrav1.NodeOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.NodeOutboundLB().AvailabilityZones, }) } @@ -372,6 +378,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.ControlPlaneOutboundLB().IdleTimeoutInMinutes, Role: infrav1.ControlPlaneOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.ControlPlaneOutboundLB().AvailabilityZones, }) } @@ -1021,6 +1028,23 @@ func (s *ClusterScope) FailureDomains() []*string { return fds } +// getPublicIPFailureDomains returns the failure domains to use for public IP addresses. +// If availability zones are explicitly specified on the load balancer, those zones are used. +// Otherwise, falls back to the cluster's failure domains. +// +// This is important because for public load balancers, zone-redundancy is achieved by setting +// zones on the public IP address resource, NOT on the load balancer's frontend IP configuration. 
+// Azure returns error "LoadBalancerFrontendIPConfigCannotHaveZoneWhenReferencingPublicIPAddress" +// if zones are specified on a frontend that references a public IP. +// +// See https://learn.microsoft.com/en-us/azure/reliability/reliability-load-balancer for details. +func (s *ClusterScope) getPublicIPFailureDomains(lbAvailabilityZones []string) []*string { + if len(lbAvailabilityZones) > 0 { + return azure.PtrSlice(&lbAvailabilityZones) + } + return s.FailureDomains() +} + // SetControlPlaneSecurityRules sets the default security rules of the control plane subnet. // Note that this is not done in a webhook as it requires a valid Cluster object to exist to get the API Server port. func (s *ClusterScope) SetControlPlaneSecurityRules() { diff --git a/azure/scope/cluster_test.go b/azure/scope/cluster_test.go index a22393a532c..527336bce48 100644 --- a/azure/scope/cluster_test.go +++ b/azure/scope/cluster_test.go @@ -904,6 +904,149 @@ func TestPublicIPSpecs(t *testing.T) { }, }, }, + { + name: "Azure cluster with public LB and AvailabilityZones uses LB zones instead of failure domains", + azureCluster: &infrav1.AzureCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-cluster", + }, + Status: infrav1.AzureClusterStatus{ + FailureDomains: map[string]clusterv1beta1.FailureDomainSpec{ + "failure-domain-id-1": {}, + "failure-domain-id-2": {}, + "failure-domain-id-3": {}, + }, + }, + Spec: infrav1.AzureClusterSpec{ + ResourceGroup: "my-rg", + ControlPlaneEnabled: true, + AzureClusterClassSpec: infrav1.AzureClusterClassSpec{ + Location: "centralIndia", + AdditionalTags: infrav1.Tags{ + "Name": "my-publicip-ipv6", + "sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": "owned", + }, + }, + NetworkSpec: infrav1.NetworkSpec{ + APIServerLB: &infrav1.LoadBalancerSpec{ + FrontendIPs: []infrav1.FrontendIP{ + { + PublicIP: &infrav1.PublicIPSpec{ + Name: "my-apiserver-ip", + DNSName: "my-cluster.centralIndia.cloudapp.azure.com", + }, + }, + }, + AvailabilityZones: 
[]string{"1", "2", "3"}, + LoadBalancerClassSpec: infrav1.LoadBalancerClassSpec{ + Type: infrav1.Public, + }, + }, + }, + }, + }, + expectedPublicIPSpec: []azure.ResourceSpecGetter{ + &publicips.PublicIPSpec{ + Name: "my-apiserver-ip", + ResourceGroup: "my-rg", + DNSName: "my-cluster.centralIndia.cloudapp.azure.com", + IsIPv6: false, + ClusterName: "my-cluster", + Location: "centralIndia", + FailureDomains: []*string{ptr.To("1"), ptr.To("2"), ptr.To("3")}, + AdditionalTags: infrav1.Tags{ + "Name": "my-publicip-ipv6", + "sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": "owned", + }, + }, + }, + }, + { + name: "Azure cluster with internal LB and AvailabilityZones and node outbound LB with zones", + azureCluster: &infrav1.AzureCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-cluster", + }, + Status: infrav1.AzureClusterStatus{ + FailureDomains: map[string]clusterv1beta1.FailureDomainSpec{ + "failure-domain-id-1": {}, + "failure-domain-id-2": {}, + "failure-domain-id-3": {}, + }, + }, + Spec: infrav1.AzureClusterSpec{ + ResourceGroup: "my-rg", + ControlPlaneEnabled: true, + AzureClusterClassSpec: infrav1.AzureClusterClassSpec{ + Location: "centralIndia", + AdditionalTags: infrav1.Tags{ + "Name": "my-publicip-ipv6", + "sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": "owned", + }, + }, + NetworkSpec: infrav1.NetworkSpec{ + APIServerLB: &infrav1.LoadBalancerSpec{ + AvailabilityZones: []string{"1", "2", "3"}, + LoadBalancerClassSpec: infrav1.LoadBalancerClassSpec{ + Type: infrav1.Internal, + }, + }, + ControlPlaneOutboundLB: &infrav1.LoadBalancerSpec{ + FrontendIPsCount: ptr.To[int32](1), + FrontendIPs: []infrav1.FrontendIP{ + { + Name: "cp-outbound-frontend", + PublicIP: &infrav1.PublicIPSpec{ + Name: "pip-cp-outbound", + }, + }, + }, + AvailabilityZones: []string{"1", "2"}, + }, + NodeOutboundLB: &infrav1.LoadBalancerSpec{ + FrontendIPsCount: ptr.To[int32](1), + FrontendIPs: []infrav1.FrontendIP{ + { + Name: "node-outbound-frontend", + 
PublicIP: &infrav1.PublicIPSpec{ + Name: "pip-node-outbound", + }, + }, + }, + AvailabilityZones: []string{"1", "3"}, + }, + }, + }, + }, + expectedPublicIPSpec: []azure.ResourceSpecGetter{ + &publicips.PublicIPSpec{ + Name: "pip-cp-outbound", + ResourceGroup: "my-rg", + DNSName: "", + IsIPv6: false, + ClusterName: "my-cluster", + Location: "centralIndia", + FailureDomains: []*string{ptr.To("1"), ptr.To("2")}, + AdditionalTags: infrav1.Tags{ + "Name": "my-publicip-ipv6", + "sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": "owned", + }, + }, + &publicips.PublicIPSpec{ + Name: "pip-node-outbound", + ResourceGroup: "my-rg", + DNSName: "", + IsIPv6: false, + ClusterName: "my-cluster", + Location: "centralIndia", + FailureDomains: []*string{ptr.To("1"), ptr.To("3")}, + AdditionalTags: infrav1.Tags{ + "Name": "my-publicip-ipv6", + "sigs.k8s.io_cluster-api-provider-azure_cluster_my-cluster": "owned", + }, + }, + }, + }, } for _, tc := range tests { diff --git a/azure/services/loadbalancers/loadbalancers_test.go b/azure/services/loadbalancers/loadbalancers_test.go index 3a61789a507..47d65e1e4eb 100644 --- a/azure/services/loadbalancers/loadbalancers_test.go +++ b/azure/services/loadbalancers/loadbalancers_test.go @@ -111,6 +111,59 @@ var ( APIServerPort: 6443, } + fakeInternalAPILBSpecWithZones = LBSpec{ + Name: "my-private-lb", + ResourceGroup: "my-rg", + SubscriptionID: "123", + ClusterName: "my-cluster", + Location: "my-location", + Role: infrav1.APIServerRole, + Type: infrav1.Internal, + SKU: infrav1.SKUStandard, + SubnetName: "my-cp-subnet", + BackendPoolName: "my-private-lb-backendPool", + IdleTimeoutInMinutes: ptr.To[int32](4), + AvailabilityZones: []string{"1", "2", "3"}, + FrontendIPConfigs: []infrav1.FrontendIP{ + { + Name: "my-private-lb-frontEnd", + FrontendIPClass: infrav1.FrontendIPClass{ + PrivateIPAddress: "10.0.0.10", + }, + }, + }, + APIServerPort: 6443, + } + + // fakePublicAPILBSpecWithZones tests that zones are NOT applied to public LB 
frontends. + // Azure does not allow zones on frontend IP configurations that reference public IP addresses. + // Instead, zone-redundancy for public LBs is achieved by setting zones on the public IP itself. + // See: https://learn.microsoft.com/en-us/azure/reliability/reliability-load-balancer#zone-redundant-load-balancer + fakePublicAPILBSpecWithZones = LBSpec{ + Name: "my-publiclb", + ResourceGroup: "my-rg", + SubscriptionID: "123", + ClusterName: "my-cluster", + Location: "my-location", + Role: infrav1.APIServerRole, + Type: infrav1.Public, + SKU: infrav1.SKUStandard, + SubnetName: "my-cp-subnet", + BackendPoolName: "my-publiclb-backendPool", + IdleTimeoutInMinutes: ptr.To[int32](4), + AvailabilityZones: []string{"1", "2", "3"}, // These should NOT be applied to frontend + FrontendIPConfigs: []infrav1.FrontendIP{ + { + Name: "my-publiclb-frontEnd", + PublicIP: &infrav1.PublicIPSpec{ + Name: "my-publicip", + DNSName: "my-cluster.12345.mydomain.com", + }, + }, + }, + APIServerPort: 6443, + } + fakeNodeOutboundLBSpec = LBSpec{ Name: "my-cluster", ResourceGroup: "my-rg", diff --git a/azure/services/loadbalancers/spec.go b/azure/services/loadbalancers/spec.go index cbdf9ffe48e..97858b194b2 100644 --- a/azure/services/loadbalancers/spec.go +++ b/azure/services/loadbalancers/spec.go @@ -48,6 +48,7 @@ type LBSpec struct { IdleTimeoutInMinutes *int32 AdditionalTags map[string]string AdditionalPorts []infrav1.LoadBalancerPort + AvailabilityZones []string } // ResourceName returns the name of the load balancer. @@ -167,6 +168,27 @@ func (s *LBSpec) Parameters(_ context.Context, existing any) (parameters any, er func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, []*armnetwork.SubResource) { frontendIPConfigurations := make([]*armnetwork.FrontendIPConfiguration, 0) frontendIDs := make([]*armnetwork.SubResource, 0) + + // Convert availability zones to []*string for Azure SDK. 
+ // IMPORTANT: Zones can only be set on frontend IP configurations for internal load balancers + // (where the frontend references a subnet). For public load balancers, zone-redundancy is + // achieved by setting zones on the associated public IP address resource, NOT on the load + // balancer's frontend IP configuration. + // + // Azure returns error "LoadBalancerFrontendIPConfigCannotHaveZoneWhenReferencingPublicIPAddress" + // if zones are specified on a frontend that references a public IP. + // + // See: https://learn.microsoft.com/en-us/azure/reliability/reliability-load-balancer#zone-redundant-load-balancer + // Section: "Zone-redundant load balancer" - "For public load balancers, if the public IP in the + // Load balancer's frontend is zone redundant then the load balancer is also zone-redundant." + var zones []*string + if len(lbSpec.AvailabilityZones) > 0 && lbSpec.Type == infrav1.Internal { + zones = make([]*string, len(lbSpec.AvailabilityZones)) + for i, zone := range lbSpec.AvailabilityZones { + zones[i] = ptr.To(zone) + } + } + for _, ipConfig := range lbSpec.FrontendIPConfigs { var properties armnetwork.FrontendIPConfigurationPropertiesFormat if lbSpec.Type == infrav1.Internal { @@ -187,6 +209,7 @@ func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, frontendIPConfigurations = append(frontendIPConfigurations, &armnetwork.FrontendIPConfiguration{ Properties: &properties, Name: ptr.To(ipConfig.Name), + Zones: zones, }) frontendIDs = append(frontendIDs, &armnetwork.SubResource{ ID: ptr.To(azure.FrontendIPConfigID(lbSpec.SubscriptionID, lbSpec.ResourceGroup, lbSpec.Name, ipConfig.Name)), diff --git a/azure/services/loadbalancers/spec_test.go b/azure/services/loadbalancers/spec_test.go index 691e1ce617b..390505f3fb8 100644 --- a/azure/services/loadbalancers/spec_test.go +++ b/azure/services/loadbalancers/spec_test.go @@ -178,6 +178,39 @@ func TestParameters(t *testing.T) { }, expectedError: "", }, + { + name: "internal load 
balancer with availability zones", + spec: &fakeInternalAPILBSpecWithZones, + existing: nil, + expect: func(g *WithT, result interface{}) { + g.Expect(result).To(BeAssignableToTypeOf(armnetwork.LoadBalancer{})) + lb := result.(armnetwork.LoadBalancer) + // Verify zones are set on frontend IP configuration for internal LBs + g.Expect(lb.Properties.FrontendIPConfigurations).To(HaveLen(1)) + g.Expect(lb.Properties.FrontendIPConfigurations[0].Zones).To(HaveLen(3)) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[0]).To(Equal("1")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[1]).To(Equal("2")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[2]).To(Equal("3")) + }, + expectedError: "", + }, + { + name: "public load balancer with availability zones - zones NOT applied to frontend", + spec: &fakePublicAPILBSpecWithZones, + existing: nil, + expect: func(g *WithT, result interface{}) { + g.Expect(result).To(BeAssignableToTypeOf(armnetwork.LoadBalancer{})) + lb := result.(armnetwork.LoadBalancer) + // Verify zones are NOT set on frontend IP configuration for public LBs. + // Azure does not allow zones on frontend IP configs that reference public IPs. + // Zone-redundancy for public LBs is achieved by setting zones on the public IP itself. 
+ // See: https://learn.microsoft.com/en-us/azure/reliability/reliability-load-balancer#zone-redundant-load-balancer + g.Expect(lb.Properties.FrontendIPConfigurations).To(HaveLen(1)) + g.Expect(lb.Properties.FrontendIPConfigurations[0].Zones).To(BeNil(), + "zones should not be set on public LB frontend - Azure error: LoadBalancerFrontendIPConfigCannotHaveZoneWhenReferencingPublicIPAddress") + }, + expectedError: "", + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml index e42c11a7ad9..9f4fcf1e103 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml @@ -686,6 +686,18 @@ spec: description: APIServerLB is the configuration for the control-plane load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + pattern: ^[1-3]$ + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -769,6 +781,18 @@ spec: ControlPlaneOutboundLB is the configuration for the control-plane outbound load balancer. This is different from APIServerLB, and is used only in private clusters (optionally) for enabling outbound traffic. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. 
+ For public load balancers, this should be set on the associated public IP addresses instead. + items: + pattern: ^[1-3]$ + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -851,6 +875,18 @@ spec: description: NodeOutboundLB is the configuration for the node outbound load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + pattern: ^[1-3]$ + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md index 685def09456..a04d6931faa 100644 --- a/docs/book/src/SUMMARY.md +++ b/docs/book/src/SUMMARY.md @@ -38,6 +38,7 @@ - [Flatcar](./self-managed/flatcar.md) - [GPU-enabled Clusters](./self-managed/gpu.md) - [IPv6](./self-managed/ipv6.md) + - [Load Balancer Zone Redundancy](./self-managed/load-balancer-zone-redundancy.md) - [Machine Pools (VMSS)](./self-managed/machinepools.md) - [Node Outbound Connection](./self-managed/node-outbound-connection.md) - [Spot Virtual Machines](./self-managed/spot-vms.md) diff --git a/docs/book/src/self-managed/load-balancer-zone-redundancy.md b/docs/book/src/self-managed/load-balancer-zone-redundancy.md new file mode 100644 index 00000000000..95e346840d3 --- /dev/null +++ b/docs/book/src/self-managed/load-balancer-zone-redundancy.md @@ -0,0 +1,342 @@ +# Load Balancer Zone Redundancy + +## Zone Redundancy for Load Balancers in Azure + +Azure Load Balancers can be configured as zone-redundant to ensure high availability across multiple availability zones 
within a region. A zone-redundant load balancer distributes traffic across all zones, providing resilience against zone failures. + +**Key concepts:** +- Zone redundancy for load balancers is configured through the **frontend IP configuration** +- For **internal load balancers**, zones are set directly on the frontend IP configuration +- For **public load balancers**, zones are inherited from the zone configuration of the public IP address +- **Zones are immutable** - once created, they cannot be changed, added, or removed + +Full details can be found in the [Azure Load Balancer reliability documentation](https://learn.microsoft.com/azure/reliability/reliability-load-balancer). + +## How Azure Implements Zone-Redundant Load Balancers + +It's important to understand how Azure handles zone configuration differently for internal and public load balancers. This is a critical Azure platform behavior that affects how CAPZ configures your infrastructure. + +### Internal Load Balancers + +For internal (private) load balancers, zone-redundancy is configured by setting the `zones` property directly on the **frontend IP configuration**. The frontend IP references a subnet (not a public IP), and Azure allows zones to be specified on subnet-based frontends. + +From the [Azure documentation](https://learn.microsoft.com/azure/reliability/reliability-load-balancer#zone-redundant-load-balancer): + +> "For internal load balancers, set the sku.name property to Standard and set the properties.frontendIPConfigurations[\*].zones property to at least two availability zones." + +### Public Load Balancers + +For public load balancers, **you cannot set zones on the frontend IP configuration**. Instead, zone-redundancy is achieved by setting zones on the **public IP address resource** that the frontend references. 
+ +From the [Azure documentation](https://learn.microsoft.com/azure/reliability/reliability-load-balancer#zone-redundant-load-balancer): + +> "For public load balancers, if the public IP in the Load balancer's frontend is zone redundant then the load balancer is also zone-redundant." + +If you attempt to set zones on a public load balancer's frontend IP configuration, Azure returns the error: + +``` +LoadBalancerFrontendIPConfigCannotHaveZoneWhenReferencingPublicIPAddress: +Load balancer frontendIPConfiguration has zones specified and is referencing a publicIPAddress. +Networking supports zones only for frontendIpconfigurations which reference a subnet. +``` + +### How CAPZ Handles This + +When you specify `availabilityZones` on a load balancer in CAPZ: + +| Load Balancer Type | Where Zones Are Applied | +|-------------------|------------------------| +| **Internal** (type: Internal) | Frontend IP configuration | +| **Public** (type: Public) | Associated public IP address resource | + +This means that for public load balancers, CAPZ automatically applies the zones you specify to the public IP addresses, ensuring zone-redundancy without Azure API errors. + +## Configuring Zone-Redundant Load Balancers + +CAPZ exposes the `availabilityZones` field on load balancer specifications to enable zone redundancy. + +### Internal Load Balancers + +For internal load balancers (such as a private API server), you can configure availability zones directly on the load balancer spec: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" +``` + +This configuration creates a zone-redundant internal load balancer with frontend IPs distributed across zones 1, 2, and 3. + +### Public Load Balancers + +For public load balancers, zone redundancy is controlled by the public IP addresses. 
When you specify `availabilityZones` on a public load balancer, CAPZ automatically applies those zones to the associated public IP addresses: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: api-server-ip + publicIP: + name: api-server-publicip + dnsName: my-cluster.eastus.cloudapp.azure.com +``` + +When this configuration is applied, CAPZ will: +1. Create the public IP `api-server-publicip` with zones `["1", "2", "3"]` +2. Create the load balancer frontend referencing that public IP (without zones on the frontend itself) + +This results in a zone-redundant public load balancer that complies with Azure's requirements. + +### Node Outbound Load Balancer + +You can also configure zone redundancy for node outbound load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: westus2 + networkSpec: + nodeOutboundLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip +``` + +### Control Plane Outbound Load Balancer + +For clusters with private API servers, you can configure the control plane outbound load balancer: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" + controlPlaneOutboundLB: + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: controlplane-outbound-ip + publicIP: + name: controlplane-outbound-publicip +``` + +## Complete Example: Highly Available Cluster + +Here's a complete example of a highly available cluster with 
zone-redundant load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ha-cluster + namespace: default +spec: + location: eastus + resourceGroup: ha-cluster-rg + networkSpec: + # Zone-redundant internal API server load balancer + apiServerLB: + type: Internal + name: ha-cluster-internal-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: api-server-internal-ip + privateIPAddress: "10.0.0.100" + + # Zone-redundant control plane outbound load balancer + controlPlaneOutboundLB: + name: ha-cluster-cp-outbound-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: cp-outbound-ip + publicIP: + name: cp-outbound-publicip + + # Zone-redundant node outbound load balancer + nodeOutboundLB: + name: ha-cluster-node-outbound-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip + + # Custom VNet configuration + vnet: + name: ha-cluster-vnet + cidrBlocks: + - "10.0.0.0/16" + + subnets: + - name: control-plane-subnet + role: control-plane + cidrBlocks: + - "10.0.0.0/24" + - name: node-subnet + role: node + cidrBlocks: + - "10.0.1.0/24" +``` + +## Important Considerations + +### Immutability + +Once a load balancer is created with availability zones, the zone configuration **cannot be changed**. This is an Azure platform limitation. To change zones, you must: + +1. Delete the load balancer +2. Recreate it with the new zone configuration + +> **Warning**: Changing load balancer zones requires recreating the cluster's load balancers, which will cause service interruption. + +### Region Support + +Not all Azure regions support availability zones. Before configuring zone-redundant load balancers, verify that your target region supports zones: + +```bash +az vm list-skus -l <location> --zone -o table +``` + +### Standard SKU Requirement + +Zone-redundant load balancers require the **Standard SKU**. 
CAPZ uses Standard SKU by default, so no additional configuration is needed. + +### Backend Pool Placement + +For optimal high availability: +- Spread your control plane nodes across all availability zones +- Spread your worker nodes across all availability zones +- Ensure backend pool members exist in the same zones as the load balancer + +See the [Failure Domains](failure-domains.md) documentation for details on distributing VMs across zones. + +## Migration from Non-Zone-Redundant Load Balancers + +If you have an existing cluster without zone-redundant load balancers, migration requires careful planning: + +### For New Clusters + +When creating a new cluster, simply include the `availabilityZones` field in your `AzureCluster` specification from the start. + +### For Existing Clusters + +**Migration is not straightforward** because: +1. Azure does not allow modifying zones on existing load balancers +2. CAPZ's webhook validation prevents zone changes to enforce this immutability +3. Load balancer recreation requires cluster downtime + +**Recommended approach for existing clusters:** +1. Create a new cluster with zone-redundant configuration +2. Migrate workloads to the new cluster +3. Decommission the old cluster + +**Alternative for development/test clusters:** +1. Delete the `AzureCluster` resource (this will delete the infrastructure) +2. Recreate the `AzureCluster` with `availabilityZones` configured +3. Reconcile the cluster + +> **Important**: The alternative approach causes significant downtime and should only be used in non-production environments. + +## Troubleshooting + +### Load Balancer Not Zone-Redundant + +If your load balancer is not zone-redundant despite configuration: + +1. **Verify the zones are set in spec:** + ```bash + kubectl get azurecluster <cluster-name> -o jsonpath='{.spec.networkSpec.apiServerLB.availabilityZones}' + ``` + +2. 
**Check the Azure load balancer frontend configuration:** + ```bash + az network lb frontend-ip show \ + --lb-name <lb-name> \ + --name <frontend-ip-name> \ + --resource-group <resource-group> \ + --query zones + ``` + +3. **Verify the region supports zones:** + ```bash + az vm list-skus -l <location> --zone -o table | grep -i standardsku + ``` + +### Validation Errors + +If you encounter validation errors when updating `availabilityZones`: + +``` +field is immutable +``` + +This is expected behavior. Zones cannot be modified after creation. You must recreate the load balancer with the desired configuration. + +## Best Practices + +1. **Enable zone redundancy from the start** when creating new clusters in zone-capable regions +2. **Use all available zones** in the region (typically 3 zones) for maximum resilience +3. **Spread backend pools** across all zones configured on the load balancer +4. **Monitor zone health** and be prepared to handle zone failures +5. **Test failover scenarios** to ensure your cluster can survive zone outages +6. **Document your zone configuration** for disaster recovery procedures + +## Related Documentation + +- [Failure Domains](failure-domains.md) - Configure VMs across availability zones +- [API Server Endpoint](api-server-endpoint.md) - API server load balancer configuration +- [Azure Load Balancer Reliability](https://learn.microsoft.com/azure/reliability/reliability-load-balancer) - Azure official documentation diff --git a/internal/webhooks/azurecluster_webhook.go b/internal/webhooks/azurecluster_webhook.go index 16c47c6cfc6..605a356ffac 100644 --- a/internal/webhooks/azurecluster_webhook.go +++ b/internal/webhooks/azurecluster_webhook.go @@ -150,6 +150,43 @@ func (*AzureClusterWebhook) ValidateUpdate(_ context.Context, old, c *infrav1.Az allErrs = append(allErrs, err) } + // Validate availability zones are immutable for load balancers + if c.Spec.NetworkSpec.APIServerLB != nil && old.Spec.NetworkSpec.APIServerLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + 
c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + old.Spec.NetworkSpec.APIServerLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "apiServerLB", "availabilityZones"), + c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.NodeOutboundLB != nil && old.Spec.NetworkSpec.NodeOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "nodeOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.ControlPlaneOutboundLB != nil && old.Spec.NetworkSpec.ControlPlaneOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "controlPlaneOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + allErrs = append(allErrs, validateAzureClusterSubnetUpdate(c, old)...) 
if len(allErrs) == 0 { diff --git a/internal/webhooks/azurecluster_webhook_test.go b/internal/webhooks/azurecluster_webhook_test.go index 4265c2c9048..56dc0622d27 100644 --- a/internal/webhooks/azurecluster_webhook_test.go +++ b/internal/webhooks/azurecluster_webhook_test.go @@ -332,6 +332,92 @@ func TestAzureCluster_ValidateUpdate(t *testing.T) { }(), wantErr: true, }, + { + name: "apiServerLB availability zones are immutable", + oldCluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + cluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2"} + return cluster + }(), + wantErr: true, + }, + { + name: "apiServerLB availability zones cannot be set when previously unset", + oldCluster: apifixtures.CreateValidCluster(), + cluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + wantErr: true, + }, + { + name: "apiServerLB availability zones cannot be removed", + oldCluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + cluster: apifixtures.CreateValidCluster(), + wantErr: true, + }, + { + name: "apiServerLB availability zones unchanged is valid", + oldCluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + cluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + wantErr: false, + }, 
+ { + name: "nodeOutboundLB availability zones are immutable", + oldCluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones = []string{"1", "2", "3"} + return cluster + }(), + cluster: func() *infrav1.AzureCluster { + cluster := apifixtures.CreateValidCluster() + cluster.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones = []string{"1"} + return cluster + }(), + wantErr: true, + }, + { + name: "controlPlaneOutboundLB availability zones are immutable", + oldCluster: &infrav1.AzureCluster{ + Spec: infrav1.AzureClusterSpec{ + NetworkSpec: infrav1.NetworkSpec{ + ControlPlaneOutboundLB: &infrav1.LoadBalancerSpec{ + Name: "cp-lb", + AvailabilityZones: []string{"1", "2", "3"}, + }, + }, + }, + }, + cluster: &infrav1.AzureCluster{ + Spec: infrav1.AzureClusterSpec{ + NetworkSpec: infrav1.NetworkSpec{ + ControlPlaneOutboundLB: &infrav1.LoadBalancerSpec{ + Name: "cp-lb", + AvailabilityZones: []string{"1"}, + }, + }, + }, + }, + wantErr: true, + }, { name: "natGateway name can be empty before AzureCluster is updated", oldCluster: apifixtures.CreateValidCluster(), diff --git a/templates/cluster-template-private.yaml b/templates/cluster-template-private.yaml index 786b6d52fc2..759fe70d84f 100644 --- a/templates/cluster-template-private.yaml +++ b/templates/cluster-template-private.yaml @@ -32,6 +32,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" name: ${CLUSTER_NAME}-internal-lb type: Internal controlPlaneOutboundLB: diff --git a/templates/flavors/private/patches/private-lb.yaml b/templates/flavors/private/patches/private-lb.yaml index 76e1539df2a..a2933e29963 100644 --- a/templates/flavors/private/patches/private-lb.yaml +++ b/templates/flavors/private/patches/private-lb.yaml @@ -7,6 +7,10 @@ spec: apiServerLB: name: ${CLUSTER_NAME}-internal-lb type: Internal + availabilityZones: + - "1" + - "2" + - "3" nodeOutboundLB: 
frontendIPsCount: 1 controlPlaneOutboundLB: diff --git a/templates/test/ci/cluster-template-prow-private.yaml b/templates/test/ci/cluster-template-prow-private.yaml index 26910fdcb43..47d6e99cc7a 100644 --- a/templates/test/ci/cluster-template-prow-private.yaml +++ b/templates/test/ci/cluster-template-prow-private.yaml @@ -49,6 +49,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" frontendIPs: - name: ${CLUSTER_NAME}-internal-lb-frontend privateIP: ${AZURE_INTERNAL_LB_IP} diff --git a/templates/test/ci/prow-private/patches/vnet-peerings.yaml b/templates/test/ci/prow-private/patches/vnet-peerings.yaml index fc51c6a5d5e..daafe8b7da5 100644 --- a/templates/test/ci/prow-private/patches/vnet-peerings.yaml +++ b/templates/test/ci/prow-private/patches/vnet-peerings.yaml @@ -10,6 +10,10 @@ spec: privateIP: ${AZURE_INTERNAL_LB_IP} name: ${CLUSTER_NAME}-internal-lb type: Internal + availabilityZones: + - "1" + - "2" + - "3" vnet: name: ${AZURE_VNET_NAME} cidrBlocks: diff --git a/test/e2e/azure_privatecluster.go b/test/e2e/azure_privatecluster.go index e169aa1ea8c..cf416c9678a 100644 --- a/test/e2e/azure_privatecluster.go +++ b/test/e2e/azure_privatecluster.go @@ -40,6 +40,7 @@ import ( "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/controller-runtime/pkg/client" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure" ) @@ -202,6 +203,55 @@ func AzurePrivateClusterSpec(ctx context.Context, inputGetter func() AzurePrivat Expect(err).NotTo(HaveOccurred()) } + + // Verify zone-redundant load balancer configuration + { + By("verifying the internal API server load balancer has zone-redundant frontend IPs") + expectedZones := []string{"1", "2", "3"} + + azureCluster := &infrav1.AzureCluster{} + err := publicClusterProxy.GetClient().Get(ctx, client.ObjectKey{ + Namespace: input.Namespace.Name, + Name: clusterName, + }, azureCluster) + 
Expect(err).NotTo(HaveOccurred()) + + Expect(azureCluster.Spec.NetworkSpec.APIServerLB).NotTo(BeNil()) + Expect(azureCluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones).To(Equal(expectedZones), + "APIServerLB should have availability zones configured in AzureCluster spec") + + resourceGroupName := azureCluster.Spec.ResourceGroup + Expect(resourceGroupName).NotTo(BeEmpty()) + + cred, err := azidentity.NewDefaultAzureCredential(nil) + Expect(err).NotTo(HaveOccurred()) + + lbClient, err := armnetwork.NewLoadBalancersClient(getSubscriptionID(Default), cred, nil) + Expect(err).NotTo(HaveOccurred()) + + lbName := azureCluster.Spec.NetworkSpec.APIServerLB.Name + Eventually(func(g Gomega) { + lb, err := lbClient.Get(ctx, resourceGroupName, lbName, nil) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(lb.Properties).NotTo(BeNil()) + g.Expect(lb.Properties.FrontendIPConfigurations).NotTo(BeEmpty()) + + for _, frontendIP := range lb.Properties.FrontendIPConfigurations { + g.Expect(frontendIP.Zones).NotTo(BeNil(), "Internal LB frontend IP should have zones configured") + g.Expect(frontendIP.Zones).To(HaveLen(3), "Internal LB frontend IP should have 3 zones") + + zonesMap := make(map[string]bool) + for _, zone := range frontendIP.Zones { + if zone != nil { + zonesMap[*zone] = true + } + } + for _, expectedZone := range expectedZones { + g.Expect(zonesMap[expectedZone]).To(BeTrue(), "Zone %s should be configured on internal LB frontend IP", expectedZone) + } + } + }, retryableOperationTimeout, retryableOperationSleepBetweenRetries).Should(Succeed()) + } } // SetupExistingVNet creates a resource group and a VNet to be used by a workload cluster.