diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/AROSwift/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/AROSwift/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml index ba0322c7603..0480c2b29ca 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/AROSwift/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/AROSwift/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml @@ -310,7 +310,8 @@ spec: echo "Checking if cluster is functional" if etcdctl member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, '{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -318,7 +319,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . 
|| true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" @@ -334,6 +349,10 @@ spec: fieldRef: apiVersion: v1 fieldPath: metadata.namespace + - name: ETCD_EXPECTED_MEMBER_COUNT + value: "3" + - name: ETCD_QUORUM_MIN_MEMBERS + value: "2" image: etcd imagePullPolicy: IfNotPresent name: reset-member diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/GCP/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/GCP/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml index 902857d72a0..e75da18901c 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/GCP/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/GCP/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml @@ -331,7 +331,8 @@ spec: echo "Checking if cluster is functional" if etcdctl member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl 
member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, '{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -339,7 +340,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . || true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" @@ -355,6 +370,10 @@ spec: fieldRef: apiVersion: v1 fieldPath: metadata.namespace + - name: ETCD_EXPECTED_MEMBER_COUNT + value: "3" + - name: 
ETCD_QUORUM_MIN_MEMBERS + value: "2" image: etcd imagePullPolicy: IfNotPresent name: reset-member diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/IBMCloud/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/IBMCloud/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml index ba0322c7603..0480c2b29ca 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/IBMCloud/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/IBMCloud/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml @@ -310,7 +310,8 @@ spec: echo "Checking if cluster is functional" if etcdctl member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, '{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -318,7 +319,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . 
|| true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" @@ -334,6 +349,10 @@ spec: fieldRef: apiVersion: v1 fieldPath: metadata.namespace + - name: ETCD_EXPECTED_MEMBER_COUNT + value: "3" + - name: ETCD_QUORUM_MIN_MEMBERS + value: "2" image: etcd imagePullPolicy: IfNotPresent name: reset-member diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml index ba0322c7603..0480c2b29ca 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/TechPreviewNoUpgrade/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml @@ -310,7 +310,8 @@ spec: echo "Checking if cluster is functional" if etcdctl 
member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, '{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -318,7 +319,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . || true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" @@ -334,6 +349,10 @@ spec: fieldRef: apiVersion: v1 fieldPath: metadata.namespace + - name: 
ETCD_EXPECTED_MEMBER_COUNT + value: "3" + - name: ETCD_QUORUM_MIN_MEMBERS + value: "2" image: etcd imagePullPolicy: IfNotPresent name: reset-member diff --git a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml index ba0322c7603..0480c2b29ca 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/testdata/etcd/zz_fixture_TestControlPlaneComponents_etcd_statefulset.yaml @@ -310,7 +310,8 @@ spec: echo "Checking if cluster is functional" if etcdctl member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, '{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -318,7 +319,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . 
|| true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" @@ -334,6 +349,10 @@ spec: fieldRef: apiVersion: v1 fieldPath: metadata.namespace + - name: ETCD_EXPECTED_MEMBER_COUNT + value: "3" + - name: ETCD_QUORUM_MIN_MEMBERS + value: "2" image: etcd imagePullPolicy: IfNotPresent name: reset-member diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/assets/etcd/statefulset.yaml b/control-plane-operator/controllers/hostedcontrolplane/v2/assets/etcd/statefulset.yaml index cac5289f08b..bc2ec3cf58e 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/v2/assets/etcd/statefulset.yaml +++ b/control-plane-operator/controllers/hostedcontrolplane/v2/assets/etcd/statefulset.yaml @@ -227,7 +227,8 @@ spec: echo "Checking if cluster is functional" if etcdctl member list; then echo "Cluster is functional" - MEMBER_ID=$(etcdctl member list -w simple | grep "${HOSTNAME}" | awk -F, '{ print $1 }') + MEMBER_LIST=$(etcdctl member list -w simple) + MEMBER_ID=$(echo "${MEMBER_LIST}" | grep "${HOSTNAME}" | awk -F, 
'{ print $1 }') if [[ -n "${MEMBER_ID}" ]]; then echo "A member with this name (${HOSTNAME}) already exists, removing" etcdctl member remove "${MEMBER_ID}" @@ -235,7 +236,21 @@ spec: etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 echo "existing" > /etc/etcd/clusterstate/state else - echo "A member does not exist with name (${HOSTNAME}), nothing to do" + echo "A member does not exist with name (${HOSTNAME}), evaluating straggler join" + if [[ -n "${ETCD_EXPECTED_MEMBER_COUNT:-}" && -n "${ETCD_QUORUM_MIN_MEMBERS:-}" ]]; then + MEMBER_COUNT=$(echo "${MEMBER_LIST}" | grep -c . || true) + if [[ "${MEMBER_COUNT}" -ge "${ETCD_QUORUM_MIN_MEMBERS}" && "${MEMBER_COUNT}" -lt "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster reports at least quorum members (${MEMBER_COUNT} >= ${ETCD_QUORUM_MIN_MEMBERS} for size ${ETCD_EXPECTED_MEMBER_COUNT}); adding this member dynamically" + etcdctl member add ${HOSTNAME} --peer-urls https://${HOSTNAME}.etcd-discovery.${NAMESPACE}.svc:2380 + echo "existing" > /etc/etcd/clusterstate/state + elif [[ "${MEMBER_COUNT}" -ge "${ETCD_EXPECTED_MEMBER_COUNT}" ]]; then + echo "Cluster already has the expected member count (${MEMBER_COUNT}/${ETCD_EXPECTED_MEMBER_COUNT}); refusing to grow membership dynamically" + else + echo "Cluster membership (${MEMBER_COUNT}) is below quorum (${ETCD_QUORUM_MIN_MEMBERS}); static bootstrap (new) path" + fi + else + echo "Skipping straggler join: ETCD_EXPECTED_MEMBER_COUNT and ETCD_QUORUM_MIN_MEMBERS must both be set; not adding member dynamically" + fi fi else echo "Cannot list members in cluster, so likely not up yet" diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership.go b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership.go new file mode 100644 index 00000000000..eac5ec73437 --- /dev/null +++ b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership.go @@ -0,0 +1,21 @@ +package etcd + 
+// etcdRaftQuorumSize computes the smallest majority, clusterSize/2 + 1, that a
+// Raft cluster of clusterSize voting members needs; it is 0 for sizes <= 0.
+func etcdRaftQuorumSize(clusterSize int) int {
+	if clusterSize > 0 {
+		return clusterSize/2 + 1
+	}
+	return 0
+}
+
+// resetMemberStragglerJoinQuorumMet reports whether memberCount (as listed by
+// etcdctl member list) already reaches the Raft quorum of expectedClusterSize
+// while staying below that size. It is false when expectedClusterSize <= 0.
+func resetMemberStragglerJoinQuorumMet(memberCount, expectedClusterSize int) bool {
+	if expectedClusterSize <= 0 {
+		return false
+	}
+	quorum := etcdRaftQuorumSize(expectedClusterSize)
+	return quorum <= memberCount && memberCount < expectedClusterSize
+}
diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership_test.go b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership_test.go
new file mode 100644
index 00000000000..b8fdcbd694d
--- /dev/null
+++ b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/membership_test.go
@@ -0,0 +1,125 @@
+package etcd
+
+import (
+	"testing"
+
+	.
"github.com/onsi/gomega"
+)
+
+// TestEtcdRaftQuorumSize covers non-positive, odd and even cluster sizes.
+func TestEtcdRaftQuorumSize(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name        string
+		clusterSize int
+		wantQuorum  int
+	}{
+		{
+			name:        "When clusterSize is zero, it should return zero",
+			clusterSize: 0,
+			wantQuorum:  0,
+		},
+		{
+			name:        "When clusterSize is negative, it should return zero",
+			clusterSize: -1,
+			wantQuorum:  0,
+		},
+		{
+			name:        "When clusterSize is one, it should return one",
+			clusterSize: 1,
+			wantQuorum:  1,
+		},
+		{
+			name:        "When clusterSize is three, it should return two",
+			clusterSize: 3,
+			wantQuorum:  2,
+		},
+		{
+			name:        "When clusterSize is five, it should return three",
+			clusterSize: 5,
+			wantQuorum:  3,
+		},
+		{
+			name:        "When clusterSize is four, it should return three",
+			clusterSize: 4,
+			wantQuorum:  3,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+			got := etcdRaftQuorumSize(tt.clusterSize)
+			g.Expect(got).To(
+				Equal(tt.wantQuorum),
+				"Calculated quorum size for cluster size %d should be %d", tt.clusterSize, tt.wantQuorum)
+		})
+	}
+}
+
+// TestResetMemberStragglerJoinQuorumMet covers counts below, at and above quorum.
+func TestResetMemberStragglerJoinQuorumMet(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name         string
+		memberCount  int
+		expectedSize int
+		wantJoin     bool
+	}{
+		{
+			name:         "When expected cluster size is zero, it should return false",
+			memberCount:  3,
+			expectedSize: 0,
+			wantJoin:     false,
+		},
+		{
+			name:         "When member count is below quorum for three members, it should return false",
+			memberCount:  1,
+			expectedSize: 3,
+			wantJoin:     false,
+		},
+		{
+			name:         "When member count meets quorum for three members, it should return true",
+			memberCount:  2,
+			expectedSize: 3,
+			wantJoin:     true,
+		},
+		{
+			name:         "When member count is already at expected cluster size, it should return false",
+			memberCount:  3,
+			expectedSize: 3,
+			wantJoin:     false,
+		},
+		{
+			name:         "When member count is two below quorum for five members, it should return false",
+			memberCount:  2,
+			expectedSize: 5,
+			wantJoin:     false,
+		},
+		{
+			name:         "When member count meets quorum for five members, it should return true",
+			memberCount:  3,
+			expectedSize: 5,
+			wantJoin:     true,
+		},
+		{
+			name:         "When member count is at expected cluster size for five members, it should return false",
+			memberCount:  5,
+			expectedSize: 5,
+			wantJoin:     false,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
+			joinAllowed := resetMemberStragglerJoinQuorumMet(tt.memberCount, tt.expectedSize)
+			g.Expect(joinAllowed).To(
+				Equal(tt.wantJoin),
+				"Expected straggler join for member count %d and expected cluster size %d should be %v",
+				tt.memberCount, tt.expectedSize, tt.wantJoin)
+		})
+	}
+}
diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/statefulset.go b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/statefulset.go
index 49de8fc17d3..aa19cbb2650 100644
--- a/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/statefulset.go
+++ b/control-plane-operator/controllers/hostedcontrolplane/v2/etcd/statefulset.go
@@ -21,13 +21,15 @@ func adaptStatefulSet(cpContext component.WorkloadContext, sts *appsv1.StatefulS
 	hcp := cpContext.HCP
 	managedEtcdSpec := hcp.Spec.Etcd.Managed
 
+	replicas := component.DefaultReplicas(hcp, &etcd{}, ComponentName)
+	quorumMinMembers := etcdRaftQuorumSize(int(replicas))
+
 	ipv4, err := netutil.IsIPv4CIDR(hcp.Spec.Networking.ClusterNetwork[0].CIDR.String())
 	if err != nil {
 		return fmt.Errorf("error checking the ClusterNetworkCIDR: %v", err)
 	}
 
 	podspec.UpdateContainer(ComponentName, sts.Spec.Template.Spec.Containers, func(c *corev1.Container) {
-		replicas := component.DefaultReplicas(hcp, &etcd{}, ComponentName)
 		var members []string
 		for i := range replicas {
 			name := fmt.Sprintf("etcd-%d", i)
@@ -56,6 +58,17 @@ func
adaptStatefulSet(cpContext component.WorkloadContext, sts *appsv1.StatefulS } }) + podspec.UpdateContainer("reset-member", sts.Spec.Template.Spec.InitContainers, func(c *corev1.Container) { + podspec.UpsertEnvVar(c, corev1.EnvVar{ + Name: "ETCD_EXPECTED_MEMBER_COUNT", + Value: fmt.Sprintf("%d", replicas), + }) + podspec.UpsertEnvVar(c, corev1.EnvVar{ + Name: "ETCD_QUORUM_MIN_MEMBERS", + Value: fmt.Sprintf("%d", quorumMinMembers), + }) + }) + podspec.UpdateContainer("etcd-metrics", sts.Spec.Template.Spec.Containers, func(c *corev1.Container) { var loInterface, allInterfaces string if ipv4 {