From 356a8645a1372da596bc4f2ab7916c10580b5ac9 Mon Sep 17 00:00:00 2001 From: Dmitry Muzyka Date: Mon, 23 Feb 2026 10:38:29 +0100 Subject: [PATCH] feat: rollout restart single-replica Deployments instead of evicting For Deployments with replicas=1 and RollingUpdate strategy, trigger a rollout restart (patch pod template annotation) instead of using the Pod Eviction API. This avoids downtime by letting the Deployment controller create a new pod before terminating the old one. Falls through to normal eviction on errors, non-Deployment pods, multi-replica Deployments, or Recreate strategy. Co-Authored-By: Claude Opus 4.6 --- charts/descheduler/templates/clusterrole.yaml | 6 + kubernetes/base/rbac.yaml | 6 + pkg/descheduler/evictions/evictions.go | 115 ++++- pkg/descheduler/evictions/evictions_test.go | 466 ++++++++++++++++++ pkg/descheduler/kubeclientsandbox.go | 3 + 5 files changed, 593 insertions(+), 3 deletions(-) diff --git a/charts/descheduler/templates/clusterrole.yaml b/charts/descheduler/templates/clusterrole.yaml index 605b288861..a0f1d0b2e3 100644 --- a/charts/descheduler/templates/clusterrole.yaml +++ b/charts/descheduler/templates/clusterrole.yaml @@ -27,6 +27,12 @@ rules: - apiGroups: ["policy"] resources: ["poddisruptionbudgets"] verbs: ["get", "watch", "list"] +- apiGroups: ["apps"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "patch"] {{- if .Values.leaderElection.enabled }} - apiGroups: ["coordination.k8s.io"] resources: ["leases"] diff --git a/kubernetes/base/rbac.yaml b/kubernetes/base/rbac.yaml index a87e99415d..67816c2967 100644 --- a/kubernetes/base/rbac.yaml +++ b/kubernetes/base/rbac.yaml @@ -38,6 +38,12 @@ rules: - apiGroups: [""] resources: ["persistentvolumeclaims"] verbs: ["get", "watch", "list"] +- apiGroups: ["apps"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "patch"] --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index d0c2c36bb4..552b73120a 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -25,10 +25,12 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" @@ -229,6 +231,11 @@ type PodEvictor struct { erCache *evictionRequestsCache featureGates featuregate.FeatureGate + // restartedDeployments tracks "namespace/name" of Deployments already rollout-restarted this cycle + restartedDeployments map[string]bool + // lastRolloutRestart indicates the most recent evictPod call used rollout restart + lastRolloutRestart bool + // registeredHandlers contains the registrations of all handlers. It's used to check if all handlers have finished syncing before the scheduling cycles start. registeredHandlers []cache.ResourceEventHandlerRegistration } @@ -258,6 +265,7 @@ func NewPodEvictor( metricsEnabled: options.metricsEnabled, nodePodCount: make(nodePodEvictedCount), namespacePodCount: make(namespacePodEvictCount), + restartedDeployments: make(map[string]bool), featureGates: featureGates, } @@ -401,6 +409,7 @@ func (pe *PodEvictor) ResetCounters() { pe.nodePodCount = make(nodePodEvictedCount) pe.namespacePodCount = make(namespacePodEvictCount) pe.totalPodCount = 0 + pe.restartedDeployments = make(map[string]bool) } func (pe *PodEvictor) evictionRequestsTotal() uint { @@ -552,10 +561,15 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio metrics.PodsEvictedTotal.With(map[string]string{"result": "success", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc() } + method := "eviction" + if pe.lastRolloutRestart { + method = "rollout-restart" + } + if pe.dryRun { - klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName) + klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName, "method", method) } else { - klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName) + klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName, "method", method) reason := opts.Reason if len(reason) == 0 { reason = opts.StrategyName @@ -563,13 +577,108 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio reason = "NotSet" } } - pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName) + if pe.lastRolloutRestart { + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod rollout-restarted (single-replica) from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName) + } else { + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName) + } } return nil } +// resolveDeploymentOwner walks the owner chain Pod → ReplicaSet → Deployment. +// Returns (nil, nil) if the pod is not owned by a Deployment. +func (pe *PodEvictor) resolveDeploymentOwner(ctx context.Context, pod *v1.Pod) (*appsv1.Deployment, error) { + var rsName string + for _, ref := range pod.OwnerReferences { + if ref.Kind == "ReplicaSet" { + rsName = ref.Name + break + } + } + if rsName == "" { + return nil, nil + } + + rs, err := pe.client.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, rsName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get ReplicaSet %s/%s: %v", pod.Namespace, rsName, err) + } + + var deployName string + for _, ref := range rs.OwnerReferences { + if ref.Kind == "Deployment" { + deployName = ref.Name + break + } + } + if deployName == "" { + return nil, nil + } + + deploy, err := pe.client.AppsV1().Deployments(pod.Namespace).Get(ctx, deployName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get Deployment %s/%s: %v", pod.Namespace, deployName, err) + } + + return deploy, nil +} + +// rolloutRestartDeployment patches the Deployment's pod template annotation to trigger a rolling restart. +func (pe *PodEvictor) rolloutRestartDeployment(ctx context.Context, deploy *appsv1.Deployment) error { + patch := fmt.Sprintf(`{"spec":{"template":{"metadata":{"annotations":{"kubectl.kubernetes.io/restartedAt":"%s"}}}}}`, time.Now().Format(time.RFC3339)) + _, err := pe.client.AppsV1().Deployments(deploy.Namespace).Patch(ctx, deploy.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) + return err +} + +// tryRolloutRestart checks if the pod belongs to a single-replica Deployment with RollingUpdate +// strategy and triggers a rollout restart instead of eviction to avoid downtime. +// Returns (true, true/false, nil) if handled (ignore, rolloutRestarted), +// or (false, false, nil) if normal eviction should proceed. +func (pe *PodEvictor) tryRolloutRestart(ctx context.Context, pod *v1.Pod) (handled bool, ignore bool) { + deploy, err := pe.resolveDeploymentOwner(ctx, pod) + if err != nil { + klog.V(3).InfoS("Failed to resolve Deployment owner, falling through to normal eviction", "pod", klog.KObj(pod), "err", err) + return false, false + } + if deploy == nil { + return false, false + } + + replicas := int32(1) + if deploy.Spec.Replicas != nil { + replicas = *deploy.Spec.Replicas + } + isRecreate := deploy.Spec.Strategy.Type == appsv1.RecreateDeploymentStrategyType + + if replicas != 1 || isRecreate || deploy.Status.UnavailableReplicas != 0 { + return false, false + } + + deployKey := deploy.Namespace + "/" + deploy.Name + if pe.restartedDeployments[deployKey] { + klog.V(3).InfoS("Deployment already rollout-restarted this cycle, skipping", "deployment", deployKey, "pod", klog.KObj(pod)) + return true, true + } + if !pe.dryRun { + if err := pe.rolloutRestartDeployment(ctx, deploy); err != nil { + klog.V(1).InfoS("Failed to rollout restart Deployment, falling through to normal eviction", "deployment", deployKey, "pod", klog.KObj(pod), "err", err) + return false, false + } + } + klog.V(1).InfoS("Triggered rollout restart for single-replica Deployment instead of eviction", "deployment", deployKey, "pod", klog.KObj(pod), "dryRun", pe.dryRun) + pe.restartedDeployments[deployKey] = true + return true, false +} + // return (ignore, err) func (pe *PodEvictor) evictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) (bool, error) { + pe.lastRolloutRestart = false + + if handled, ignore := pe.tryRolloutRestart(ctx, pod); handled { + pe.lastRolloutRestart = !ignore + return ignore, nil + } deleteOptions := &metav1.DeleteOptions{ GracePeriodSeconds: pe.gracePeriodSeconds, } diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index 2e133e3885..c5e033f934 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -21,9 +21,11 @@ import ( "fmt" "reflect" "strings" + "sync/atomic" "testing" "time" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -463,6 +465,470 @@ func TestEvictionRequestsCacheCleanup(t *testing.T) { } } +// helper to build a ReplicaSet owned by a Deployment +func buildTestReplicaSet(name, namespace, deployName string) *appsv1.ReplicaSet { + return &appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + OwnerReferences: []metav1.OwnerReference{ + {Kind: "Deployment", Name: deployName, APIVersion: "apps/v1"}, + }, + }, + } +} + +// helper to build a Deployment +func buildTestDeployment(name, namespace string, replicas *int32, strategy appsv1.DeploymentStrategyType) *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: replicas, + Strategy: appsv1.DeploymentStrategy{Type: strategy}, + }, + } +} + +func TestRolloutRestartSingleReplica(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType) + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + var evictionCalled int32 + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + atomic.AddInt32(&evictionCalled, 1) + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for rollout restart") + } + if atomic.LoadInt32(&patchCount) != 1 { + t.Errorf("Expected 1 PATCH on deployments, got %d", atomic.LoadInt32(&patchCount)) + } + if atomic.LoadInt32(&evictionCalled) != 0 { + t.Errorf("Expected 0 eviction API calls, got %d", atomic.LoadInt32(&evictionCalled)) + } +} + +func TestRolloutRestartMultiReplica(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](3), appsv1.RollingUpdateDeploymentStrategyType) + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for normal eviction") + } + if atomic.LoadInt32(&patchCount) != 0 { + t.Errorf("Expected 0 PATCH on deployments for multi-replica, got %d", atomic.LoadInt32(&patchCount)) + } +} + +func TestRolloutRestartNonDeploymentPod(t *testing.T) { + ctx := context.Background() + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "StatefulSet", Name: "ss-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, nil, nil + }) + + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for normal eviction") + } + if atomic.LoadInt32(&patchCount) != 0 { + t.Errorf("Expected 0 PATCH for non-Deployment pod, got %d", atomic.LoadInt32(&patchCount)) + } +} + +func TestRolloutRestartDeduplication(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType) + + pod1 := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + pod2 := test.BuildTestPod("p2", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod1, pod2, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // First call should trigger rollout restart + ignore1, err := podEvictor.evictPod(ctx, pod1, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error on first call: %v", err) + } + if ignore1 { + t.Fatal("Expected ignore=false on first call") + } + + // Second call for same deployment should be ignored (deduplicated) + ignore2, err := podEvictor.evictPod(ctx, pod2, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error on second call: %v", err) + } + if !ignore2 { + t.Fatal("Expected ignore=true on second call (deduplication)") + } + + if atomic.LoadInt32(&patchCount) != 1 { + t.Errorf("Expected exactly 1 PATCH, got %d", atomic.LoadInt32(&patchCount)) + } +} + +func TestRolloutRestartDryRun(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType) + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), + NewOptions().WithDryRun(true)) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for dry-run rollout restart") + } + if atomic.LoadInt32(&patchCount) != 0 { + t.Errorf("Expected 0 PATCH in dry-run mode, got %d", atomic.LoadInt32(&patchCount)) + } + if !podEvictor.restartedDeployments["default/deploy-1"] { + t.Error("Expected restartedDeployments to be set even in dry-run") + } +} + +func TestRolloutRestartResolveFailureFallback(t *testing.T) { + ctx := context.Background() + + // Pod references an RS that doesn't exist → resolveDeploymentOwner fails → fallback to normal eviction + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "nonexistent-rs", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod) + + var evictionCalled int32 + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + atomic.AddInt32(&evictionCalled, 1) + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for fallback eviction") + } + if atomic.LoadInt32(&evictionCalled) != 1 { + t.Errorf("Expected 1 eviction API call (fallback), got %d", atomic.LoadInt32(&evictionCalled)) + } +} + +func TestRolloutRestartRecreateStrategy(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RecreateDeploymentStrategyType) + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + var evictionCalled int32 + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + atomic.AddInt32(&evictionCalled, 1) + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for Recreate strategy") + } + if atomic.LoadInt32(&patchCount) != 0 { + t.Errorf("Expected 0 PATCH for Recreate strategy, got %d", atomic.LoadInt32(&patchCount)) + } + if atomic.LoadInt32(&evictionCalled) != 1 { + t.Errorf("Expected 1 eviction API call for Recreate strategy, got %d", atomic.LoadInt32(&evictionCalled)) + } +} + +func TestRolloutRestartNilReplicas(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + // nil replicas = k8s default of 1 + deploy := buildTestDeployment("deploy-1", "default", nil, appsv1.RollingUpdateDeploymentStrategyType) + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if ignore { + t.Fatal("Expected ignore=false for rollout restart with nil replicas") + } + if atomic.LoadInt32(&patchCount) != 1 { + t.Errorf("Expected 1 PATCH for nil replicas (treated as 1), got %d", atomic.LoadInt32(&patchCount)) + } +} + +func TestRolloutRestartUnhealthyDeployment(t *testing.T) { + ctx := context.Background() + + rs := buildTestReplicaSet("rs-1", "default", "deploy-1") + deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType) + // Simulate a failed rollout: new pod is not ready + deploy.Status.UnavailableReplicas = 1 + + pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) { + p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ + {Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"}, + } + }) + + fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy) + + var patchCount int32 + fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) { + atomic.AddInt32(&patchCount, 1) + return true, deploy, nil + }) + + var evictionCount int32 + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + atomic.AddInt32(&evictionCount, 1) + return true, nil, nil + } + return false, nil, nil + }) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + _, err = podEvictor.evictPod(ctx, pod, EvictOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if atomic.LoadInt32(&patchCount) != 0 { + t.Errorf("Expected no PATCH for unhealthy deployment, got %d", atomic.LoadInt32(&patchCount)) + } + if atomic.LoadInt32(&evictionCount) != 1 { + t.Errorf("Expected normal eviction for unhealthy deployment, got %d", atomic.LoadInt32(&evictionCount)) + } +} + func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) { t.Logf("Assert for events: %v", expected) c := time.After(wait.ForeverTestTimeout) diff --git a/pkg/descheduler/kubeclientsandbox.go b/pkg/descheduler/kubeclientsandbox.go index 5e6420b1ba..f125ebae74 100644 --- a/pkg/descheduler/kubeclientsandbox.go +++ b/pkg/descheduler/kubeclientsandbox.go @@ -22,6 +22,7 @@ import ( "sync" "time" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" schedulingv1 "k8s.io/api/scheduling/v1" @@ -103,6 +104,8 @@ func newDefaultKubeClientSandbox(client clientset.Interface, sharedInformerFacto schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"), + appsv1.SchemeGroupVersion.WithResource("replicasets"), + appsv1.SchemeGroupVersion.WithResource("deployments"), ) }