From 356a8645a1372da596bc4f2ab7916c10580b5ac9 Mon Sep 17 00:00:00 2001
From: Dmitry Muzyka <a5b1ca+git@gmail.com>
Date: Mon, 23 Feb 2026 10:38:29 +0100
Subject: [PATCH] feat: rollout restart single-replica Deployments instead of
 evicting

For Deployments with replicas=1 and RollingUpdate strategy, trigger a
rollout restart (patch pod template annotation) instead of using the
Pod Eviction API. This avoids downtime by letting the Deployment
controller create a new pod before terminating the old one.

Falls through to normal eviction on errors, non-Deployment pods,
multi-replica Deployments, or Recreate strategy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 charts/descheduler/templates/clusterrole.yaml |   6 +
 kubernetes/base/rbac.yaml                     |   6 +
 pkg/descheduler/evictions/evictions.go        | 115 ++++-
 pkg/descheduler/evictions/evictions_test.go   | 466 ++++++++++++++++++
 pkg/descheduler/kubeclientsandbox.go          |   3 +
 5 files changed, 593 insertions(+), 3 deletions(-)

diff --git a/charts/descheduler/templates/clusterrole.yaml b/charts/descheduler/templates/clusterrole.yaml
index 605b288861..a0f1d0b2e3 100644
--- a/charts/descheduler/templates/clusterrole.yaml
+++ b/charts/descheduler/templates/clusterrole.yaml
@@ -27,6 +27,12 @@ rules:
 - apiGroups: ["policy"]
   resources: ["poddisruptionbudgets"]
   verbs: ["get", "watch", "list"]
+- apiGroups: ["apps"]
+  resources: ["replicasets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apps"]
+  resources: ["deployments"]
+  verbs: ["get", "patch"]
 {{- if .Values.leaderElection.enabled }}
 - apiGroups: ["coordination.k8s.io"]
   resources: ["leases"]
diff --git a/kubernetes/base/rbac.yaml b/kubernetes/base/rbac.yaml
index a87e99415d..67816c2967 100644
--- a/kubernetes/base/rbac.yaml
+++ b/kubernetes/base/rbac.yaml
@@ -38,6 +38,12 @@ rules:
 - apiGroups: [""]
   resources: ["persistentvolumeclaims"]
   verbs: ["get", "watch", "list"]
+- apiGroups: ["apps"]
+  resources: ["replicasets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["apps"]
+  resources: ["deployments"]
+  verbs: ["get", "patch"]
 ---
 kind: Role
 apiVersion: rbac.authorization.k8s.io/v1
diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index d0c2c36bb4..552b73120a 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -25,10 +25,12 @@ import (
 
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	policy "k8s.io/api/policy/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
@@ -229,6 +231,11 @@ type PodEvictor struct {
 	erCache                          *evictionRequestsCache
 	featureGates                     featuregate.FeatureGate
 
+	// restartedDeployments tracks "namespace/name" of Deployments already rollout-restarted this cycle
+	restartedDeployments map[string]bool
+	// lastRolloutRestart indicates the most recent evictPod call used rollout restart
+	lastRolloutRestart bool
+
 	// registeredHandlers contains the registrations of all handlers. It's used to check if all handlers have finished syncing before the scheduling cycles start.
 	registeredHandlers []cache.ResourceEventHandlerRegistration
 }
@@ -258,6 +265,7 @@ func NewPodEvictor(
 		metricsEnabled:                   options.metricsEnabled,
 		nodePodCount:                     make(nodePodEvictedCount),
 		namespacePodCount:                make(namespacePodEvictCount),
+		restartedDeployments:             make(map[string]bool),
 		featureGates:                     featureGates,
 	}
 
@@ -401,6 +409,7 @@ func (pe *PodEvictor) ResetCounters() {
 	pe.nodePodCount = make(nodePodEvictedCount)
 	pe.namespacePodCount = make(namespacePodEvictCount)
 	pe.totalPodCount = 0
+	pe.restartedDeployments = make(map[string]bool)
 }
 
 func (pe *PodEvictor) evictionRequestsTotal() uint {
@@ -552,10 +561,15 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
 		metrics.PodsEvictedTotal.With(map[string]string{"result": "success", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
 	}
 
+	method := "eviction"
+	if pe.lastRolloutRestart {
+		method = "rollout-restart"
+	}
+
 	if pe.dryRun {
-		klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
+		klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName, "method", method)
 	} else {
-		klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName)
+		klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", opts.StrategyName, "node", pod.Spec.NodeName, "profile", opts.ProfileName, "method", method)
 		reason := opts.Reason
 		if len(reason) == 0 {
 			reason = opts.StrategyName
@@ -563,13 +577,108 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
 				reason = "NotSet"
 			}
 		}
-		pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
+		if pe.lastRolloutRestart {
+			pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod rollout-restarted (single-replica) from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
+		} else {
+			pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
+		}
 	}
 	return nil
 }
 
+// resolveDeploymentOwner walks the owner chain Pod → ReplicaSet → Deployment.
+// Returns (nil, nil) if the pod is not owned by a Deployment.
+func (pe *PodEvictor) resolveDeploymentOwner(ctx context.Context, pod *v1.Pod) (*appsv1.Deployment, error) {
+	var rsName string
+	for _, ref := range pod.OwnerReferences {
+		if ref.Kind == "ReplicaSet" {
+			rsName = ref.Name
+			break
+		}
+	}
+	if rsName == "" {
+		return nil, nil
+	}
+
+	rs, err := pe.client.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, rsName, metav1.GetOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("failed to get ReplicaSet %s/%s: %v", pod.Namespace, rsName, err)
+	}
+
+	var deployName string
+	for _, ref := range rs.OwnerReferences {
+		if ref.Kind == "Deployment" {
+			deployName = ref.Name
+			break
+		}
+	}
+	if deployName == "" {
+		return nil, nil
+	}
+
+	deploy, err := pe.client.AppsV1().Deployments(pod.Namespace).Get(ctx, deployName, metav1.GetOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("failed to get Deployment %s/%s: %v", pod.Namespace, deployName, err)
+	}
+
+	return deploy, nil
+}
+
+// rolloutRestartDeployment patches the Deployment's pod template annotation to trigger a rolling restart.
+func (pe *PodEvictor) rolloutRestartDeployment(ctx context.Context, deploy *appsv1.Deployment) error {
+	patch := fmt.Sprintf(`{"spec":{"template":{"metadata":{"annotations":{"kubectl.kubernetes.io/restartedAt":"%s"}}}}}`, time.Now().Format(time.RFC3339))
+	_, err := pe.client.AppsV1().Deployments(deploy.Namespace).Patch(ctx, deploy.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
+	return err
+}
+
+// tryRolloutRestart checks if the pod belongs to a single-replica Deployment with RollingUpdate
+// strategy and triggers a rollout restart instead of eviction to avoid downtime.
+// Returns (true, true/false, nil) if handled (ignore, rolloutRestarted),
+// or (false, false, nil) if normal eviction should proceed.
+func (pe *PodEvictor) tryRolloutRestart(ctx context.Context, pod *v1.Pod) (handled bool, ignore bool) {
+	deploy, err := pe.resolveDeploymentOwner(ctx, pod)
+	if err != nil {
+		klog.V(3).InfoS("Failed to resolve Deployment owner, falling through to normal eviction", "pod", klog.KObj(pod), "err", err)
+		return false, false
+	}
+	if deploy == nil {
+		return false, false
+	}
+
+	replicas := int32(1)
+	if deploy.Spec.Replicas != nil {
+		replicas = *deploy.Spec.Replicas
+	}
+	isRecreate := deploy.Spec.Strategy.Type == appsv1.RecreateDeploymentStrategyType
+
+	if replicas != 1 || isRecreate || deploy.Status.UnavailableReplicas != 0 {
+		return false, false
+	}
+
+	deployKey := deploy.Namespace + "/" + deploy.Name
+	if pe.restartedDeployments[deployKey] {
+		klog.V(3).InfoS("Deployment already rollout-restarted this cycle, skipping", "deployment", deployKey, "pod", klog.KObj(pod))
+		return true, true
+	}
+	if !pe.dryRun {
+		if err := pe.rolloutRestartDeployment(ctx, deploy); err != nil {
+			klog.V(1).InfoS("Failed to rollout restart Deployment, falling through to normal eviction", "deployment", deployKey, "pod", klog.KObj(pod), "err", err)
+			return false, false
+		}
+	}
+	klog.V(1).InfoS("Triggered rollout restart for single-replica Deployment instead of eviction", "deployment", deployKey, "pod", klog.KObj(pod), "dryRun", pe.dryRun)
+	pe.restartedDeployments[deployKey] = true
+	return true, false
+}
+
 // return (ignore, err)
 func (pe *PodEvictor) evictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) (bool, error) {
+	pe.lastRolloutRestart = false
+
+	if handled, ignore := pe.tryRolloutRestart(ctx, pod); handled {
+		pe.lastRolloutRestart = !ignore
+		return ignore, nil
+	}
 	deleteOptions := &metav1.DeleteOptions{
 		GracePeriodSeconds: pe.gracePeriodSeconds,
 	}
diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go
index 2e133e3885..c5e033f934 100644
--- a/pkg/descheduler/evictions/evictions_test.go
+++ b/pkg/descheduler/evictions/evictions_test.go
@@ -21,9 +21,11 @@ import (
 	"fmt"
 	"reflect"
 	"strings"
+	"sync/atomic"
 	"testing"
 	"time"
 
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	policy "k8s.io/api/policy/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -463,6 +465,470 @@ func TestEvictionRequestsCacheCleanup(t *testing.T) {
 	}
 }
 
+// helper to build a ReplicaSet owned by a Deployment
+func buildTestReplicaSet(name, namespace, deployName string) *appsv1.ReplicaSet {
+	return &appsv1.ReplicaSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			OwnerReferences: []metav1.OwnerReference{
+				{Kind: "Deployment", Name: deployName, APIVersion: "apps/v1"},
+			},
+		},
+	}
+}
+
+// helper to build a Deployment
+func buildTestDeployment(name, namespace string, replicas *int32, strategy appsv1.DeploymentStrategyType) *appsv1.Deployment {
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: replicas,
+			Strategy: appsv1.DeploymentStrategy{Type: strategy},
+		},
+	}
+}
+
+func TestRolloutRestartSingleReplica(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType)
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	var evictionCalled int32
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			atomic.AddInt32(&evictionCalled, 1)
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for rollout restart")
+	}
+	if atomic.LoadInt32(&patchCount) != 1 {
+		t.Errorf("Expected 1 PATCH on deployments, got %d", atomic.LoadInt32(&patchCount))
+	}
+	if atomic.LoadInt32(&evictionCalled) != 0 {
+		t.Errorf("Expected 0 eviction API calls, got %d", atomic.LoadInt32(&evictionCalled))
+	}
+}
+
+func TestRolloutRestartMultiReplica(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](3), appsv1.RollingUpdateDeploymentStrategyType)
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for normal eviction")
+	}
+	if atomic.LoadInt32(&patchCount) != 0 {
+		t.Errorf("Expected 0 PATCH on deployments for multi-replica, got %d", atomic.LoadInt32(&patchCount))
+	}
+}
+
+func TestRolloutRestartNonDeploymentPod(t *testing.T) {
+	ctx := context.Background()
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "StatefulSet", Name: "ss-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, nil, nil
+	})
+
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for normal eviction")
+	}
+	if atomic.LoadInt32(&patchCount) != 0 {
+		t.Errorf("Expected 0 PATCH for non-Deployment pod, got %d", atomic.LoadInt32(&patchCount))
+	}
+}
+
+func TestRolloutRestartDeduplication(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType)
+
+	pod1 := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+	pod2 := test.BuildTestPod("p2", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod1, pod2, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	// First call should trigger rollout restart
+	ignore1, err := podEvictor.evictPod(ctx, pod1, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error on first call: %v", err)
+	}
+	if ignore1 {
+		t.Fatal("Expected ignore=false on first call")
+	}
+
+	// Second call for same deployment should be ignored (deduplicated)
+	ignore2, err := podEvictor.evictPod(ctx, pod2, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error on second call: %v", err)
+	}
+	if !ignore2 {
+		t.Fatal("Expected ignore=true on second call (deduplication)")
+	}
+
+	if atomic.LoadInt32(&patchCount) != 1 {
+		t.Errorf("Expected exactly 1 PATCH, got %d", atomic.LoadInt32(&patchCount))
+	}
+}
+
+func TestRolloutRestartDryRun(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType)
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(),
+		NewOptions().WithDryRun(true))
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for dry-run rollout restart")
+	}
+	if atomic.LoadInt32(&patchCount) != 0 {
+		t.Errorf("Expected 0 PATCH in dry-run mode, got %d", atomic.LoadInt32(&patchCount))
+	}
+	if !podEvictor.restartedDeployments["default/deploy-1"] {
+		t.Error("Expected restartedDeployments to be set even in dry-run")
+	}
+}
+
+func TestRolloutRestartResolveFailureFallback(t *testing.T) {
+	ctx := context.Background()
+
+	// Pod references an RS that doesn't exist → resolveDeploymentOwner fails → fallback to normal eviction
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "nonexistent-rs", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod)
+
+	var evictionCalled int32
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			atomic.AddInt32(&evictionCalled, 1)
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for fallback eviction")
+	}
+	if atomic.LoadInt32(&evictionCalled) != 1 {
+		t.Errorf("Expected 1 eviction API call (fallback), got %d", atomic.LoadInt32(&evictionCalled))
+	}
+}
+
+func TestRolloutRestartRecreateStrategy(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RecreateDeploymentStrategyType)
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	var evictionCalled int32
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			atomic.AddInt32(&evictionCalled, 1)
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for Recreate strategy")
+	}
+	if atomic.LoadInt32(&patchCount) != 0 {
+		t.Errorf("Expected 0 PATCH for Recreate strategy, got %d", atomic.LoadInt32(&patchCount))
+	}
+	if atomic.LoadInt32(&evictionCalled) != 1 {
+		t.Errorf("Expected 1 eviction API call for Recreate strategy, got %d", atomic.LoadInt32(&evictionCalled))
+	}
+}
+
+func TestRolloutRestartNilReplicas(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	// nil replicas = k8s default of 1
+	deploy := buildTestDeployment("deploy-1", "default", nil, appsv1.RollingUpdateDeploymentStrategyType)
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	ignore, err := podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if ignore {
+		t.Fatal("Expected ignore=false for rollout restart with nil replicas")
+	}
+	if atomic.LoadInt32(&patchCount) != 1 {
+		t.Errorf("Expected 1 PATCH for nil replicas (treated as 1), got %d", atomic.LoadInt32(&patchCount))
+	}
+}
+
+func TestRolloutRestartUnhealthyDeployment(t *testing.T) {
+	ctx := context.Background()
+
+	rs := buildTestReplicaSet("rs-1", "default", "deploy-1")
+	deploy := buildTestDeployment("deploy-1", "default", utilptr.To[int32](1), appsv1.RollingUpdateDeploymentStrategyType)
+	// Simulate a failed rollout: new pod is not ready
+	deploy.Status.UnavailableReplicas = 1
+
+	pod := test.BuildTestPod("p1", 400, 0, "node1", func(p *v1.Pod) {
+		p.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
+			{Kind: "ReplicaSet", Name: "rs-1", APIVersion: "apps/v1"},
+		}
+	})
+
+	fakeClient := fakeclientset.NewSimpleClientset(pod, rs, deploy)
+
+	var patchCount int32
+	fakeClient.PrependReactor("patch", "deployments", func(action core.Action) (bool, runtime.Object, error) {
+		atomic.AddInt32(&patchCount, 1)
+		return true, deploy, nil
+	})
+
+	var evictionCount int32
+	fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		if action.GetSubresource() == "eviction" {
+			atomic.AddInt32(&evictionCount, 1)
+			return true, nil, nil
+		}
+		return false, nil, nil
+	})
+
+	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	podEvictor, err := NewPodEvictor(ctx, fakeClient, &events.FakeRecorder{}, sharedInformerFactory.Core().V1().Pods().Informer(), initFeatureGates(), NewOptions())
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	_, err = podEvictor.evictPod(ctx, pod, EvictOptions{})
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+	if atomic.LoadInt32(&patchCount) != 0 {
+		t.Errorf("Expected no PATCH for unhealthy deployment, got %d", atomic.LoadInt32(&patchCount))
+	}
+	if atomic.LoadInt32(&evictionCount) != 1 {
+		t.Errorf("Expected normal eviction for unhealthy deployment, got %d", atomic.LoadInt32(&evictionCount))
+	}
+}
+
 func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) {
 	t.Logf("Assert for events: %v", expected)
 	c := time.After(wait.ForeverTestTimeout)
diff --git a/pkg/descheduler/kubeclientsandbox.go b/pkg/descheduler/kubeclientsandbox.go
index 5e6420b1ba..f125ebae74 100644
--- a/pkg/descheduler/kubeclientsandbox.go
+++ b/pkg/descheduler/kubeclientsandbox.go
@@ -22,6 +22,7 @@ import (
 	"sync"
 	"time"
 
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	policyv1 "k8s.io/api/policy/v1"
 	schedulingv1 "k8s.io/api/scheduling/v1"
@@ -103,6 +104,8 @@ func newDefaultKubeClientSandbox(client clientset.Interface, sharedInformerFacto
 		schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"),
 		policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"),
 		v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"),
+		appsv1.SchemeGroupVersion.WithResource("replicasets"),
+		appsv1.SchemeGroupVersion.WithResource("deployments"),
 	)
 }