Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pkg/framework/plugins/removefailedpods/failedpods.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,18 @@ func New(ctx context.Context, args runtime.Object, handle frameworktypes.Handle)
}

// We can combine Filter and PreEvictionFilter since for this strategy it does not matter where we run PreEvictionFilter
defaultFilter := handle.Evictor().Filter
if failedPodsArgs.IncludingSystemCriticalPods {
// The operator has explicitly opted in to evicting failed pods that are
// otherwise protected by the framework default-evictor (system-critical
// priority, daemon set, etc.). Bypass the default-evictor filter at
// candidate-listing time; the PodFailed phase check (below),
// validateCanEvict, and PreEvictionFilter still gate eviction.
defaultFilter = func(pod *v1.Pod) bool { return true }
}

podFilter, err := podutil.NewOptions().
WithFilter(podutil.WrapFilterFuncs(handle.Evictor().Filter, handle.Evictor().PreEvictionFilter)).
WithFilter(podutil.WrapFilterFuncs(defaultFilter, handle.Evictor().PreEvictionFilter)).
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(failedPodsArgs.LabelSelector).
Expand Down
48 changes: 41 additions & 7 deletions pkg/framework/plugins/removefailedpods/failedpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,30 @@ func TestRemoveFailedPods(t *testing.T) {
buildTestPod("p2", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
},
},
{
description: "system-critical priority failed pod is protected by default",
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildCriticalPriorityPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
})),
},
},
{
description: "includingSystemCriticalPods=true, system-critical priority failed pod is evicted",
args: RemoveFailedPodsArgs{
IncludingSystemCriticalPods: true,
},
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildCriticalPriorityPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
})),
},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
Expand All @@ -363,13 +387,14 @@ func TestRemoveFailedPods(t *testing.T) {
}

plugin, err := New(ctx, &RemoveFailedPodsArgs{
Reasons: tc.args.Reasons,
ExitCodes: tc.args.ExitCodes,
MinPodLifetimeSeconds: tc.args.MinPodLifetimeSeconds,
IncludingInitContainers: tc.args.IncludingInitContainers,
ExcludeOwnerKinds: tc.args.ExcludeOwnerKinds,
LabelSelector: tc.args.LabelSelector,
Namespaces: tc.args.Namespaces,
Reasons: tc.args.Reasons,
ExitCodes: tc.args.ExitCodes,
MinPodLifetimeSeconds: tc.args.MinPodLifetimeSeconds,
IncludingInitContainers: tc.args.IncludingInitContainers,
IncludingSystemCriticalPods: tc.args.IncludingSystemCriticalPods,
ExcludeOwnerKinds: tc.args.ExcludeOwnerKinds,
LabelSelector: tc.args.LabelSelector,
Namespaces: tc.args.Namespaces,
},
handle,
)
Expand Down Expand Up @@ -414,3 +439,12 @@ func buildTestPod(podName, nodeName string, podStatus v1.PodStatus, deletionTime
pod.DeletionTimestamp = deletionTimestamp
return pod
}

// buildCriticalPriorityPod builds a failed pod whose priority matches the
// system-critical threshold checked by utils.IsCriticalPriorityPod.
func buildCriticalPriorityPod(podName, nodeName string, podStatus v1.PodStatus) *v1.Pod {
pod := buildTestPod(podName, nodeName, podStatus, nil)
priority := int32(2000000000)
pod.Spec.Priority = &priority
return pod
}
15 changes: 8 additions & 7 deletions pkg/framework/plugins/removefailedpods/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ import (
type RemoveFailedPodsArgs struct {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you double-checked that the new field is wired through any versioned API types as well? If RemoveFailedPodsArgs has a counterpart under pkg/api/v1alphaX/, the YAML decode path will silently drop includingSystemCriticalPods and the flag will look like it does nothing in real configs even though tests pass. Probably fine, but worth a grep -r RemoveFailedPodsArgs pkg/api/ to confirm.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checked — grep -rn RemoveFailedPodsArgs pkg/api/ returns nothing. Descheduler doesn't use the apimachinery per-plugin versioned-types pattern; plugin args are registered via pluginregistry.Register and decoded through pluginArgConversionScheme with the internal RemoveFailedPodsArgs as the target type, so the new field flows through the YAML decode path without a v1alpha2 shadow. The existing zz_generated.deepcopy.go uses *out = *in which already covers the new bool.

metav1.TypeMeta `json:",inline"`

Namespaces *api.Namespaces `json:"namespaces,omitempty"`
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
Reasons []string `json:"reasons,omitempty"`
ExitCodes []int32 `json:"exitCodes,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
Namespaces *api.Namespaces `json:"namespaces,omitempty"`
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
Reasons []string `json:"reasons,omitempty"`
ExitCodes []int32 `json:"exitCodes,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
IncludingSystemCriticalPods bool `json:"includingSystemCriticalPods,omitempty"`
}
Comment on lines 25 to 36
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small consistency thing: we already have EvictSystemCriticalPods on DefaultEvictor, and the docs/issues all use that wording. Could we name this one along the same lines — e.g.
EvictFailedSystemCriticalPods — so users don't have to keep two mental models for what's essentially the same concept at different scopes?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in f05b631 — renamed to EvictFailedSystemCriticalPods (and evictFailedSystemCriticalPods JSON tag) so it parallels EvictSystemCriticalPods on DefaultEvictor.