Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pkg/controller/core/workload_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,10 +265,20 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
}

log.V(2).Info("Deleting workload because it has finished and the retention period has elapsed", "retention", *r.workloadRetention.afterFinished)
if err := r.client.Delete(ctx, &wl); err != nil {

// Finished Workloads should no longer need Kueue's resource-in-use finalizer.
// However, WorkloadSlices and other paths can mark a Workload as Finished without
// going through the job finalization path that normally removes it. Remove only
// Kueue's finalizer before deleting; otherwise Kubernetes may only set
// deletionTimestamp and the Workload can remain stuck until the finalizer is removed.
deleteRequested, err := workload.Delete(ctx, r.client, &wl)
if err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
r.recorder.Eventf(&wl, nil, corev1.EventTypeNormal, "Deleted", "Deleted", "Deleted finished workload due to elapsed retention")
if deleteRequested {
r.recorder.Eventf(&wl, nil, corev1.EventTypeNormal, "Deleted", "Deleted", "Deleted finished workload due to elapsed retention")
}

return ctrl.Result{}, nil
}

Expand Down
34 changes: 34 additions & 0 deletions pkg/controller/core/workload_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2724,6 +2724,40 @@ func TestReconcile(t *testing.T) {
wantWorkload: nil,
wantError: nil,
},
"should remove finalizer from owned finished workload already being deleted, object retention configured": {
Comment thread
ShaanveerS marked this conversation as resolved.
workload: utiltestingapi.MakeWorkload("wl", "ns").
Condition(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
LastTransitionTime: metav1.Time{
Time: now.Add(-2 * util.MediumTimeout),
},
}).
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
DeletionTimestamp(now).
Finalizers(kueue.ResourceInUseFinalizerName, "example.com/finalizer").
Obj(),
reconcilerOpts: []Option{
WithWorkloadRetention(
&workloadRetentionConfig{
afterFinished: ptr.To(util.MediumTimeout),
},
),
},
wantWorkload: utiltestingapi.MakeWorkload("wl", "ns").
Condition(metav1.Condition{
Type: kueue.WorkloadFinished,
Status: metav1.ConditionTrue,
LastTransitionTime: metav1.Time{
Time: now.Add(-2 * util.MediumTimeout),
},
}).
ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid").
DeletionTimestamp(now).
Finalizers("example.com/finalizer").
Obj(),
wantError: nil,
},
"shouldn't try to delete the workload because the retention period hasn't elapsed yet, object retention configured": {
workload: utiltestingapi.MakeWorkload("wl", "ns").
Condition(metav1.Condition{
Expand Down
9 changes: 2 additions & 7 deletions pkg/controller/jobframework/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -981,16 +981,11 @@ func (r *JobReconciler) ensureOneWorkload(ctx context.Context, job GenericJob, o
existedWls := 0
for _, wl := range toDelete {
wlKey := workload.Key(wl)
err := workload.RemoveFinalizer(ctx, r.client, wl)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to remove workload finalizer for: %w", err)
}

err = r.client.Delete(ctx, wl)
deleteRequested, err := workload.Delete(ctx, r.client, wl)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("deleting not matching workload: %w", err)
}
if err == nil {
if deleteRequested {
existedWls++
r.record.Eventf(object, nil, corev1.EventTypeNormal, ReasonDeletedWorkload, "DeletedWorkload",
"Deleted not matching Workload: %v", wlKey)
Expand Down
13 changes: 2 additions & 11 deletions pkg/controller/jobs/leaderworkerset/leaderworkerset_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"golang.org/x/sync/errgroup"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -463,19 +462,11 @@ func (r *Reconciler) deleteWorkload(ctx context.Context, wl *kueue.Workload) err
log := ctrl.LoggerFrom(ctx).WithValues("workload", klog.KObj(wl))
log.V(3).Info("Delete LeaderWorkerSet Workload")

// Remove the finalizer before deleting to ensure prompt cleanup,
// consistent with how the job framework reconciler deletes workloads.
if err := workload.RemoveFinalizer(ctx, r.client, wl); err != nil && !apierrors.IsNotFound(err) {
log.Error(err, "Failed to remove finalizer")
return err
}
err := r.client.Delete(ctx, wl)
_, err := workload.Delete(ctx, r.client, wl)
if err != nil {
log.Error(err, "Failed to delete workload")
return err
}

return nil
return err
}

func (r *Reconciler) reconcilePods(ctx context.Context, lws *leaderworkersetv1.LeaderWorkerSet, statefulSets []appsv1.StatefulSet, pods []corev1.Pod) error {
Expand Down
19 changes: 19 additions & 0 deletions pkg/workload/workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,25 @@ func RemoveFinalizer(ctx context.Context, c client.Client, wl *kueue.Workload) e
return nil
}

// Delete removes Kueue's resource-in-use finalizer before deleting the Workload.
// If deletion is already in progress, finalizer removal is enough for Kubernetes
// to continue object cleanup, so Delete returns false without issuing another
// delete request. The returned bool reports whether this call successfully
// requested deletion.
func Delete(ctx context.Context, c client.Client, wl *kueue.Workload) (bool, error) {
if err := RemoveFinalizer(ctx, c, wl); err != nil {
Comment thread
ShaanveerS marked this conversation as resolved.
return false, err
}
if !wl.DeletionTimestamp.IsZero() {
log.FromContext(ctx).V(3).Info("Skipping workload delete because deletion is already in progress", "workload", klog.KObj(wl))
return false, nil
}
if err := c.Delete(ctx, wl); err != nil {
return false, err
}
return true, nil
}

type flavorSet = sets.Set[kueue.ResourceFlavorReference]

// AdmissionChecksForWorkload returns AdmissionChecks that should be assigned to a specific Workload based on
Expand Down