Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/kueue/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func main() {
cacheOptions = append(cacheOptions, schdcache.WithResourceTransformations(cfg.Resources.Transformations))
queueOptions = append(queueOptions, qcache.WithResourceTransformations(cfg.Resources.Transformations))
}
if features.Enabled(features.DynamicResourceAllocation) && cfg.Resources != nil && len(cfg.Resources.DeviceClassMappings) > 0 {
if features.Enabled(features.KueueDRAIntegration) && cfg.Resources != nil && len(cfg.Resources.DeviceClassMappings) > 0 {
if err := dra.CreateMapperFromConfiguration(cfg.Resources.DeviceClassMappings); err != nil {
setupLog.Error(err, "Failed to initialize DRA mapper from configuration")
os.Exit(1)
Expand Down
4 changes: 2 additions & 2 deletions keps/2941-DRA/kep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ milestone:
# The following PRR answers are required at alpha release
# List the feature gate name and the components for which it must be enabled
feature-gates:
- name: DynamicResourceAllocation
- name: DRAExtendedResources
- name: KueueDRAIntegration
- name: KueueDRAIntegrationExtendedResource
disable-supported: true

# The following PRR answers are required at beta release
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/queue/cluster_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ func priorityBoostAnnotationChanged(oldInfo, newInfo *workload.Info) bool {
// DRA extended resources are resolved in Reconcile, which can modify TotalRequests
// without changing the workload Spec.
func draRequestsChanged(oldInfo, newInfo *workload.Info) bool {
if !features.Enabled(features.DynamicResourceAllocation) {
if !features.Enabled(features.KueueDRAIntegration) {
return false
}
return !equality.Semantic.DeepEqual(oldInfo.TotalRequests, newInfo.TotalRequests)
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/queue/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ func (m *Manager) AddLocalQueue(ctx context.Context, q *kueue.LocalQueue) error
return err
}

if !features.Enabled(features.DynamicResourceAllocation) {
if !features.Enabled(features.KueueDRAIntegration) {
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/queue/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestAddLocalQueueOrphans(t *testing.T) {
}

func TestAddLocalQueue_DRAReconcileChannelGuaranteedDelivery(t *testing.T) {
features.SetFeatureGateDuringTest(t, features.DynamicResourceAllocation, true)
features.SetFeatureGateDuringTest(t, features.KueueDRAIntegration, true)

// Create an admissible workload that triggers dra.NeedsDRAReconcile via HasDRA().
tmplName := "claim-tmpl"
Expand Down
15 changes: 11 additions & 4 deletions pkg/config/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func Validate(c *configapi.Configuration, scheme *runtime.Scheme) field.ErrorLis
allErrs = append(allErrs, validateVisibilityServer(c)...)
allErrs = append(allErrs, validateCustomLabels(c)...)
allErrs = append(allErrs, validateQuotaCheckStrategy(c)...)
allErrs = append(allErrs, validateDRAFeatureGateDependencies()...)
return allErrs
}

Expand Down Expand Up @@ -570,12 +571,18 @@ func LoadAndValidateFeatureGates(featureGateCLI string, featureGateMap map[strin
}
}

if features.Enabled(features.DRAExtendedResources) {
if !features.Enabled(features.DynamicResourceAllocation) {
allErrs = append(allErrs, field.Invalid(featureGatesPath, "DRAExtendedResources", "DRAExtendedResources requires DynamicResourceAllocation to be enabled"))
allErrs = append(allErrs, validateDRAFeatureGateDependencies()...)

return allErrs
}

// validateDRAFeatureGateDependencies checks the dependency between Kueue's DRA
// feature gates: KueueDRAIntegrationExtendedResource may only be enabled when
// KueueDRAIntegration is enabled as well.
//
// It returns nil when the dependency holds, and a list with a single
// field.Invalid error on featureGatesPath when it is violated.
func validateDRAFeatureGateDependencies() field.ErrorList {
	// Nothing to validate unless the dependent gate is on; the base gate is
	// only consulted in that case.
	extendedResourceEnabled := features.Enabled(features.KueueDRAIntegrationExtendedResource)
	if !extendedResourceEnabled || features.Enabled(features.KueueDRAIntegration) {
		return nil
	}

	return field.ErrorList{
		field.Invalid(featureGatesPath, "KueueDRAIntegrationExtendedResource", "KueueDRAIntegrationExtendedResource requires KueueDRAIntegration to be enabled"),
	}
}

Expand Down
37 changes: 26 additions & 11 deletions pkg/config/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,21 @@ func TestValidate(t *testing.T) {
},
},
},
"KueueDRAIntegrationExtendedResource requires KueueDRAIntegration": {
cfg: &configapi.Configuration{
Integrations: defaultIntegrations,
},
featureGates: map[featuregate.Feature]bool{
features.KueueDRAIntegrationExtendedResource: true,
features.KueueDRAIntegration: false,
},
wantErr: field.ErrorList{
&field.Error{
Type: field.ErrorTypeInvalid,
Field: "featureGates",
},
},
},
}

for name, tc := range testCases {
Expand All @@ -1093,18 +1108,18 @@ func TestLoadAndValidateFeatureGates(t *testing.T) {
featureGatesCLI: "",
},
"feature gate cli": {
featureGatesCLI: string(features.DynamicResourceAllocation) + "=false",
featureGatesCLI: string(features.KueueDRAIntegration) + "=false",
gatesToRestore: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: false,
features.KueueDRAIntegration: false,
},
},
"cannot specify both feature gates": {
featureGatesCLI: string(features.DynamicResourceAllocation) + "=false",
featureGatesCLI: string(features.KueueDRAIntegration) + "=false",
featureGateMap: map[string]bool{
string(features.DynamicResourceAllocation): false,
string(features.KueueDRAIntegration): false,
},
gatesToRestore: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: false,
features.KueueDRAIntegration: false,
},
wantErr: field.ErrorList{
&field.Error{
Expand Down Expand Up @@ -1218,20 +1233,20 @@ func TestLoadAndValidateFeatureGates(t *testing.T) {
},
},
},
"DRAExtendedResources requires DynamicResourceAllocation": {
"KueueDRAIntegrationExtendedResource requires KueueDRAIntegration": {
featureGateMap: map[string]bool{
string(features.DRAExtendedResources): true,
string(features.DynamicResourceAllocation): false,
string(features.KueueDRAIntegrationExtendedResource): true,
string(features.KueueDRAIntegration): false,
},
gatesToRestore: map[featuregate.Feature]bool{
features.DRAExtendedResources: false,
features.DynamicResourceAllocation: true,
features.KueueDRAIntegrationExtendedResource: false,
features.KueueDRAIntegration: true,
},
wantErr: field.ErrorList{
&field.Error{
Type: field.ErrorTypeInvalid,
Field: "featureGates",
Detail: "DRAExtendedResources requires DynamicResourceAllocation to be enabled",
Detail: "KueueDRAIntegrationExtendedResource requires KueueDRAIntegration to be enabled",
},
},
},
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func SetupControllers(mgr ctrl.Manager, qManager *qcache.Manager, cc *schdcache.
WithWorkloadCustomLabels(customLabels),
WithAdmissionFairSharing(cfg.AdmissionFairSharing),
)
if features.Enabled(features.DynamicResourceAllocation) {
if features.Enabled(features.KueueDRAIntegration) {
qManager.SetDRAReconcileChannel(workloadRec.GetDRAReconcileChannel())
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/core/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func Setup(ctx context.Context, indexer client.FieldIndexer) error {
}
}
// Index DeviceClasses by extendedResourceName for fast lookup during extended resource translation.
if features.Enabled(features.DRAExtendedResources) {
if features.Enabled(features.KueueDRAIntegrationExtendedResource) {
if err := indexer.IndexField(ctx, &resourceapi.DeviceClass{}, DeviceClassExtendedResourceNameIndex, IndexDeviceClassExtendedResourceName); err != nil {
return fmt.Errorf("setting index on extendedResourceName for DeviceClass: %w", err)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/controller/core/workload_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
if workload.HasResourceClaim(&wl) {
log.V(3).Info("Workload is inadmissible because it uses resource claims which is not supported")
err := workload.PatchAdmissionStatus(ctx, r.client, &wl, r.clock, func(wl *kueue.Workload) (bool, error) {
updated := workload.UnsetQuotaReservationWithCondition(wl, kueue.WorkloadInadmissible, "DynamicResourceAllocation feature does not support use of resource claims", r.clock.Now())
updated := workload.UnsetQuotaReservationWithCondition(wl, kueue.WorkloadInadmissible, "KueueDRAIntegration feature does not support use of resource claims", r.clock.Now())
if updated && workload.SetRequeuedCondition(wl, kueue.WorkloadInadmissible, "DRA resource claims not supported", false) {
updated = true
}
Expand Down Expand Up @@ -323,7 +323,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
// Process Extended Resources backed by DRA (new path)
var extendedResources map[kueue.PodSetReference]corev1.ResourceList
var replacedExtendedResources map[kueue.PodSetReference]sets.Set[corev1.ResourceName]
if features.Enabled(features.DRAExtendedResources) {
if features.Enabled(features.KueueDRAIntegrationExtendedResource) {
var extFieldErrs field.ErrorList
extendedResources, replacedExtendedResources, extFieldErrs = dra.ResolveExtendedResourceQuota(ctx, r.client, &wl)
if len(extFieldErrs) > 0 {
Expand Down
18 changes: 9 additions & 9 deletions pkg/controller/core/workload_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ func TestReconcile(t *testing.T) {
}{
"reconcile DRA ResourceClaim should be rejected as inadmissible": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: true,
features.KueueDRAIntegration: true,
features.MultiKueueOrchestratedPreemption: false,
},
workload: utiltestingapi.MakeWorkload("wlWithDRAResourceClaim", "ns").
Expand Down Expand Up @@ -453,7 +453,7 @@ func TestReconcile(t *testing.T) {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: kueue.WorkloadInadmissible,
Message: "DynamicResourceAllocation feature does not support use of resource claims",
Message: "KueueDRAIntegration feature does not support use of resource claims",
}).
Condition(metav1.Condition{
Type: kueue.WorkloadRequeued,
Expand All @@ -466,7 +466,7 @@ func TestReconcile(t *testing.T) {
},
"reconcile DRA ResourceClaimTemplate should be pre-processed and queued": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: true,
features.KueueDRAIntegration: true,
features.MultiKueueOrchestratedPreemption: false,
},
wantDRAResourceTotal: new(int64(1)),
Expand Down Expand Up @@ -504,7 +504,7 @@ func TestReconcile(t *testing.T) {
},
"reconcile DRA ResourceClaimTemplate multi-pod should be pre-processed and queued": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: true,
features.KueueDRAIntegration: true,
features.MultiKueueOrchestratedPreemption: false,
},
wantDRAResourceTotal: new(int64(6)),
Expand Down Expand Up @@ -542,7 +542,7 @@ func TestReconcile(t *testing.T) {
},
"reconcile DRA ResourceClaimTemplate with unmapped device class": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: true,
features.KueueDRAIntegration: true,
features.MultiKueueOrchestratedPreemption: false,
},
workload: utiltestingapi.MakeWorkload("wlUnmappedDRA", "ns").
Expand Down Expand Up @@ -594,7 +594,7 @@ func TestReconcile(t *testing.T) {
},
"reconcile DRA ResourceClaimTemplate not found should return error": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: true,
features.KueueDRAIntegration: true,
features.MultiKueueOrchestratedPreemption: false,
},
workload: utiltestingapi.MakeWorkload("wlMissingTemplate", "ns").
Expand Down Expand Up @@ -2936,7 +2936,7 @@ func TestReconcile(t *testing.T) {
},
"should synchronize the status of preemption gates": {
featureGates: map[featuregate.Feature]bool{
features.DynamicResourceAllocation: false,
features.KueueDRAIntegration: false,
features.MultiKueueOrchestratedPreemption: true,
},
cq: utiltestingapi.MakeClusterQueue("cq").Obj(),
Expand Down Expand Up @@ -3251,7 +3251,7 @@ func TestReconcile(t *testing.T) {
queueOptions := []qcache.Option{qcache.WithPreemptionExpectations(preemptexpectations.New())}
qManager := qcache.NewManagerForUnitTests(cl, cqCache, queueOptions...)
reconciler := NewWorkloadReconciler(cl, qManager, cqCache, recorder, tc.reconcilerOpts...)
if features.Enabled(features.DynamicResourceAllocation) {
if features.Enabled(features.KueueDRAIntegration) {
qManager.SetDRAReconcileChannel(reconciler.GetDRAReconcileChannel())
}
// use a fake clock with jitter = 0 to be able to assert on the requeueAt.
Expand Down Expand Up @@ -3346,7 +3346,7 @@ func TestReconcile(t *testing.T) {
}

// For DRA tests, verify that workloads are properly queued/cached
if tc.featureGates[features.DynamicResourceAllocation] && testWl != nil &&
if tc.featureGates[features.KueueDRAIntegration] && testWl != nil &&
len(testWl.Spec.PodSets) > 0 &&
len(testWl.Spec.PodSets[0].Template.Spec.ResourceClaims) > 0 {
workloadKey := client.ObjectKeyFromObject(testWl)
Expand Down
4 changes: 2 additions & 2 deletions pkg/dra/extended_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ import (
// Note: there is no DeviceClass watcher. If a DeviceClass is created after a workload
// was marked inadmissible, requeuing depends on the next QueueInadmissibleWorkloads event.
func NeedsDRAReconcile(wl *kueue.Workload) bool {
if !features.Enabled(features.DynamicResourceAllocation) {
if !features.Enabled(features.KueueDRAIntegration) {
return false
}
if workload.HasDRA(wl) {
return true
}
if !features.Enabled(features.DRAExtendedResources) {
if !features.Enabled(features.KueueDRAIntegrationExtendedResource) {
return false
}
for i := range wl.Spec.PodSets {
Expand Down
16 changes: 15 additions & 1 deletion pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,20 @@ const (
// owner: @alaypatel07
// kep: https://github.com/kubernetes-sigs/kueue/tree/main/keps/2941-DRA
//
// Enable quota accounting for Dynamic Resource Allocation (DRA) devices in workloads
// Enable quota accounting for Dynamic Resource Allocation (DRA) devices in workloads.
KueueDRAIntegration featuregate.Feature = "KueueDRAIntegration"

// Deprecated: planned to be removed in 0.19. Use KueueDRAIntegration instead.
DynamicResourceAllocation featuregate.Feature = "DynamicResourceAllocation"

// owner: @sohankunkerkar
// kep: https://github.com/kubernetes-sigs/kueue/tree/main/keps/2941-DRA
//
// Enable extended resources support for DRA. Allows workloads to request DRA devices
// via standard resources.requests using DeviceClass extendedResourceName.
KueueDRAIntegrationExtendedResource featuregate.Feature = "KueueDRAIntegrationExtendedResource"

// Deprecated: planned to be removed in 0.19. Use KueueDRAIntegrationExtendedResource instead.
DRAExtendedResources featuregate.Feature = "DRAExtendedResources"

// owner: @MaysaMacedo
Expand Down Expand Up @@ -484,11 +490,19 @@ var defaultVersionedFeatureGates = map[featuregate.Feature]featuregate.Versioned
TASBalancedPlacement: {
{Version: version.MustParse("0.15"), Default: false, PreRelease: featuregate.Alpha},
},
KueueDRAIntegration: {
{Version: version.MustParse("0.18"), Default: false, PreRelease: featuregate.Alpha},
},
DynamicResourceAllocation: {
{Version: version.MustParse("0.14"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("0.18"), Default: false, PreRelease: featuregate.Deprecated, LockToDefault: true}, // remove in 0.19
},
KueueDRAIntegrationExtendedResource: {
{Version: version.MustParse("0.18"), Default: false, PreRelease: featuregate.Alpha},
},
DRAExtendedResources: {
{Version: version.MustParse("0.17"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("0.18"), Default: false, PreRelease: featuregate.Deprecated, LockToDefault: true}, // remove in 0.19
},
MultiKueueAdaptersForCustomJobs: {
{Version: version.MustParse("0.14"), Default: false, PreRelease: featuregate.Alpha},
Expand Down
2 changes: 1 addition & 1 deletion pkg/workload/workload.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ func totalRequestsFromPodSets(wl *kueue.Workload, info *InfoOptions) []PodSetRes
effectiveRequests := dropExcludedResources(specRequests, info.excludedResourcePrefixes)
effectiveRequests = applyResourceTransformations(effectiveRequests, info.resourceTransformations)
setRes.Requests = resources.NewRequests(effectiveRequests)
if features.Enabled(features.DynamicResourceAllocation) && info.preprocessedDRAResources != nil {
if features.Enabled(features.KueueDRAIntegration) && info.preprocessedDRAResources != nil {
// First, remove extended resources that were converted to DRA logical resources
if replacedRes, exists := info.replacedExtendedResources[ps.Name]; exists {
for extRes := range replacedRes {
Expand Down
4 changes: 2 additions & 2 deletions pkg/workload/workload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1677,7 +1677,7 @@ func TestNeedsSecondPass(t *testing.T) {
}

func TestWithPreprocessedDRAResources(t *testing.T) {
features.SetFeatureGateDuringTest(t, features.DynamicResourceAllocation, true)
features.SetFeatureGateDuringTest(t, features.KueueDRAIntegration, true)

cases := map[string]struct {
workload kueue.Workload
Expand Down Expand Up @@ -1798,7 +1798,7 @@ func TestWithPreprocessedDRAResources(t *testing.T) {
}

func TestWithPreprocessedDRAResourcesReplacesExtendedResources(t *testing.T) {
features.SetFeatureGateDuringTest(t, features.DynamicResourceAllocation, true)
features.SetFeatureGateDuringTest(t, features.KueueDRAIntegration, true)

cases := map[string]struct {
workload kueue.Workload
Expand Down
14 changes: 10 additions & 4 deletions site/content/en/docs/concepts/dynamic_resource_allocation.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ description: >
Quota management for workloads using Kubernetes Dynamic Resource Allocation (DRA).
---

{{% alert title="Warning" color="warning" %}}
In Kueue 0.18, the DRA feature gates were renamed to avoid conflicts with upstream
Kubernetes feature gates: `DynamicResourceAllocation` is now `KueueDRAIntegration`,
and `DRAExtendedResources` is now `KueueDRAIntegrationExtendedResource`.
The old names are deprecated and planned to be removed in 0.19.
{{% /alert %}}

## Dynamic Resource Allocation

[Dynamic Resource Allocation (DRA)](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/)
Expand Down Expand Up @@ -57,20 +63,20 @@ For setup instructions, see

When a Pod requests an extended resource backed by DRA (e.g.,
`nvidia.com/gpu: 1`), the kube-scheduler auto-creates a `ResourceClaim`.
Without the `DRAExtendedResources` feature gate enabled, Kueue would charge
Without the `KueueDRAIntegrationExtendedResource` feature gate enabled, Kueue would charge
quota for both the `resources.requests` entry **and** the auto-created claim,
double counting the same device.

With `DRAExtendedResources` enabled, Kueue detects the matching `DeviceClass`,
With `KueueDRAIntegrationExtendedResource` enabled, Kueue detects the matching `DeviceClass`,
uses `extendedResourceName` as the quota key, and drops the auto-created claim
from accounting. No `deviceClassMappings` configuration is needed — the
mapping is discovered from the `DeviceClass` automatically.

{{% alert title="Note" color="info" %}}
The extended resource path requires the Kubernetes
`DRAExtendedResource` feature gate on kube-apiserver and kube-scheduler
(alpha in Kubernetes 1.34), in addition to Kueue's `DynamicResourceAllocation`
and `DRAExtendedResources` feature gates.
(alpha in Kubernetes 1.34), in addition to Kueue's `KueueDRAIntegration`
and `KueueDRAIntegrationExtendedResource` feature gates.
{{% /alert %}}

## Path separation
Expand Down
Loading