From 32a9a8eded4443b16505c87bc3b3217b19797c04 Mon Sep 17 00:00:00 2001
From: Nour <nurmn3m@gmail.com>
Date: Sun, 17 May 2026 19:01:24 +0300
Subject: [PATCH] Add an example showing how to query DRA resource pool
 availability with the new ResourcePoolStatusRequest API

Signed-off-by: Nour <nurmn3m@gmail.com>
---
 demo/resource-pool-status.yaml        | 95 +++++++++++++++++++++++++++
 demo/scripts/kind-cluster-config.yaml |  3 +-
 test/e2e/e2e_setup_test.go            | 49 ++++++++++++++
 test/e2e/e2e_test.go                  |  9 +++
 4 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 demo/resource-pool-status.yaml

diff --git a/demo/resource-pool-status.yaml b/demo/resource-pool-status.yaml
new file mode 100644
index 00000000..cc8e93e5
--- /dev/null
+++ b/demo/resource-pool-status.yaml
@@ -0,0 +1,95 @@
+# Example: DRA Resource Availability Visibility (ResourcePoolStatusRequest)
+#
+# Demonstrates the
+# [DRA resource pool status](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/#resource-pool-status)
+# feature.
+# For more information see
+# [KEP-5677](https://github.com/kubernetes/enhancements/issues/5677).
+#
+# This manifest is self-contained: it creates one Pod that consumes 1 GPU and a
+# cluster-scoped `ResourcePoolStatusRequest` that asks
+# `kube-controller-manager` to publish a one-shot snapshot of every pool served
+# by the example driver. The snapshot lists total / allocated / available /
+# unavailable device counts per pool.
+#
+# `ResourcePoolStatusRequest` is particularly useful for non-admin users:
+# `ResourceClaim`s are namespaced, so a user cannot ordinarily inspect claims
+# in other namespaces. A cluster-scoped `ResourcePoolStatusRequest` lets them
+# see aggregate consumption without that visibility. The driver itself needs
+# no code changes; the aggregation is computed entirely by
+# `kube-controller-manager` from existing `ResourceSlice`s and
+# `ResourceClaim`s.
+#
+# Expected: once the controller reconciles the request, `.status` reports
+# `allocatedDevices: 1` for the pool backing the consumer Pod. Inspect with:
+#   kubectl wait --for=condition=Complete \
+#     resourcepoolstatusrequest/gpu-pool-status --timeout=30s
+#   kubectl get resourcepoolstatusrequest/gpu-pool-status -o yaml
+#
+# `ResourcePoolStatusRequest` is one-shot. To refresh the snapshot after
+# launching or tearing down workloads, delete and re-create the request:
+#   kubectl delete resourcepoolstatusrequest/gpu-pool-status
+#   kubectl apply --filename=demo/resource-pool-status.yaml
+#
+# Driver requirements:
+#   Profile: gpu
+#   GPUs: 1
+#
+# Cluster requirements:
+#   Kubernetes 1.36+
+#   Feature gate: DRAResourcePoolStatus (on kube-apiserver and
+#     kube-controller-manager)
+#   API enabled: resource.k8s.io/v1alpha3
+
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: resource-pool-status  # namespace for the ResourceClaimTemplate and Pod below
+
+---
+apiVersion: resource.k8s.io/v1
+kind: ResourceClaimTemplate
+metadata:
+  namespace: resource-pool-status
+  name: single-gpu  # referenced by pod0 through resourceClaimTemplateName
+spec:
+  spec:
+    devices:
+      requests:
+      - name: gpu
+        exactly:
+          deviceClassName: gpu.example.com
+
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  namespace: resource-pool-status
+  name: pod0
+  labels:
+    app: pod
+spec:
+  containers:
+  - name: ctr0
+    image: ubuntu:22.04
+    command: ["bash", "-c"]
+    args: ["export; trap 'exit 0' TERM; sleep 9999 & wait"]  # print the environment, then idle (exit 0 on SIGTERM)
+    resources:
+      claims:
+      - name: gpu  # refers to the entry of the same name under spec.resourceClaims
+  resourceClaims:
+  - name: gpu
+    resourceClaimTemplateName: single-gpu  # ResourceClaimTemplate declared earlier in this file
+
+---
+apiVersion: resource.k8s.io/v1alpha3
+kind: ResourcePoolStatusRequest
+metadata:
+  name: gpu-pool-status  # cluster-scoped, so no namespace is set
+spec:
+  driver: gpu.example.com  # report the pools served by this driver
+  # Optional: filter to a specific pool (typically the node name).
+  # poolName: dra-example-driver-cluster-worker
+  # Optional: cap the number of pools returned. Defaults to 100, max 1000.
+  # limit: 10
diff --git a/demo/scripts/kind-cluster-config.yaml b/demo/scripts/kind-cluster-config.yaml
index 5f8dab5e..344c9026 100644
--- a/demo/scripts/kind-cluster-config.yaml
+++ b/demo/scripts/kind-cluster-config.yaml
@@ -7,6 +7,7 @@ featureGates:
   GangScheduling: true
   GenericWorkload: true
   DRAExtendedResource: true
+  DRAResourcePoolStatus: true
 containerdConfigPatches:
 # Enable CDI as described in
 # https://tags.cncf.io/container-device-interface#containerd-configuration
@@ -20,7 +21,7 @@ nodes:
     kind: ClusterConfiguration
     apiServer:
       extraArgs:
-        runtime-config: "resource.k8s.io/v1beta1=true,scheduling.k8s.io/v1alpha2=true"
+        runtime-config: "resource.k8s.io/v1beta1=true,resource.k8s.io/v1alpha3=true,scheduling.k8s.io/v1alpha2=true"
     scheduler:
       extraArgs:
         v: "1"
diff --git a/test/e2e/e2e_setup_test.go b/test/e2e/e2e_setup_test.go
index ac6a28d8..18de8cf3 100644
--- a/test/e2e/e2e_setup_test.go
+++ b/test/e2e/e2e_setup_test.go
@@ -455,6 +455,55 @@ func verifyExtendedResourceClaimStatus(ctx context.Context, namespace, podName,
 	}, checkPodLogsTimeout, checkPodLogsInterval).Should(Succeed())
 }
 
+// resourcePoolStatusRequestGVR addresses the cluster-scoped
+// ResourcePoolStatusRequest resource served under resource.k8s.io/v1alpha3.
+var resourcePoolStatusRequestGVR = schema.GroupVersion{
+	Group:   "resource.k8s.io",
+	Version: "v1alpha3",
+}.WithResource("resourcepoolstatusrequests")
+
+// verifyResourcePoolStatusComplete polls the ResourcePoolStatusRequest called
+// name (every 2s, for up to 30s) until it has a Complete=True condition and its
+// first pool entry names expectedDriverName as driver with a non-empty poolName.
+func verifyResourcePoolStatusComplete(ctx context.Context, name, expectedDriverName string) {
+	GinkgoHelper()
+	poll := func(g Gomega) {
+		obj, err := dynamicClient.Resource(resourcePoolStatusRequestGVR).Get(ctx, name, metav1.GetOptions{})
+		g.Expect(err).NotTo(HaveOccurred(),
+			"Failed to get ResourcePoolStatusRequest %s", name)
+
+		// The request counts as done once any condition reads Complete=True.
+		conds, _, err := unstructured.NestedSlice(obj.Object, "status", "conditions")
+		g.Expect(err).NotTo(HaveOccurred())
+		done := false
+		for i := 0; i < len(conds) && !done; i++ {
+			entry, isMap := conds[i].(map[string]any)
+			done = isMap && entry["type"] == "Complete" && entry["status"] == "True"
+		}
+		g.Expect(done).To(BeTrue(),
+			"ResourcePoolStatusRequest %s has no Complete=True condition; conditions: %v",
+			name, conds)
+
+		// Only the first reported pool is inspected.
+		poolList, _, err := unstructured.NestedSlice(obj.Object, "status", "pools")
+		g.Expect(err).NotTo(HaveOccurred())
+		g.Expect(poolList).NotTo(BeEmpty(),
+			"ResourcePoolStatusRequest %s reported no pools for driver %s",
+			name, expectedDriverName)
+
+		first, isMap := poolList[0].(map[string]any)
+		g.Expect(isMap).To(BeTrue(), "pool entry is not a map: %T", poolList[0])
+		g.Expect(first["driver"]).To(Equal(expectedDriverName),
+			"ResourcePoolStatusRequest %s pool driver mismatch", name)
+		g.Expect(first["poolName"]).NotTo(BeEmpty(),
+			"ResourcePoolStatusRequest %s pool has empty poolName", name)
+	}
+	Eventually(poll).WithContext(ctx).
+		WithTimeout(30 * time.Second).
+		WithPolling(2 * time.Second).
+		Should(Succeed())
+}
+
 // claimNewGPU verifies that a GPU is unclaimed and adds it to observedGPUs.
 func claimNewGPU(g Gomega, observedGPUs map[string]string, gpu, namespace, podName, containerName string) {
 	GinkgoHelper()
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index d879d5d0..5d1fecbd 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -161,6 +161,15 @@ var _ = Describe("Test GPU allocation", func() {
 		verifyDRAAdminAccess(ctx, namespace, pods[0], containerName, "true")
 	})
 
+	It("should publish a ResourcePoolStatusRequest snapshot for the driver", func(ctx SpecContext) {
+		const ns = "resource-pool-status"
+		driver := installDriver(ctx, DriverConfig{})
+		deployManifest(ctx, ns, "resource-pool-status.yaml", driver)
+		checkPodsReadyAndRunning(ctx, ns, []string{"pod0"})
+		// NOTE(review): "gpu-pool-status" is a fixed, cluster-scoped name — confirm it is deleted between runs.
+		verifyResourcePoolStatusComplete(ctx, "gpu-pool-status", driver.DriverName)
+	})
+
 	It("should allocate 1 GPU per pod for extended resource requests", func(ctx SpecContext) {
 		// Each parallel test must advertise its DeviceClass under a unique
 		// extended resource name so KEP-5004 reservations don't collide.