diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/bandwidth-operator/dist/chart/Chart.yaml b/bandwidth-operator/dist/chart/Chart.yaml index 02a0cb0..5df5806 100644 --- a/bandwidth-operator/dist/chart/Chart.yaml +++ b/bandwidth-operator/dist/chart/Chart.yaml @@ -2,9 +2,9 @@ apiVersion: v2 name: vaclab-bandwidth-operator description: Vaclab Kubernetes operator for bandwidth-aware workload scheduling in Vaclab environments type: application -version: 0.1.8 -appVersion: "0.1.8" -icon: "https://example.com/icon.png" +version: 0.2.1 +appVersion: "0.2.1" +icon: "https://raw.githubusercontent.com/vacp2p/vaclab-2/feat/add_lab_components/extras/vac-logo-light-no-bg.png" keywords: - kubernetes - operator diff --git a/bandwidth-operator/dist/chart/templates/manager/manager.yaml b/bandwidth-operator/dist/chart/templates/manager/manager.yaml index 171ccc1..4136673 100644 --- a/bandwidth-operator/dist/chart/templates/manager/manager.yaml +++ b/bandwidth-operator/dist/chart/templates/manager/manager.yaml @@ -41,7 +41,7 @@ spec: {{- end }} command: - /manager - image: katakuri100/vaclab-bandwidth-operator:v0.1.8 + image: katakuri100/vaclab-bandwidth-operator:v0.2.1 imagePullPolicy: IfNotPresent livenessProbe: httpGet: diff --git a/bandwidth-operator/internal/controller/helpers.go b/bandwidth-operator/internal/controller/helpers.go index 477eca5..7feda91 100644 --- a/bandwidth-operator/internal/controller/helpers.go +++ b/bandwidth-operator/internal/controller/helpers.go @@ -61,10 +61,7 @@ func (r *BandwidthReconciler) setDefaultBandwidthValues(bandwidth *networkingv1. func (r *BandwidthReconciler) UpdateBandwidthStatus(ctx context.Context, bandwidth *networkingv1.Bandwidth) error { // UpdatedAt is set by caller only when status actually changes err := r.Status().Update(ctx, bandwidth) - // If conflict, return without error to allow retry - if errors.IsConflict(err) { - return nil - } + // Return conflict errors so controller can requeue return err } @@ -173,22 +170,36 @@ func (r *BandwidthReconciler) createBandwidthResourcesForAllNodes(ctx context.Co return err } - // Now update the status after creation + // Now update the status after creation with retry on conflict // Re-get the object to get the latest version - if err := r.Get(ctx, types.NamespacedName{Name: nodeName}, &bw); err != nil { - return err - } - - bw.Status.Status = networkingv1.Created - bw.Status.Capacity = bw.Spec.Capacity - bw.Status.UpdatedAt = metav1.NewTime(time.Now()) + maxRetries := 3 + for attempt := 0; attempt < maxRetries; attempt++ { + if err := r.Get(ctx, types.NamespacedName{Name: nodeName}, &bw); err != nil { + if attempt == maxRetries-1 { + // Give up on Get failures, let reconcile loop handle it + break + } + time.Sleep(100 * time.Millisecond) + continue + } - if err := r.UpdateBandwidthStatus(ctx, &bw); err != nil { - return err + bw.Status.Status = networkingv1.Created + bw.Status.Capacity = bw.Spec.Capacity + bw.Status.UpdatedAt = metav1.NewTime(time.Now()) + + if err := r.Status().Update(ctx, &bw); err != nil { + if errors.IsConflict(err) && attempt < maxRetries-1 { + // Retry on conflict + time.Sleep(100 * time.Millisecond) + continue + } + // On last attempt or non-conflict error, let reconcile loop handle it + break + } + // Success + r.generateEvent(networkingv1.EventVaclabNodeBandwidthCreated, &bw) + break } - - // Generate event - r.generateEvent(networkingv1.EventVaclabNodeBandwidthCreated, &bw) } return nil diff --git a/bandwidth-operator/internal/controller/setup_bandwidth.go b/bandwidth-operator/internal/controller/setup_bandwidth.go index a48df88..7c120ae 100644 --- a/bandwidth-operator/internal/controller/setup_bandwidth.go +++ b/bandwidth-operator/internal/controller/setup_bandwidth.go @@ -155,6 +155,31 @@ func (r *BandwidthReconciler) SetupBandwidthResource(ctx context.Context, bandwi }) } + } else { + // pod not created yet, in pendning state, freshly reserved by the scheduler + // during reserve stage, should have requested bandwidth defined in the request and pod uid + // should be taken into account for capacity calculation because scheduler will remove it in case of scheduling failure + if !strings.EqualFold(req.PodUid, "") && (req.Bandwidth.UlMbps > 0 || req.Bandwidth.DlMbps > 0) { + usedUlNetwork += req.Bandwidth.UlMbps + usedDlNetwork += req.Bandwidth.DlMbps + usedUlLocal += req.Bandwidth.UlMbps + usedDlLocal += req.Bandwidth.DlMbps + reservationInfo = append(reservationInfo, networkingv1.ReservationInfo{ + PodName: req.PodName, + PodUid: req.PodUid, + Bandwidth: networkingv1.Capacity{ + Network: networkingv1.BandwidthDefinition{ + UlMbps: req.Bandwidth.UlMbps, + DlMbps: req.Bandwidth.DlMbps, + }, + Local: networkingv1.BandwidthDefinition{ + UlMbps: req.Bandwidth.UlMbps, + DlMbps: req.Bandwidth.DlMbps, + }, + }, + Namespace: req.Namespace, + }) + } } } usedCapacity.Local = networkingv1.BandwidthDefinition{ @@ -191,91 +216,6 @@ func (r *BandwidthReconciler) SetupBandwidthResource(ctx context.Context, bandwi return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } -// used only when manually changing the spec of an existing Bandwidth resource -// only max capacity changes are allowed manually -func (r *BandwidthReconciler) CheckAndUpdateBandwidth(ctx context.Context, bandwidth *networkingv1.Bandwidth) (ctrl.Result, error) { - log := log.FromContext(ctx) - log.Info("watching vaclab Bandwidth resource for updates") - - nodeName := bandwidth.Spec.Node - bwName := bandwidth.Name - //make sure node exists and bw resource is valid - var node corev1.Node - nodeErr := r.Get(ctx, types.NamespacedName{Name: nodeName}, &node) - if nodeErr != nil { - if apierrors.IsNotFound(nodeErr) { - // Node gone: need to stop and clean up any existing bandwidth resource - log.Info("Node not found, cleaning up Bandwidth resource", "node", nodeName) - var bw networkingv1.Bandwidth - if err := r.Get(ctx, types.NamespacedName{Name: nodeName}, &bw); err == nil { - _ = r.Delete(ctx, &bw) // ignore error - } - bandwidth.Status.Status = networkingv1.Error - bandwidth.Status.ErrorReason = "node not found" - r.generateEvent(networkingv1.EventVaclabNodeBandwidthFailed, bandwidth) - return ctrl.Result{}, nil - } - bandwidth.Status.Status = networkingv1.Error - bandwidth.Status.ErrorReason = "unable to fetch node information" - log.Info("Node not found, cleaning up Bandwidth resource", "node", nodeName) - var bw networkingv1.Bandwidth - if err := r.Get(ctx, types.NamespacedName{Name: nodeName}, &bw); err == nil { - _ = r.Delete(ctx, &bw) // ignore error - } - r.generateEvent(networkingv1.EventVaclabNodeBandwidthFailed, bandwidth) - return ctrl.Result{}, nodeErr - } - if !strings.EqualFold(bwName, node.Name) { - log.Error(nil, "Node name mismatch", "expected", nodeName, "found", node.Name) - bandwidth.Status.Status = networkingv1.Error - bandwidth.Status.ErrorReason = "name mismatch between nodeName and bandwidthName" - log.Info("Node not found, cleaning up Bandwidth resource", "node", nodeName) - var bw networkingv1.Bandwidth - if err := r.Get(ctx, types.NamespacedName{Name: nodeName}, &bw); err == nil { - _ = r.Delete(ctx, &bw) // ignore error - } - r.generateEvent(networkingv1.EventVaclabNodeBandwidthFailed, bandwidth) - return ctrl.Result{}, nil - } - - //currentSpec := bandwidth.Spec.Requests - state := bandwidth.Status - if len(state.Reservations) != len(bandwidth.Spec.Requests) { - // changes in requests are not allowed - log.Error(nil, "Changes in bandwidth requests are not allowed after creation", "node", nodeName) - return ctrl.Result{}, nil - } - - //expectedCapacity := bandwidth.Status.Capacity - if state.Capacity != bandwidth.Spec.Capacity { - state.Capacity = bandwidth.Spec.Capacity - state.Remaining = networkingv1.Capacity{ - Local: networkingv1.BandwidthDefinition{ - UlMbps: bandwidth.Spec.Capacity.Local.UlMbps - state.Used.Local.UlMbps, - DlMbps: bandwidth.Spec.Capacity.Local.DlMbps - state.Used.Local.DlMbps, - }, - Network: networkingv1.BandwidthDefinition{ - UlMbps: bandwidth.Spec.Capacity.Network.UlMbps - state.Used.Network.UlMbps, - DlMbps: bandwidth.Spec.Capacity.Network.DlMbps - state.Used.Network.DlMbps, - }, - } - } - bandwidth.Status = state - bandwidth.Status.UpdatedAt = metav1.NewTime(time.Now()) - - if err := r.UpdateBandwidthStatus(ctx, bandwidth); err != nil { - log.Error(err, "unable to update bandwidth resource status", "node", nodeName) - return ctrl.Result{}, err - } - r.Update(ctx, bandwidth) - // Requeue to check the status later - return ctrl.Result{RequeueAfter: 10 * time.Second}, nil -} - -/*func (r *BandwidthReconciler) SyncFromSpecAndPods(ctx context.Context, bw *networkingv1.Bandwidth) (ctrl.Result, error) { - return r.syncFromSpecAndPodsWithVisited(ctx, bw, make(map[string]bool)) -}*/ - // syncSingleNodeBandwidth updates a single bandwidth resource without cascading to related nodes // Used for background updates triggered by sibling pod changes func (r *BandwidthReconciler) syncSingleNodeBandwidth(ctx context.Context, bw *networkingv1.Bandwidth) error { @@ -335,12 +275,34 @@ func (r *BandwidthReconciler) syncSingleNodeBandwidth(ctx context.Context, bw *n for _, req := range bwRequests { pod, exists := podsWithBw[req.PodUid] + foundUnknown := false if !exists { - log.V(1).Info("dropping request for missing pod", "podUid", req.PodUid, "podName", req.PodName) - continue - } + // Try to get pod status using client and pod UID + // keep the request if pod is pending (scheduled but not created yet) + var unknownPod corev1.Pod + if err := r.Get(ctx, types.NamespacedName{Name: req.PodName, Namespace: req.Namespace}, &unknownPod); err != nil || string(unknownPod.UID) != req.PodUid { + log.V(1).Info("dropping request for missing pod", "podUid", req.PodUid, "podName", req.PodName) + continue + } - ul, dl := r.GetBandwidthFromAnnotation(pod) + if unknownPod.DeletionTimestamp != nil || unknownPod.Status.Phase == corev1.PodSucceeded || unknownPod.Status.Phase == corev1.PodFailed { + log.V(1).Info("dropping request for terminating/completed pod", "podUid", req.PodUid, "podName", req.PodName) + continue + } + + if unknownPod.Status.Phase != corev1.PodPending { + log.V(1).Info("dropping request for pod without bandwidth annotation", "podUid", req.PodUid, "podName", req.PodName) + continue + } + foundUnknown = true + pod = unknownPod + } + var ul, dl int64 + if foundUnknown { + ul, dl = req.Bandwidth.UlMbps, req.Bandwidth.DlMbps + } else { + ul, dl = r.GetBandwidthFromAnnotation(pod) + } usedUlLocal += ul usedDlLocal += dl @@ -519,28 +481,44 @@ func (r *BandwidthReconciler) SyncFromSpecAndPods(ctx context.Context, bw *netwo for _, req := range bwRequests { pod, exists := podsWithBw[req.PodUid] + isPendingPod := false if !exists { - // Pod gone or no bw annotation anymore => drop request - // Check if this pod had siblings on other nodes - they need updates - if oldReq, hadRequest := existingReqs[req.PodUid]; hadRequest { - log.Info("dropping request for missing pod", "podUid", req.PodUid, "podName", req.PodName) - // If pod was part of a workload with siblings, mark related nodes as affected - // We need to fetch sibling info to know which nodes to notify - // Use a best-effort approach: get pod from old request data if possible - var oldPod corev1.Pod - if err := r.Get(ctx, types.NamespacedName{Name: oldReq.PodName, Namespace: oldReq.Namespace}, &oldPod); err == nil { - _, nodes, _ := r.allSiblingsOnCurrentNode(ctx, &oldPod, nodeName) - for _, n := range nodes { - if !strings.EqualFold(n, nodeName) { - affectedRelatedNodes[n] = true + var pendingPod corev1.Pod + if err := r.Get(ctx, types.NamespacedName{Name: req.PodName, Namespace: req.Namespace}, &pendingPod); err == nil && string(pendingPod.UID) == req.PodUid { + if pendingPod.Status.Phase == corev1.PodPending { + // Pod is pending - keep the request as is + isPendingPod = true + pod = pendingPod + } + } + if !isPendingPod { + // Pod gone or no bw annotation anymore => drop request + // Check if this pod had siblings on other nodes - they need updates + if oldReq, hadRequest := existingReqs[req.PodUid]; hadRequest { + log.Info("dropping request for missing pod", "podUid", req.PodUid, "podName", req.PodName) + // If pod was part of a workload with siblings, mark related nodes as affected + // We need to fetch sibling info to know which nodes to notify + // Use a best-effort approach: get pod from old request data if possible + var oldPod corev1.Pod + if err := r.Get(ctx, types.NamespacedName{Name: oldReq.PodName, Namespace: oldReq.Namespace}, &oldPod); err == nil { + _, nodes, _ := r.allSiblingsOnCurrentNode(ctx, &oldPod, nodeName) + for _, n := range nodes { + if !strings.EqualFold(n, nodeName) { + affectedRelatedNodes[n] = true + } } } } + continue } - continue } - ul, dl := r.GetBandwidthFromAnnotation(pod) + var ul, dl int64 + if isPendingPod { + ul, dl = req.Bandwidth.UlMbps, req.Bandwidth.DlMbps + } else { + ul, dl = r.GetBandwidthFromAnnotation(pod) + } // local bandwidth is always used // since pods are always connected to virtual bridge usedUlLocal += ul @@ -707,6 +685,18 @@ func (r *BandwidthReconciler) SyncFromSpecAndPods(ctx context.Context, bw *netwo return ctrl.Result{}, err } log.Info("bandwidth spec updated", "node", nodeName, "requests", len(cleanRequests)) + + // Re-fetch to get updated resourceVersion after spec update + if err := r.Get(ctx, client.ObjectKeyFromObject(bw), bw); err != nil { + return ctrl.Result{}, err + } + // Re-apply status fields since we just fetched fresh object + bw.Status.Used = usedCapacity + bw.Status.Remaining = remainingCapacity + bw.Status.Reservations = reservationInfo + bw.Status.Capacity = bw.Spec.Capacity + bw.Status.Status = networkingv1.Created + bw.Status.ErrorReason = "" } // Only update status if it actually changed diff --git a/bandwidth-operator/vaclab-bandwidth-operator-0.1.9.tgz b/bandwidth-operator/vaclab-bandwidth-operator-0.1.9.tgz new file mode 100644 index 0000000..e323a9e Binary files /dev/null and b/bandwidth-operator/vaclab-bandwidth-operator-0.1.9.tgz differ diff --git a/bandwidth-operator/vaclab-bandwidth-operator-0.2.0.tgz b/bandwidth-operator/vaclab-bandwidth-operator-0.2.0.tgz new file mode 100644 index 0000000..e6ee237 Binary files /dev/null and b/bandwidth-operator/vaclab-bandwidth-operator-0.2.0.tgz differ diff --git a/bandwidth-operator/vaclab-bandwidth-operator-0.2.1.tgz b/bandwidth-operator/vaclab-bandwidth-operator-0.2.1.tgz new file mode 100644 index 0000000..437dd76 Binary files /dev/null and b/bandwidth-operator/vaclab-bandwidth-operator-0.2.1.tgz differ diff --git a/charts/index.yaml b/charts/index.yaml new file mode 100644 index 0000000..e47e7cc --- /dev/null +++ b/charts/index.yaml @@ -0,0 +1,99 @@ +apiVersion: v1 +entries: + vaclab-bandwidth-operator: + - apiVersion: v2 + appVersion: 0.2.1 + created: "2026-01-17T13:31:32.611407179Z" + description: Vaclab Kubernetes operator for bandwidth-aware workload scheduling + in Vaclab environments + digest: 8f9473c32763045645dda2737d268bf26874e9cf1755810cfb4c7d847f0e49d3 + icon: https://raw.githubusercontent.com/vacp2p/vaclab-2/feat/add_lab_components/extras/vac-logo-light-no-bg.png + keywords: + - kubernetes + - operator + - bandwidth + - networking + - scheduling + - vac + - DST + - IFT + - vaclab + maintainers: + - email: mamoutou@vac.dev + name: VAC DST Team + name: vaclab-bandwidth-operator + type: application + urls: + - https://vacp2p.github.io/vaclab-k8s-plugins/charts/vaclab-bandwidth-operator-0.2.1.tgz + version: 0.2.1 + - apiVersion: v2 + appVersion: 0.2.0 + created: "2026-01-17T13:31:32.610435104Z" + description: Vaclab Kubernetes operator for bandwidth-aware workload scheduling + in Vaclab environments + digest: 80b1b956d1a30c643ad1ed7eefd194c72ac2c69f4cc771a829e8d7dc0fda335e + icon: https://raw.githubusercontent.com/vacp2p/vaclab-2/feat/add_lab_components/extras/vac-logo-light-no-bg.png + keywords: + - kubernetes + - operator + - bandwidth + - networking + - scheduling + - vac + - DST + - IFT + - vaclab + maintainers: + - email: mamoutou@vac.dev + name: VAC DST Team + name: vaclab-bandwidth-operator + type: application + urls: + - https://vacp2p.github.io/vaclab-k8s-plugins/charts/vaclab-bandwidth-operator-0.2.0.tgz + version: 0.2.0 + - apiVersion: v2 + appVersion: 0.1.9 + created: "2026-01-17T13:31:32.609233024Z" + description: Vaclab Kubernetes operator for bandwidth-aware workload scheduling + in Vaclab environments + digest: bf5be250bc03b156488e86412e1a698e877ce502282db6d16bd0b6d257159484 + icon: https://raw.githubusercontent.com/vacp2p/vaclab-2/feat/add_lab_components/extras/vac-logo-light-no-bg.png + keywords: + - kubernetes + - operator + - bandwidth + - networking + - scheduling + - vac + - DST + - IFT + - vaclab + maintainers: + - email: mamoutou@vac.dev + name: VAC DST Team + name: vaclab-bandwidth-operator + type: application + urls: + - https://vacp2p.github.io/vaclab-k8s-plugins/charts/vaclab-bandwidth-operator-0.1.9.tgz + version: 0.1.9 + vaclab-scheduler: + - apiVersion: v2 + appVersion: 0.30.7 + created: "2026-01-17T13:31:32.612387709Z" + description: deploy vaclab bandwidth-aware scheduler plugin as a second scheduler + in cluster + digest: 12b7dc35ae94178dc57dc483c6510e04681725bc9c7faae2240d02175d952683 + keywords: + - scheduler-plugins + - vaclab + - scheduler + - kubernetes + maintainers: + - email: mamoutou@vac.dev + name: mamoutou + name: vaclab-scheduler + type: application + urls: + - https://vacp2p.github.io/vaclab-k8s-plugins/charts/vaclab-scheduler-0.30.7.tgz + version: 0.30.7 +generated: "2026-01-17T13:31:32.607222621Z" diff --git a/charts/vaclab-bandwidth-operator-0.1.9.tgz b/charts/vaclab-bandwidth-operator-0.1.9.tgz new file mode 100644 index 0000000..e323a9e Binary files /dev/null and b/charts/vaclab-bandwidth-operator-0.1.9.tgz differ diff --git a/charts/vaclab-bandwidth-operator-0.2.0.tgz b/charts/vaclab-bandwidth-operator-0.2.0.tgz new file mode 100644 index 0000000..e6ee237 Binary files /dev/null and b/charts/vaclab-bandwidth-operator-0.2.0.tgz differ diff --git a/charts/vaclab-bandwidth-operator-0.2.1.tgz b/charts/vaclab-bandwidth-operator-0.2.1.tgz new file mode 100644 index 0000000..437dd76 Binary files /dev/null and b/charts/vaclab-bandwidth-operator-0.2.1.tgz differ diff --git a/charts/vaclab-scheduler-0.30.7.tgz b/charts/vaclab-scheduler-0.30.7.tgz new file mode 100644 index 0000000..207b70a Binary files /dev/null and b/charts/vaclab-scheduler-0.30.7.tgz differ diff --git a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/Chart.yaml b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/Chart.yaml index 1ff781e..2419a58 100644 --- a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/Chart.yaml +++ b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/Chart.yaml @@ -15,12 +15,12 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.30.6 +version: 0.30.7 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. -appVersion: 0.30.6 +appVersion: 0.30.7 keywords: - scheduler-plugins - vaclab diff --git a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml index 7baad5e..be0d7d0 100644 --- a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml +++ b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml @@ -26,7 +26,7 @@ data: score: enabled: - name: VaclabScheduling - weight: 10 + weight: {{ .Values.plugins.scoreWeight | default 100 }} reserve: enabled: - name: VaclabScheduling diff --git a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index c41a1ad..bbcff99 100644 --- a/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -4,7 +4,7 @@ scheduler: name: vaclab-scheduler - image: katakuri100/vaclab-scheduler:v0.30.6 + image: katakuri100/vaclab-scheduler:v0.30.6-1 command: - /bin/kube-scheduler replicaCount: 1 @@ -23,7 +23,7 @@ scheduler: controller: name: vaclab-scheduler-controller - image: katakuri100/vaclab-scheduler-controller:v0.30.6 + image: katakuri100/vaclab-scheduler-controller:v0.30.6-1 replicaCount: 1 leaderElect: false priorityClassName: "" @@ -44,6 +44,9 @@ controller: plugins: enabled: ["VaclabScheduling"] disabled: [] # only in-tree plugins need to be defined here + # Score weight for VaclabScheduling plugin (higher = more influence) + # Default 100 makes bandwidth the dominant factor in scheduling decisions + scoreWeight: 100 # Customize the enabled plugins' config. # Refer to the "pluginConfig" section of manifests//scheduler-config.yaml. diff --git a/scheduler-plugins/manifests/install/charts/vaclab-scheduler-0.30.7.tgz b/scheduler-plugins/manifests/install/charts/vaclab-scheduler-0.30.7.tgz new file mode 100644 index 0000000..207b70a Binary files /dev/null and b/scheduler-plugins/manifests/install/charts/vaclab-scheduler-0.30.7.tgz differ diff --git a/scheduler-plugins/pkg/vaclabscheduling/vaclab_scheduling.go b/scheduler-plugins/pkg/vaclabscheduling/vaclab_scheduling.go index 3fe6ee3..b2a0a73 100644 --- a/scheduler-plugins/pkg/vaclabscheduling/vaclab_scheduling.go +++ b/scheduler-plugins/pkg/vaclabscheduling/vaclab_scheduling.go @@ -295,7 +295,8 @@ func (v *VaclabScheduling) NormalizeScore(ctx context.Context, state *framework. return framework.NewStatus(framework.Success, "") } -// Reserve logs the intent to reserve bandwidth (actual reservation happens in PostBind) +// Reserve updates the bandwidth CRD to reserve bandwidth for the pod +// This happens before binding to avoid race conditions func (v *VaclabScheduling) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { // Get cached bandwidth requirements from PreFilter s, err := getPreFilterState(state) @@ -309,44 +310,63 @@ func (v *VaclabScheduling) Reserve(ctx context.Context, state *framework.CycleSt return framework.NewStatus(framework.Success, "") } - // Just log the intent - actual CRD update happens in PostBind after pod exists - klog.V(3).InfoS("Reserved bandwidth intent for pod", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) + // Update CRD to reserve bandwidth (before binding) + if err := v.bandwidth.AddBandwidthRequest(ctx, nodeName, pod.Name, pod.Namespace, string(pod.UID), s.ulMbps, s.dlMbps); err != nil { + klog.ErrorS(err, "Failed to reserve bandwidth in CRD", "node", nodeName, "pod", pod.Name) + return framework.NewStatus(framework.Error, fmt.Sprintf("failed to reserve bandwidth: %v", err)) + } + + klog.InfoS("Reserved bandwidth in CRD", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) return framework.NewStatus(framework.Success, "") } -// Unreserve is a no-op since we don't modify CRD in Reserve phase +// Unreserve releases the bandwidth reservation if binding fails +// This ensures we clean up CRD modifications made in Reserve func (v *VaclabScheduling) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { - // No-op: we don't add to CRD in Reserve, so nothing to remove - klog.V(4).InfoS("Unreserve called (no-op)", "node", nodeName, "pod", pod.Name) -} - -// PreBind is called before binding the pod (final validation) -func (v *VaclabScheduling) PreBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { - // Final check that bandwidth is still available - ulMbps, dlMbps, err := v.bandwidth.ExtractPodBandwidthRequirement(ctx, pod) + // Get cached bandwidth requirements from PreFilter + s, err := getPreFilterState(state) if err != nil { - return framework.NewStatus(framework.Error, fmt.Sprintf("failed to extract bandwidth: %v", err)) + klog.ErrorS(err, "Failed to get prefilter state in Unreserve", "pod", pod.Name) + return } - if ulMbps == 0 && dlMbps == 0 { - return framework.NewStatus(framework.Success, "") + // Skip if no bandwidth was reserved + if s.ulMbps == 0 && s.dlMbps == 0 { + return + } + + // Remove bandwidth request from CRD (cleanup after failed binding) + if err := v.bandwidth.RemoveBandwidthRequest(ctx, nodeName, string(pod.UID)); err != nil { + klog.ErrorS(err, "Failed to unreserve bandwidth in CRD", "node", nodeName, "pod", pod.Name) + return } - // Final check that bandwidth is still available - sufficient, err := v.bandwidth.HasSufficientBandwidth(ctx, nodeName, ulMbps, dlMbps) + klog.InfoS("Unreserved bandwidth in CRD", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) +} + +// PreBind is called before binding the pod +// Bandwidth was already reserved in Reserve phase, so this just logs +func (v *VaclabScheduling) PreBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { + // Get cached bandwidth requirements from PreFilter + s, err := getPreFilterState(state) if err != nil { - return framework.NewStatus(framework.Error, fmt.Sprintf("failed to verify bandwidth: %v", err)) + klog.ErrorS(err, "Failed to get prefilter state in PreBind", "pod", pod.Name) + // Don't fail binding for state read errors + return framework.NewStatus(framework.Success, "") } - if !sufficient { - return framework.NewStatus(framework.Unschedulable, "insufficient bandwidth at prebind") + // Skip if no bandwidth required + if s.ulMbps == 0 && s.dlMbps == 0 { + return framework.NewStatus(framework.Success, "") } - klog.V(3).InfoS("PreBind bandwidth check passed", "node", nodeName, "pod", pod.Name) + // Bandwidth was already validated and reserved in Reserve phase + klog.V(3).InfoS("PreBind: bandwidth already reserved", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) return framework.NewStatus(framework.Success, "") } -// PostBind adds the bandwidth request to CRD after pod is successfully bound +// PostBind is called after pod is successfully bound +// Bandwidth was already reserved in Reserve phase, so this just logs success func (v *VaclabScheduling) PostBind(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { // Get cached bandwidth requirements from PreFilter s, err := getPreFilterState(state) @@ -360,11 +380,6 @@ func (v *VaclabScheduling) PostBind(ctx context.Context, state *framework.CycleS return } - // Now add bandwidth request to the node's Bandwidth CRD (pod exists now) - if err := v.bandwidth.AddBandwidthRequest(ctx, nodeName, pod.Name, pod.Namespace, string(pod.UID), s.ulMbps, s.dlMbps); err != nil { - klog.ErrorS(err, "Failed to add bandwidth request in PostBind", "node", nodeName, "pod", pod.Name) - return - } - - klog.InfoS("Added bandwidth request to CRD in PostBind", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) + // Bandwidth was already reserved in Reserve phase, just log success + klog.InfoS("Pod successfully bound with bandwidth reservation", "node", nodeName, "pod", pod.Name, "ul", s.ulMbps, "dl", s.dlMbps) }