Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/extended/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
- **node_e2e/node.go** - PodDisruptionBudget drain blocking (OCP-67564) - Tests that node drain is blocked when PDB has minAvailable=100% with empty selector [Disruptive] [Lifecycle:informing]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's maintainable to list every test case here.
We should probably group them to some extent.
This can be handled in a follow-up, though.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has grown organically so far. I think it has reached the point where it needs more structure. Definitely a follow-up item.


## Directory Structure

Expand Down
152 changes: 152 additions & 0 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@ import (

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/origin/test/extended/imagepolicy"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
utilrand "k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
Expand Down Expand Up @@ -164,6 +169,153 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
e2e.Logf("/dev/fuse mount output: %s", output)
Comment thread
BhargaviGudi marked this conversation as resolved.
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})

//author: bgudi@redhat.com
//migrated from openshift-tests-private
//automates: https://issues.redhat.com/browse/OCPBUGS-15035
//
// Verifies that `oc adm drain` fails while a PodDisruptionBudget with
// minAvailable "100%" and an empty selector exists in the test namespace.
// Flow: create a 6-replica deployment and wait for it to be ready, create the
// PDB, pick a worker node, confirm the PDB reports DisruptionAllowed=False,
// attempt the drain (expected to fail), then check that the list of pods on
// that node is unchanged.
g.It("[OTP] node's drain should block when PodDisruptionBudget minAvailable equals 100 percentage and selector is empty [Disruptive] [OCP-67564]", ote.Informing(), func() {
ctx := context.Background()

// Skip on SNO/External topologies where there might not be dedicated worker nodes
infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get cluster infrastructure")
if infra.Status.ControlPlaneTopology == "SingleReplica" || infra.Status.ControlPlaneTopology == "External" {
g.Skip("Skipping on SNO/External topology - requires dedicated worker nodes")
}

// The deployment and the PDB below are both created in this namespace.
oc.SetupProject()
namespace := oc.Namespace()

g.By("Create a deployment with 6 replicas")
replicas := int32(6)
deployment := &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "hello-openshift",
Namespace: namespace,
Labels: map[string]string{
"app": "myapp",
},
},
Spec: appsv1.DeploymentSpec{
Replicas: &replicas,
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "myapp",
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Name: "myapp",
Labels: map[string]string{
"app": "myapp",
},
},
Spec: corev1.PodSpec{
SecurityContext: &corev1.PodSecurityContext{
// &[]bool{true}[0] takes the address of a slice-literal element to obtain a *bool set to true.
RunAsNonRoot: &[]bool{true}[0],
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks really weird...
if golang is new enough, you can write in this way, or you can use ptr.To()

Suggested change
RunAsNonRoot: &[]bool{true}[0],
RunAsNonRoot: new(true),

SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "myapp",
Image: "quay.io/openshifttest/hello-openshift@sha256:4200f438cf2e9446f6bcff9d67ceea1f69ed07a2f83363b7fb52529f7ddd8a83",
SecurityContext: &corev1.SecurityContext{
// Same pointer-to-literal trick as RunAsNonRoot, here yielding a *bool set to false.
AllowPrivilegeEscalation: &[]bool{false}[0],
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
AllowPrivilegeEscalation: &[]bool{false}[0],
AllowPrivilegeEscalation: new(false),

Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
},
},
},
},
},
}
_, err = oc.KubeClient().AppsV1().Deployments(namespace).Create(ctx, deployment, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create deployment")
// Register deletion of the deployment as a Ginkgo cleanup step.
g.DeferCleanup(oc.KubeClient().AppsV1().Deployments(namespace).Delete, ctx, "hello-openshift", metav1.DeleteOptions{})

g.By("Wait for deployment to be ready")
// Poll every 3s for up to 5m until ReadyReplicas equals the requested count.
// Errors from Get are logged and treated as "not ready yet" rather than aborting the poll.
err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
deploy, pollErr := oc.KubeClient().AppsV1().Deployments(namespace).Get(ctx, "hello-openshift", metav1.GetOptions{})
if pollErr != nil {
e2e.Logf("Error getting deployment: %v", pollErr)
return false, nil
}
if deploy.Status.ReadyReplicas == replicas {
e2e.Logf("Deployment is ready with %d replicas", deploy.Status.ReadyReplicas)
return true, nil
}
e2e.Logf("Waiting for deployment, ready replicas: %d/%d", deploy.Status.ReadyReplicas, replicas)
return false, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "deployment did not become ready")

g.By("Create PodDisruptionBudget with 100% minAvailable")
pdb := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: "my-pdb",
Namespace: namespace,
},
Spec: policyv1.PodDisruptionBudgetSpec{
MinAvailable: &intstr.IntOrString{
Type: intstr.String,
StrVal: "100%",
},
// Empty selector: this is the configuration under test (see the spec title).
// NOTE(review): presumably relies on policy/v1 treating an empty selector as "all pods in the namespace" — confirm.
Selector: &metav1.LabelSelector{},
},
}
_, err = oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Create(ctx, pdb, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create PodDisruptionBudget")
// Register deletion of the PDB as a Ginkgo cleanup step.
g.DeferCleanup(oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Delete, ctx, "my-pdb", metav1.DeleteOptions{})

g.By("Get a single worker node")
workerNode, err := nodeutils.GetSingleWorkerNode(ctx, oc)
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get worker node")
e2e.Logf("Selected worker node: %s", workerNode)

g.By("Obtain the pods running on the selected worker node")
// jsonpath filter: names of pods in the test namespace whose spec.nodeName is workerNode.
// The test requires at least one such pod, otherwise there is nothing for the PDB to protect on this node.
podsInWorker, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-n", namespace, "-o=jsonpath={.items[?(@.spec.nodeName=='"+workerNode+"')].metadata.name}").Output()
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods on worker node")
o.Expect(len(strings.Fields(podsInWorker))).Should(o.BeNumerically(">", 0), "no pods found on worker node")

g.By("Make sure that PDB's DisruptionAllowed condition is False")
// Poll every 2s for up to 30s until the DisruptionAllowed condition has any non-empty status.
// The actual value is asserted after the poll, so a status of "True" fails the test instead of timing out.
var pdbStatus string
err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 30*time.Second, true, func(pollCtx context.Context) (bool, error) {
var pollErr error
pdbStatus, pollErr = oc.AsAdmin().WithoutNamespace().Run("get").Args("poddisruptionbudget", "my-pdb", "-n", namespace, "-o=jsonpath={.status.conditions[?(@.type==\"DisruptionAllowed\")].status}").Output()
if pollErr != nil {
e2e.Logf("Error getting PDB status: %v", pollErr)
return false, nil
}
if pdbStatus != "" {
return true, nil
}
e2e.Logf("Waiting for PDB DisruptionAllowed condition to appear")
return false, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "PDB DisruptionAllowed condition not found")
o.Expect(pdbStatus).Should(o.Equal("False"), "PDB DisruptionAllowed should be False")

g.By("Drain the selected worker node")
// Both cleanups are registered before the drain is attempted.
// NOTE(review): Ginkgo documents DeferCleanup callbacks as running last-in-first-out, so the
// uncordon registered second should run before the cluster-operator wait — confirm.
g.DeferCleanup(func() {
err := nodeutils.WaitClusterOperatorAvailable(ctx, oc)
o.Expect(err).NotTo(o.HaveOccurred(), "cluster operators failed to return to available state after node drain")
})
g.DeferCleanup(oc.AsAdmin().WithoutNamespace().Run("adm").Args("uncordon", workerNode).Execute)

// The drain is expected to FAIL: err must be non-nil and the output must contain both messages below.
out, err := oc.AsAdmin().WithoutNamespace().Run("adm").Args("drain", workerNode, "--ignore-daemonsets", "--delete-emptydir-data", "--timeout=30s").Output()
o.Expect(err).To(o.HaveOccurred(), "drain operation should have been blocked but it wasn't")
o.Expect(strings.Contains(out, "Cannot evict pod as it would violate the pod's disruption budget")).Should(o.BeTrue(), "drain output missing PDB violation error message")
o.Expect(strings.Contains(out, "There are pending nodes to be drained")).Should(o.BeTrue(), "drain output missing pending nodes error message")

g.By("Verify that the pods were not drained from the node")
// Same query as before the drain; the two raw output strings are compared directly,
// so the pod names must also come back in the same order.
podsAfterDrain, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-n", namespace, "-o=jsonpath={.items[?(@.spec.nodeName=='"+workerNode+"')].metadata.name}").Output()
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods after drain attempt")
o.Expect(podsInWorker).Should(o.BeIdenticalTo(podsAfterDrain), "pods should not have been evicted from the node")
})
})

// author: asahay@redhat.com
Expand Down
42 changes: 42 additions & 0 deletions test/extended/node/node_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,3 +764,45 @@ func GetFirstReadyWorkerNode(oc *exutil.CLI) string {
o.Expect(false).To(o.BeTrue(), "no Ready worker node found among %v", workers)
return "" // unreachable; satisfies compiler
}

// GetSingleWorkerNode returns the name of a single worker node.
//
// It calls getNodesByLabel with the label key "node-role.kubernetes.io/worker"
// and returns the name of the first node in the result, logging the choice.
// An error is returned when the lookup fails or when it yields no nodes; in
// both cases the returned name is the empty string.
//
// This function itself performs no readiness or schedulability check.
// NOTE(review): whether getNodesByLabel filters or orders nodes is not visible here — confirm.
func GetSingleWorkerNode(ctx context.Context, oc *exutil.CLI) (string, error) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there's the same or similar function somewhere else.

nodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker")
if err != nil {
return "", err
}
if len(nodes) == 0 {
return "", fmt.Errorf("no worker nodes found")
}
framework.Logf("Worker Node Name is %v", nodes[0].Name)
return nodes[0].Name, nil
}

// WaitClusterOperatorAvailable waits for all cluster operators to be available.
//
// It polls every 10 seconds, for up to 30 minutes, running
// `oc get clusteroperator` with a jsonpath that extracts the status of each
// operator's "Available" condition. The wait succeeds once the output is
// non-empty and every whitespace-separated status equals "True".
//
// Errors from the `oc` command and empty output are logged and retried rather
// than aborting the wait. If the poll ends with an error, it is wrapped with
// %w and returned; otherwise nil is returned.
func WaitClusterOperatorAvailable(ctx context.Context, oc *exutil.CLI) error {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there's the same or similar function somewhere else.

timeout := 30 * time.Minute

waitErr := wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
availableCOStatus, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("clusteroperator", "-o=jsonpath={.items[*].status.conditions[?(@.type==\"Available\")].status}").Output()
if err != nil {
// Treated as transient: log and try again on the next tick.
framework.Logf("Error getting cluster operators: %v", err)
return false, nil
}
if availableCOStatus == "" {
framework.Logf("No cluster operator status found")
return false, nil
}
// One status token per operator that has an Available condition.
statuses := strings.Fields(availableCOStatus)
for _, status := range statuses {
if status != "True" {
framework.Logf("Some Cluster Operator is still Unavailable")
return false, nil
}
}
return true, nil
})
if waitErr != nil {
return fmt.Errorf("some cluster operator is still unavailable after timeout: %w", waitErr)
}
return nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.