Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
package node

import (
"context"
"path/filepath"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"

Expand Down Expand Up @@ -157,4 +164,151 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
e2e.Logf("/dev/fuse mount output: %s", output)
Comment thread
BhargaviGudi marked this conversation as resolved.
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})

//author: bgudi@redhat.com
//migrated from openshift-tests-private
//automates: https://issues.redhat.com/browse/OCPBUGS-15035
//
// The spec creates a 6-replica deployment and a PodDisruptionBudget with
// minAvailable=100% and an empty selector in a fresh project, then attempts to
// drain one worker node. It expects the drain to fail with the PDB violation
// message and the set of pods on that node to be unchanged afterwards.
g.It("[OTP] node's drain should block when PodDisruptionBudget minAvailable equals 100 percentage and selector is empty [Disruptive] [OCP-67564]", ote.Informing(), func() {
	const (
		deploymentName = "hello-openshift"
		pdbName        = "my-pdb"
	)
	ctx := context.Background()

	// Skip on SNO/External topologies where there might not be dedicated worker nodes
	infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get cluster infrastructure")
	if infra.Status.ControlPlaneTopology == "SingleReplica" || infra.Status.ControlPlaneTopology == "External" {
		g.Skip("Skipping on SNO/External topology - requires dedicated worker nodes")
	}

	oc.SetupProject()
	namespace := oc.Namespace()

	g.By("Create a deployment with 6 replicas")
	replicas := int32(6)
	runAsNonRoot := true
	allowPrivilegeEscalation := false
	appLabels := func() map[string]string {
		// A fresh map per use so the three label sets never alias each other.
		return map[string]string{"app": "myapp"}
	}
	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      deploymentName,
			Namespace: namespace,
			Labels:    appLabels(),
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: &replicas,
			Selector: &metav1.LabelSelector{
				MatchLabels: appLabels(),
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Name:   "myapp",
					Labels: appLabels(),
				},
				Spec: corev1.PodSpec{
					SecurityContext: &corev1.PodSecurityContext{
						RunAsNonRoot: &runAsNonRoot,
						SeccompProfile: &corev1.SeccompProfile{
							Type: corev1.SeccompProfileTypeRuntimeDefault,
						},
					},
					Containers: []corev1.Container{
						{
							Name:  "myapp",
							Image: "quay.io/openshifttest/hello-openshift@sha256:4200f438cf2e9446f6bcff9d67ceea1f69ed07a2f83363b7fb52529f7ddd8a83",
							SecurityContext: &corev1.SecurityContext{
								AllowPrivilegeEscalation: &allowPrivilegeEscalation,
								Capabilities: &corev1.Capabilities{
									Drop: []corev1.Capability{"ALL"},
								},
							},
						},
					},
				},
			},
		},
	}
	_, err = oc.KubeClient().AppsV1().Deployments(namespace).Create(ctx, deployment, metav1.CreateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to create deployment")
	g.DeferCleanup(oc.KubeClient().AppsV1().Deployments(namespace).Delete, ctx, deploymentName, metav1.DeleteOptions{})

	g.By("Wait for deployment to be ready")
	err = wait.PollUntilContextTimeout(ctx, 3*time.Second, 5*time.Minute, true, func(pollCtx context.Context) (bool, error) {
		deploy, pollErr := oc.KubeClient().AppsV1().Deployments(namespace).Get(pollCtx, deploymentName, metav1.GetOptions{})
		if pollErr != nil {
			// Errors are logged and retried until the poll times out.
			e2e.Logf("Error getting deployment: %v", pollErr)
			return false, nil
		}
		if deploy.Status.ReadyReplicas == replicas {
			e2e.Logf("Deployment is ready with %d replicas", deploy.Status.ReadyReplicas)
			return true, nil
		}
		e2e.Logf("Waiting for deployment, ready replicas: %d/%d", deploy.Status.ReadyReplicas, replicas)
		return false, nil
	})
	o.Expect(err).NotTo(o.HaveOccurred(), "deployment did not become ready")

	g.By("Create PodDisruptionBudget with 100% minAvailable")
	pdb := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{
			Name:      pdbName,
			Namespace: namespace,
		},
		Spec: policyv1.PodDisruptionBudgetSpec{
			MinAvailable: &intstr.IntOrString{
				Type:   intstr.String,
				StrVal: "100%",
			},
			// The empty selector is the scenario under test.
			Selector: &metav1.LabelSelector{},
		},
	}
	_, err = oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Create(ctx, pdb, metav1.CreateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to create PodDisruptionBudget")
	g.DeferCleanup(oc.KubeClient().PolicyV1().PodDisruptionBudgets(namespace).Delete, ctx, pdbName, metav1.DeleteOptions{})

	g.By("Get a single worker node")
	workerNode, err := nodeutils.GetSingleWorkerNode(ctx, oc)
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get worker node")
	e2e.Logf("Selected worker node: %s", workerNode)

	// listPodsOnNode returns the space-separated names of the pods in the test
	// namespace whose spec.nodeName is the selected worker node. It is used
	// both before and after the drain attempt so the two snapshots come from
	// the exact same query.
	listPodsOnNode := func() (string, error) {
		return oc.AsAdmin().WithoutNamespace().Run("get").Args("pods", "-n", namespace, "-o=jsonpath={.items[?(@.spec.nodeName=='"+workerNode+"')].metadata.name}").Output()
	}

	g.By("Obtain the pods running on the selected worker node")
	podsInWorker, err := listPodsOnNode()
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods on worker node")
	o.Expect(strings.Fields(podsInWorker)).NotTo(o.BeEmpty(), "no pods found on worker node")

	g.By("Make sure that PDB's DisruptionAllowed condition is False")
	var pdbStatus string
	err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 30*time.Second, true, func(context.Context) (bool, error) {
		var pollErr error
		pdbStatus, pollErr = oc.AsAdmin().WithoutNamespace().Run("get").Args("poddisruptionbudget", pdbName, "-n", namespace, "-o=jsonpath={.status.conditions[?(@.type==\"DisruptionAllowed\")].status}").Output()
		if pollErr != nil {
			e2e.Logf("Error getting PDB status: %v", pollErr)
			return false, nil
		}
		if pdbStatus != "" {
			// The condition has been published; its value is asserted below.
			return true, nil
		}
		e2e.Logf("Waiting for PDB DisruptionAllowed condition to appear")
		return false, nil
	})
	o.Expect(err).NotTo(o.HaveOccurred(), "PDB DisruptionAllowed condition not found")
	o.Expect(pdbStatus).Should(o.Equal("False"), "PDB DisruptionAllowed should be False")

	g.By("Drain the selected worker node")
	// Both cleanups are registered before the drain so they run even when an
	// assertion below fails. The uncordon is registered last.
	g.DeferCleanup(func() {
		err := nodeutils.WaitClusterOperatorAvailable(ctx, oc)
		o.Expect(err).NotTo(o.HaveOccurred(), "cluster operators failed to return to available state after node drain")
	})
	g.DeferCleanup(oc.AsAdmin().WithoutNamespace().Run("adm").Args("uncordon", workerNode).Execute)

	out, err := oc.AsAdmin().WithoutNamespace().Run("adm").Args("drain", workerNode, "--ignore-daemonsets", "--delete-emptydir-data", "--timeout=30s").Output()
	o.Expect(err).To(o.HaveOccurred(), "drain operation should have been blocked but it wasn't")
	// ContainSubstring reports the actual drain output on failure, which a
	// bare boolean assertion would hide.
	o.Expect(out).To(o.ContainSubstring("Cannot evict pod as it would violate the pod's disruption budget"), "drain output missing PDB violation error message")
	o.Expect(out).To(o.ContainSubstring("There are pending nodes to be drained"), "drain output missing pending nodes error message")

	g.By("Verify that the pods were not drained from the node")
	podsAfterDrain, err := listPodsOnNode()
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get pods after drain attempt")
	// Compare the pod name sets rather than the raw strings so the check does
	// not depend on the order in which the API lists the pods.
	o.Expect(strings.Fields(podsAfterDrain)).To(o.ConsistOf(strings.Fields(podsInWorker)), "pods should not have been evicted from the node")
})
})
42 changes: 42 additions & 0 deletions test/extended/node/node_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,3 +741,45 @@ func ensureDropInDirectoryExists(ctx context.Context, oc *exutil.CLI, dirPath st

return nil
}

// GetSingleWorkerNode looks up the nodes labelled
// "node-role.kubernetes.io/worker" and returns the name of the first entry in
// that list. It returns the lookup error unchanged, or a "no worker nodes
// found" error when the list is empty.
func GetSingleWorkerNode(ctx context.Context, oc *exutil.CLI) (string, error) {
	workers, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker")
	switch {
	case err != nil:
		return "", err
	case len(workers) == 0:
		return "", fmt.Errorf("no worker nodes found")
	}

	// No further filtering is applied here: the first listed node is used.
	name := workers[0].Name
	framework.Logf("Worker Node Name is %v", name)
	return name, nil
}

// WaitClusterOperatorAvailable polls every 10 seconds, for up to 30 minutes,
// until every "Available" condition status reported by `get clusteroperator`
// is "True". Query errors and empty output are logged and retried. When the
// poll ends without success the underlying wait error is wrapped and returned.
//
// NOTE(review): the jsonpath only emits a value for operators that carry an
// Available condition, so an operator without one would presumably not be
// counted here — confirm whether that matters for callers.
func WaitClusterOperatorAvailable(ctx context.Context, oc *exutil.CLI) error {
	const (
		pollInterval = 10 * time.Second
		pollTimeout  = 30 * time.Minute
	)

	// everyOperatorAvailable is the poll condition: it never returns an error,
	// so only the timeout or context cancellation can abort the wait.
	everyOperatorAvailable := func(context.Context) (bool, error) {
		out, queryErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("clusteroperator", `-o=jsonpath={.items[*].status.conditions[?(@.type=="Available")].status}`).Output()
		switch {
		case queryErr != nil:
			framework.Logf("Error getting cluster operators: %v", queryErr)
			return false, nil
		case out == "":
			framework.Logf("No cluster operator status found")
			return false, nil
		}

		for _, reported := range strings.Fields(out) {
			if reported == "True" {
				continue
			}
			framework.Logf("Some Cluster Operator is still Unavailable")
			return false, nil
		}
		return true, nil
	}

	if err := wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, everyOperatorAvailable); err != nil {
		return fmt.Errorf("some cluster operator is still unavailable after timeout: %w", err)
	}
	return nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.