-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Expand file tree
/
Copy pathnode.go
More file actions
265 lines (232 loc) · 11.7 KB
/
node.go
File metadata and controls
265 lines (232 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
package node
import (
"context"
"path/filepath"
"strings"
"time"
g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/utils/ptr"
"github.com/openshift/origin/test/extended/imagepolicy"
nodeutils "github.com/openshift/origin/test/extended/node"
exutil "github.com/openshift/origin/test/extended/util"
)
var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
// oc is the admin CLI shared by every spec in this container; it is created
// without a per-test namespace.
var oc = exutil.NewCLIWithoutNamespace("node")

// nodeE2EBaseDir is the fixture path testdata/node/node_e2e.
var nodeE2EBaseDir = exutil.FixturePath("testdata", "node", "node_e2e")

// podDevFuseYAML is the pod manifest applied by the /dev/fuse spec.
var podDevFuseYAML = filepath.Join(nodeE2EBaseDir, "pod-dev-fuse.yaml")
// Every spec in this container is skipped on MicroShift, where MachineConfig
// resources are not available. A failure to detect the cluster type fails the
// spec rather than skipping it.
g.BeforeEach(func() {
	microShift, detectErr := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
	o.Expect(detectErr).NotTo(o.HaveOccurred())
	if !microShift {
		return
	}
	g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
})
// Verifies that the kubelet runs at log level 2: kubelet.service must expose
// KUBELET_LOG_LEVEL and the running kubelet process must carry the --v=2 flag.
// Each poll iteration is decided by the first Ready node in the node list;
// nodes that are not Ready are skipped.
//author: asahay@redhat.com
g.It("[OTP] validate KUBELET_LOG_LEVEL", func() {
	// Declared outside the poll so the last observed values can be logged on timeout.
	var kubeservice, kubelet string

	g.By("Polling to check kubelet log level on ready nodes")
	// wait.Poll is deprecated; PollUntilContextTimeout with immediate=false keeps
	// the same cadence (the first check happens after one interval).
	waitErr := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 1*time.Minute, false, func(_ context.Context) (bool, error) {
		g.By("Getting all node names in the cluster")
		nodeName, nodeErr := oc.AsAdmin().Run("get").Args("nodes", "-o=jsonpath={.items[*].metadata.name}").Output()
		o.Expect(nodeErr).NotTo(o.HaveOccurred())
		e2e.Logf("\nNode Names are %v", nodeName)

		for _, node := range strings.Fields(nodeName) {
			g.By("Checking if node " + node + " is Ready")
			nodeStatus, statusErr := oc.AsAdmin().Run("get").Args("nodes", node, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
			o.Expect(statusErr).NotTo(o.HaveOccurred())
			e2e.Logf("\nNode %s Status is %s\n", node, nodeStatus)
			if nodeStatus != "True" {
				e2e.Logf("\nNode %s is not Ready, Skipping\n", node)
				continue
			}

			var err error
			g.By("Checking KUBELET_LOG_LEVEL in kubelet.service on node " + node)
			kubeservice, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "systemctl show kubelet.service | grep KUBELET_LOG_LEVEL")
			o.Expect(err).NotTo(o.HaveOccurred())

			g.By("Checking kubelet process for --v=2 flag on node " + node)
			// The [k] bracket keeps grep from matching its own command line.
			kubelet, err = nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c", "ps aux | grep [k]ubelet")
			o.Expect(err).NotTo(o.HaveOccurred())

			g.By("Verifying KUBELET_LOG_LEVEL is set and kubelet is running with --v=2")
			if strings.Contains(kubeservice, "KUBELET_LOG_LEVEL") && strings.Contains(kubelet, "--v=2") {
				e2e.Logf("KUBELET_LOG_LEVEL is 2.\n")
				return true, nil
			}
			// The first Ready node did not match: retry on the next tick.
			e2e.Logf("KUBELET_LOG_LEVEL is not 2.\n")
			return false, nil
		}
		// No Ready node was found in this iteration.
		return false, nil
	})
	if waitErr != nil {
		e2e.Logf("Kubelet Log level is:\n %v\n", kubeservice)
		e2e.Logf("Running Process of kubelet are:\n %v\n", kubelet)
	}
	o.Expect(waitErr).NotTo(o.HaveOccurred(), "KUBELET_LOG_LEVEL is not expected, timed out")
})
// Verifies that every Ready worker node mounts /sys/fs/cgroup as cgroup2fs and
// that patching nodes.config.openshift.io/cluster to cgroupMode "v1" is rejected.
//author: cmaurya@redhat.com
g.It("[OTP] validate cgroupv2 is default [OCP-80983]", func() {
	g.By("Check cgroup version on all Ready worker nodes")
	nodeNames, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("nodes", "-l", "node-role.kubernetes.io/worker", "-o=jsonpath={.items[*].metadata.name}").Output()
	o.Expect(err).NotTo(o.HaveOccurred())
	workers := strings.Fields(nodeNames)
	o.Expect(workers).NotTo(o.BeEmpty(), "No worker nodes found")

	// Counts the nodes that were actually inspected, so the spec cannot pass
	// vacuously when every worker is skipped as not Ready.
	checked := 0
	for _, worker := range workers {
		nodeStatus, statusErr := oc.AsAdmin().Run("get").Args("nodes", worker, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
		o.Expect(statusErr).NotTo(o.HaveOccurred())
		if nodeStatus != "True" {
			e2e.Logf("Skipping worker node %s (not Ready)", worker)
			continue
		}
		// stat -f -c %T prints the filesystem type of the cgroup mount point.
		cgroupV, execErr := nodeutils.ExecOnNodeWithChroot(oc, worker, "/bin/bash", "-c", "stat -c %T -f /sys/fs/cgroup")
		o.Expect(execErr).NotTo(o.HaveOccurred())
		e2e.Logf("cgroup version on node %s: [%v]", worker, cgroupV)
		o.Expect(cgroupV).To(o.ContainSubstring("cgroup2fs"), "Node %s does not have cgroupv2", worker)
		checked++
	}
	o.Expect(checked).To(o.BeNumerically(">", 0), "No Ready worker nodes found to verify cgroup version")

	g.By("Changing cgroup from v2 to v1 should result in error")
	output, err := oc.AsAdmin().WithoutNamespace().Run("patch").Args("nodes.config.openshift.io", "cluster", "-p", `{"spec": {"cgroupMode": "v1"}}`, "--type=merge").Output()
	o.Expect(err).Should(o.HaveOccurred())
	o.Expect(output).To(o.ContainSubstring("spec.cgroupMode: Unsupported value: \"v1\": supported values: \"v2\", \"\""))
})
// Verifies that a pod created from pod-dev-fuse.yaml becomes Ready and can
// stat /dev/fuse. The spec is skipped when the node's default CRI-O runtime
// is runc.
//author: cmaurya@redhat.com
g.It("[OTP] Allow dev fuse by default in CRI-O [OCP-70987]", func() {
	podName := "pod-devfuse"
	ns := "devfuse-test"

	// Skip on runc: io.kubernetes.cri-o.Devices annotation is only in crun's allowed_annotations.
	// We query crio config directly as ContainerRuntimeConfig API misses platform-default runc.
	g.By("Skip if the default runtime is runc")
	node, err := oc.AsAdmin().WithoutNamespace().Run("get").Args(
		"nodes", "-l", "node-role.kubernetes.io/worker", "-o=jsonpath={.items[0].metadata.name}").Output()
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(node).NotTo(o.BeEmpty())
	// awk splits on double quotes and prints the quoted value of default_runtime.
	runtime, err := nodeutils.ExecOnNodeWithChroot(oc, node, "/bin/bash", "-c",
		"crio status config 2>/dev/null | awk -F'\"' '/default_runtime/{print $2}'")
	o.Expect(err).NotTo(o.HaveOccurred())
	if strings.TrimSpace(runtime) == "runc" {
		g.Skip("Skipping: not applicable to runc runtime")
	}

	g.By("Create a test namespace")
	err = oc.AsAdmin().WithoutNamespace().Run("create").Args("namespace", ns).Execute()
	o.Expect(err).NotTo(o.HaveOccurred())
	defer func() {
		// Best-effort cleanup: a failed delete is logged instead of being
		// silently dropped, but it does not fail the spec.
		if delErr := oc.AsAdmin().WithoutNamespace().Run("delete").Args("namespace", ns, "--ignore-not-found").Execute(); delErr != nil {
			e2e.Logf("Failed to delete namespace %s: %v", ns, delErr)
		}
	}()

	g.By("Create a pod with dev fuse annotation")
	err = oc.AsAdmin().WithoutNamespace().Run("apply").Args("-f", podDevFuseYAML, "-n", ns).Execute()
	o.Expect(err).NotTo(o.HaveOccurred())

	g.By("Wait for pod to be ready")
	// wait.Poll is deprecated; PollUntilContextTimeout with immediate=false keeps
	// the same cadence (the first check happens after one interval).
	err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 1*time.Minute, false, func(_ context.Context) (bool, error) {
		status, pollErr := oc.AsAdmin().WithoutNamespace().Run("get").Args("pod", podName, "-n", ns, "-o=jsonpath={.status.conditions[?(@.type=='Ready')].status}").Output()
		if pollErr != nil {
			// Query errors are treated as transient: log and retry on the next tick.
			e2e.Logf("Error polling pod status: %v", pollErr)
			return false, nil
		}
		return status == "True", nil
	})
	if err != nil {
		// Dump the full pod status to make the timeout diagnosable.
		podStatus, _ := oc.AsAdmin().WithoutNamespace().Run("get").Args("pod", podName, "-n", ns, "-o=jsonpath={.status}").Output()
		e2e.Logf("Pod status on timeout: %s", podStatus)
	}
	o.Expect(err).NotTo(o.HaveOccurred(), "pod did not become ready")

	g.By("Check /dev/fuse is mounted inside the pod")
	output, err := oc.AsAdmin().WithoutNamespace().Run("exec").Args(podName, "-n", ns, "--", "stat", "/dev/fuse").Output()
	o.Expect(err).NotTo(o.HaveOccurred())
	e2e.Logf("/dev/fuse mount output: %s", output)
	o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})
})
var _ = g.Describe("[Suite:openshift/disruptive-longrunning][sig-node][Disruptive] ContainerRuntimeConfig", func() {
// oc is the admin CLI shared by the specs in this container; it is created
// without a per-test namespace.
var oc = exutil.NewCLIWithoutNamespace("ctrcfg")
// Skips every spec in this container on MicroShift, where MachineConfig
// resources are not available. When the cluster type cannot be determined the
// spec is skipped as well, after logging the detection error.
g.BeforeEach(func() {
	microShift, detectErr := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
	switch {
	case detectErr != nil:
		e2e.Logf("Failed to detect MicroShift cluster: %v", detectErr)
		g.Skip("Skipping: unable to determine cluster type")
	case microShift:
		g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
	}
})
// Validates that ContainerRuntimeConfig pidsLimit setting is correctly applied
// by MCO and that manual crio.conf edits are overwritten during rollout.
//
// Flow: hand-edit crio.conf on one Ready worker, label the worker
// MachineConfigPool, create a ContainerRuntimeConfig selecting that label,
// wait for the pool rollout, then inspect the effective CRI-O config on the
// same node.
//author: cmaurya@redhat.com
g.It("[OTP] Verify ContainerRuntimeConfig pidsLimit and conmon setting [OCP-45351]", func() {
	ctrcfgName := "set-pids-limit"

	g.By("Get a ready worker node")
	workers, err := exutil.GetReadySchedulableWorkerNodes(context.Background(), oc.AdminKubeClient())
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to get ready schedulable worker nodes")
	o.Expect(workers).NotTo(o.BeEmpty(), "No Ready worker nodes found")
	workerNode := workers[0].Name

	g.By("Make a manual change to crio.conf on node " + workerNode)
	// Appends log_level = "debug" right after the [crio.runtime] section header.
	_, err = nodeutils.ExecOnNodeWithChroot(oc, workerNode,
		"/bin/bash", "-c", `sed -i '/^\[crio\.runtime\]/a log_level = "debug"' /etc/crio/crio.conf`)
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to edit crio.conf on node %s", workerNode)

	g.By("Verify the manual crio.conf edit took effect")
	editedConf, err := nodeutils.ExecOnNodeWithChroot(oc, workerNode,
		"cat", "/etc/crio/crio.conf")
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to read crio.conf on node %s", workerNode)
	o.Expect(editedConf).To(o.ContainSubstring(`log_level = "debug"`),
		"sed edit did not apply: expected log_level = debug in crio.conf")

	g.By("Label worker MachineConfigPool with custom-crio=high-pid-limit")
	err = oc.AsAdmin().WithoutNamespace().Run("label").Args(
		"machineconfigpool", "worker", "custom-crio=high-pid-limit").Execute()
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to label worker MachineConfigPool")

	g.DeferCleanup(func() {
		g.By("Cleanup: delete ContainerRuntimeConfig and remove MCP label")
		cleanupSpec := imagepolicy.GetMCPCurrentSpecConfigName(oc, "worker")
		cleanupErr := oc.MachineConfigurationClient().MachineconfigurationV1().ContainerRuntimeConfigs().Delete(
			context.Background(), ctrcfgName, metav1.DeleteOptions{})
		// Remember whether an object was actually removed: a NotFound result means
		// the spec failed before the ContainerRuntimeConfig was created.
		ctrcfgDeleted := cleanupErr == nil
		if !apierrors.IsNotFound(cleanupErr) {
			o.Expect(cleanupErr).NotTo(o.HaveOccurred(),
				"cleanup failed: could not delete ContainerRuntimeConfig %s", ctrcfgName)
		}
		cleanupErr = oc.AsAdmin().WithoutNamespace().Run("label").Args(
			"machineconfigpool", "worker", "custom-crio-").Execute()
		o.Expect(cleanupErr).NotTo(o.HaveOccurred(),
			"cleanup failed: could not remove custom-crio label from worker MachineConfigPool")
		if !ctrcfgDeleted {
			// NOTE(review): assumes the wait helper blocks until the pool's spec
			// config name differs from cleanupSpec; with nothing deleted no new
			// rendered config is expected, so waiting would only time out.
			e2e.Logf("ContainerRuntimeConfig %s was not present; skipping MCP rollout wait", ctrcfgName)
			return
		}
		imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "worker", cleanupSpec)
	})

	// Captured before the ContainerRuntimeConfig exists so the rollout wait
	// below has a baseline to compare against.
	initialSpec := imagepolicy.GetMCPCurrentSpecConfigName(oc, "worker")

	g.By("Create ContainerRuntimeConfig with pidsLimit 2048")
	ctrcfg := &mcfgv1.ContainerRuntimeConfig{
		ObjectMeta: metav1.ObjectMeta{Name: ctrcfgName},
		Spec: mcfgv1.ContainerRuntimeConfigSpec{
			MachineConfigPoolSelector: &metav1.LabelSelector{
				MatchLabels: map[string]string{"custom-crio": "high-pid-limit"},
			},
			ContainerRuntimeConfig: &mcfgv1.ContainerRuntimeConfiguration{
				PidsLimit: ptr.To[int64](2048),
			},
		},
	}
	_, err = oc.MachineConfigurationClient().MachineconfigurationV1().ContainerRuntimeConfigs().Create(
		context.Background(), ctrcfg, metav1.CreateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred(), "failed to create ContainerRuntimeConfig")

	g.By("Wait for worker MCP rollout to complete")
	imagepolicy.WaitForMCPConfigSpecChangeAndUpdated(oc, "worker", initialSpec)
	e2e.Logf("All worker nodes rolled out successfully")

	g.By("Verify pidsLimit and conmon in crio config on node " + workerNode)
	var crioConfig string
	// Retried for up to 30s because the exec on the node may fail transiently.
	o.Eventually(func() error {
		var execErr error
		crioConfig, execErr = nodeutils.ExecOnNodeWithChroot(oc, workerNode,
			"/bin/bash", "-c", "crio config 2>/dev/null")
		return execErr
	}, 30*time.Second, 5*time.Second).Should(o.Succeed(), "failed to get crio config on node %s", workerNode)
	o.Expect(crioConfig).To(o.ContainSubstring("pids_limit = 2048"), "pidsLimit should be 2048")
	o.Expect(crioConfig).To(o.ContainSubstring(`conmon = ""`),
		"conmon should be empty")
	o.Expect(crioConfig).NotTo(o.ContainSubstring(`log_level = "debug"`),
		"manual crio.conf edit should be overwritten by MCO")
})
})