
Commit 57305d5

UPSTREAM: <carry>: OCPBUGS-52169: Workload partitioning of static init containers
The pod modification routine that prepares containers for Workload Partitioning quits early when it encounters a container with no resources specified. This leaks resource capacity on the node: the Pod is left untouched, so it keeps reporting its full resource requests to the scheduler even though it does not actually use them, because the logic that moves the container onto the management partition still works fine. The end result is lowered node capacity for scheduling.

Signed-off-by: Martin Sivak <[email protected]>
1 parent b94367c
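
In QoS terms, the reasoning behind the fix is: a container with neither a cpu nor a memory request is best-effort as far as partitioning is concerned, so leaving it untouched cannot change the Pod's QoS class, whereas a container that sets only one of the two must still fail validation, because stripping its requests could change the class. Below is a condensed, self-contained sketch of the guard the commit introduces; the helper name skippableForPartitioning is illustrative only, the real change lives in updateContainers in the diff that follows.

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
)

// skippableForPartitioning mirrors the new guard: a container with no
// requests at all, or with neither a cpu request nor a (possibly
// limit-implied) memory request, is simply left alone instead of
// failing the whole Pod.
func skippableForPartitioning(c *v1.Container) bool {
    if c.Resources.Requests == nil {
        return true
    }
    _, cpuOk := c.Resources.Requests[v1.ResourceCPU]
    _, memoryOk := c.Resources.Requests[v1.ResourceMemory]
    // A memory limit without an explicit request implies an equal request.
    if !memoryOk && c.Resources.Limits != nil {
        _, memoryOk = c.Resources.Limits[v1.ResourceMemory]
    }
    return !cpuOk && !memoryOk
}

func main() {
    bestEffort := v1.Container{Name: "sidecar"}
    burstable := v1.Container{
        Name: "nginx",
        Resources: v1.ResourceRequirements{
            Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
        },
    }
    fmt.Println(skippableForPartitioning(&bestEffort)) // true: skipped, Pod left untouched
    fmt.Println(skippableForPartitioning(&burstable))  // false: still subject to the cpu/memory checks
}

The limits fallback reflects that a memory limit without an explicit request implies an equal request, so such a container is not treated as best-effort.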

2 files changed: +238 -18 lines

2 files changed

+238
-18
lines changed

pkg/kubelet/managed/managed.go

Lines changed: 25 additions & 3 deletions
@@ -117,14 +117,36 @@ func GenerateResourceName(workloadName string) v1.ResourceName {
 func updateContainers(workloadName string, pod *v1.Pod) error {
     updateContainer := func(container *v1.Container) error {
         if container.Resources.Requests == nil {
-            return fmt.Errorf("managed container %v does not have Resource.Requests", container.Name)
+            // Nothing to modify, but that is OK, it will not
+            // change the QoS class of the modified Pod
+            return nil
         }
-        if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok {
+
+        _, cpuOk := container.Resources.Requests[v1.ResourceCPU]
+        _, memoryOk := container.Resources.Requests[v1.ResourceMemory]
+
+        // It is possible memory is configured using limits only and that implies
+        // requests with the same value, check for that in case memory requests
+        // are not present by themselves.
+        if !memoryOk && container.Resources.Limits != nil {
+            _, memoryOk = container.Resources.Limits[v1.ResourceMemory]
+        }
+
+        // When both cpu and memory requests are missing, there is nothing
+        // to do
+        if !cpuOk && !memoryOk {
+            return nil
+        }
+
+        // Both memory and cpu have to be set to make sure stripping them
+        // will not change the QoS class of the Pod
+        if !cpuOk {
             return fmt.Errorf("managed container %v does not have cpu requests", container.Name)
         }
-        if _, ok := container.Resources.Requests[v1.ResourceMemory]; !ok {
+        if !memoryOk {
             return fmt.Errorf("managed container %v does not have memory requests", container.Name)
         }
+
         if container.Resources.Limits == nil {
             container.Resources.Limits = v1.ResourceList{}
         }

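For context on the {"cpushares":102} and {"cpushares":20,"cpulimit":100} values asserted in the test expectations below: the per-container workload annotation records the stripped CPU request converted to cgroup cpu.shares. A minimal standalone sketch of that conversion, assuming the conventional 1024-shares-per-CPU scaling with a floor of 2 shares; the helper name is illustrative, not the kubelet's own function.

package main

import "fmt"

// milliCPUToShares converts a CPU request in millicores to cgroup
// cpu.shares, assuming 1024 shares per CPU and a minimum of 2 shares.
func milliCPUToShares(milliCPU int64) int64 {
    const (
        sharesPerCPU  = 1024
        milliCPUToCPU = 1000
        minShares     = 2
    )
    shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
    if shares < minShares {
        return minShares
    }
    return shares
}

func main() {
    fmt.Println(milliCPUToShares(100)) // 102, matching {"cpushares":102}
    fmt.Println(milliCPUToShares(20))  // 20, matching {"cpushares":20,...}
}

Under that scaling a 100m request maps to 100 * 1024 / 1000 = 102 shares and a 20m request to 20 shares, which is where the cpushares values in the expected annotations come from.
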
pkg/kubelet/managed/managed_test.go

Lines changed: 213 additions & 15 deletions
@@ -19,17 +19,6 @@ func TestModifyStaticPodForPinnedManagementErrorStates(t *testing.T) {
         pod *v1.Pod
         expectedError error
     }{
-        {
-            pod: createPod(workloadAnnotations, nil,
-                &v1.Container{
-                    Name: "nginx",
-                    Image: "test/image",
-                    Resources: v1.ResourceRequirements{
-                        Requests: nil,
-                    },
-                }),
-            expectedError: fmt.Errorf("managed container nginx does not have Resource.Requests"),
-        },
         {
             pod: createPod(workloadAnnotations, nil,
                 &v1.Container{
@@ -129,6 +118,7 @@ func TestStaticPodManaged(t *testing.T) {
         pod *v1.Pod
         expectedAnnotations map[string]string
         isGuaranteed bool
+        isBestEffort bool
     }{
         {
             pod: &v1.Pod{
@@ -168,6 +158,47 @@ func TestStaticPodManaged(t *testing.T) {
                 "resources.workload.openshift.io/nginx": `{"cpushares":102}`,
             },
         },
+        {
+            pod: &v1.Pod{
+                TypeMeta: metav1.TypeMeta{
+                    Kind: "Pod",
+                    APIVersion: "",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "test",
+                    UID: "12345",
+                    Namespace: "mynamespace",
+                    Annotations: map[string]string{
+                        "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                    },
+                },
+                Spec: v1.PodSpec{
+                    Containers: []v1.Container{
+                        {
+                            Name: "nginx",
+                            Image: "test/image",
+                            Resources: v1.ResourceRequirements{
+                                Requests: v1.ResourceList{
+                                    v1.ResourceName(v1.ResourceMemory): resource.MustParse("100m"),
+                                    v1.ResourceName(v1.ResourceCPU): resource.MustParse("100m"),
+                                },
+                                Limits: v1.ResourceList{
+                                    v1.ResourceName(v1.ResourceMemory): resource.MustParse("100m"),
+                                },
+                            },
+                        },
+                    },
+                    SecurityContext: &v1.PodSecurityContext{},
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodPending,
+                },
+            },
+            expectedAnnotations: map[string]string{
+                "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                "resources.workload.openshift.io/nginx": `{"cpushares":102}`,
+            },
+        },
         {
             pod: &v1.Pod{
                 TypeMeta: metav1.TypeMeta{
@@ -270,6 +301,164 @@ func TestStaticPodManaged(t *testing.T) {
                 "resources.workload.openshift.io/c1": `{"cpushares":20,"cpulimit":100}`,
             },
         },
+        {
+            pod: &v1.Pod{
+                TypeMeta: metav1.TypeMeta{
+                    Kind: "Pod",
+                    APIVersion: "",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "test",
+                    UID: "12345",
+                    Namespace: "mynamespace",
+                    Annotations: map[string]string{
+                        "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                    },
+                },
+                Spec: v1.PodSpec{
+                    Containers: []v1.Container{
+                        {
+                            Name: "c1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{},
+                        },
+                    },
+                    SecurityContext: &v1.PodSecurityContext{},
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodPending,
+                },
+            },
+            expectedAnnotations: map[string]string{
+                "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+            },
+            isBestEffort: true,
+        },
+        {
+            pod: &v1.Pod{
+                TypeMeta: metav1.TypeMeta{
+                    Kind: "Pod",
+                    APIVersion: "",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "test",
+                    UID: "12345",
+                    Namespace: "mynamespace",
+                    Annotations: map[string]string{
+                        "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                    },
+                },
+                Spec: v1.PodSpec{
+                    Containers: []v1.Container{
+                        {
+                            Name: "c1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{
+                                Requests: v1.ResourceList{
+                                    v1.ResourceName("dummy"): resource.MustParse("20m"),
+                                },
+                            },
+                        },
+                    },
+                    SecurityContext: &v1.PodSecurityContext{},
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodPending,
+                },
+            },
+            expectedAnnotations: map[string]string{
+                "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+            },
+            isBestEffort: true,
+        },
+        {
+            pod: &v1.Pod{
+                TypeMeta: metav1.TypeMeta{
+                    Kind: "Pod",
+                    APIVersion: "",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "test",
+                    UID: "12345",
+                    Namespace: "mynamespace",
+                    Annotations: map[string]string{
+                        "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                    },
+                },
+                Spec: v1.PodSpec{
+                    Containers: []v1.Container{
+                        {
+                            Name: "c1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{},
+                        },
+                    },
+                    InitContainers: []v1.Container{
+                        {
+                            Name: "ic1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{},
+                        },
+                    },
+                    SecurityContext: &v1.PodSecurityContext{},
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodPending,
+                },
+            },
+            expectedAnnotations: map[string]string{
+                "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+            },
+            isBestEffort: true,
+        },
+        {
+            pod: &v1.Pod{
+                TypeMeta: metav1.TypeMeta{
+                    Kind: "Pod",
+                    APIVersion: "",
+                },
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "test",
+                    UID: "12345",
+                    Namespace: "mynamespace",
+                    Annotations: map[string]string{
+                        "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+                    },
+                },
+                Spec: v1.PodSpec{
+                    Containers: []v1.Container{
+                        {
+                            Name: "c1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{
+                                Requests: v1.ResourceList{
+                                    v1.ResourceName("dummy"): resource.MustParse("20m"),
+                                },
+                            },
+                        },
+                    },
+                    InitContainers: []v1.Container{
+                        {
+                            Name: "ic1",
+                            Image: "test/nginx",
+                            Resources: v1.ResourceRequirements{
+                                Requests: v1.ResourceList{
+                                    v1.ResourceName("dummy"): resource.MustParse("20m"),
+                                },
+                            },
+                        },
+                    },
+                    SecurityContext: &v1.PodSecurityContext{},
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodPending,
+                },
+            },
+            expectedAnnotations: map[string]string{
+                "target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
+            },
+            isBestEffort: true,
+        },
         {
             pod: &v1.Pod{
                 TypeMeta: metav1.TypeMeta{
@@ -481,15 +670,24 @@ func TestStaticPodManaged(t *testing.T) {
             if container.Resources.Requests.Cpu().String() != "0" && !tc.isGuaranteed {
                 t.Errorf("cpu requests should be 0 got %v", container.Resources.Requests.Cpu().String())
             }
-            if container.Resources.Requests.Memory().String() == "0" && !tc.isGuaranteed {
-                t.Errorf("memory requests were %v but should be %v", container.Resources.Requests.Memory().String(), container.Resources.Requests.Memory().String())
+            if container.Resources.Requests.Memory().String() == "0" && !tc.isGuaranteed && !tc.isBestEffort {
+                t.Errorf("memory requests were %v but should be %v in container %v", container.Resources.Requests.Memory().String(), container.Resources.Requests.Memory().String(), container.Name)
             }
-            if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed {
+            if container.Resources.Requests.Memory().String() != "0" && !tc.isGuaranteed && tc.isBestEffort {
+                t.Errorf("memory requests should be 0 got %v", container.Resources.Requests.Memory().String())
+            }
+            if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed && !tc.isBestEffort {
                 t.Errorf("managed capacity label missing from pod %v and container %v", tc.pod.Name, container.Name)
             }
-            if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed {
+            if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; !exists && !tc.isGuaranteed && !tc.isBestEffort {
                 t.Errorf("managed capacity label missing from pod %v and container %v limits", tc.pod.Name, container.Name)
             }
+            if _, exists := container.Resources.Requests[GenerateResourceName(workloadName)]; exists && tc.isBestEffort {
+                t.Errorf("managed capacity label present in best-effort pod %v and container %v requests", tc.pod.Name, container.Name)
+            }
+            if _, exists := container.Resources.Limits[GenerateResourceName(workloadName)]; exists && tc.isBestEffort {
+                t.Errorf("managed capacity label present in best-effort pod %v and container %v limits", tc.pod.Name, container.Name)
+            }
         }
     }
 }
