Merge pull request #7688 from macsko/enforce_provisioning_request_processing_even_if_all_pods_are_young

Enforce provisioning requests processing even if all pods are new
kubernetes · Jan 23, 2025 · f6064ee · f6064ee
2 parents 0b3c289 + d7c325a
commit f6064ee
Show file tree

Hide file tree

Showing 9 changed files with 191 additions and 3 deletions.
diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
@@ -520,13 +520,15 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 		return false, nil
 	}
 
+	forceScaleUp := a.processors.ScaleUpEnforcer.ShouldForceScaleUp(unschedulablePodsToHelp)
+
 	if len(unschedulablePodsToHelp) == 0 {
 		scaleUpStatus.Result = status.ScaleUpNotNeeded
 		klog.V(1).Info("No unschedulable pods")
-	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
+	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal && !forceScaleUp {
 		scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable
 		klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
-	} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
+	} else if len(a.BypassedSchedulers) == 0 && !forceScaleUp && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
 		// The assumption here is that these pods have been created very recently and probably there
 		// is more pods to come. In theory we could check the newest pod time but then if pod were created
 		// slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for long time.

diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
@@ -563,6 +563,8 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot
 		opts.LoopStartNotifier = loopstart.NewObserversList([]loopstart.Observer{provreqProcesor})
 
 		podListProcessor.AddProcessor(provreqProcesor)
+
+		opts.Processors.ScaleUpEnforcer = provreq.NewProvisioningRequestScaleUpEnforcer()
 	}
 
 	if *proactiveScaleupEnabled {

diff --git a/cluster-autoscaler/processors/pods/scaleup_enforcer.go b/cluster-autoscaler/processors/pods/scaleup_enforcer.go
@@ -0,0 +1,38 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package pods
+
+import apiv1 "k8s.io/api/core/v1"
+
+// ScaleUpEnforcer can force a scale-up attempt even if all pods are new or MaxNodesTotal has been reached.
+type ScaleUpEnforcer interface {
+	ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool // true means scale up must be attempted for these pods
+}
+
+// NoOpScaleUpEnforcer returns false by default in case of ProvisioningRequests disabled.
+type NoOpScaleUpEnforcer struct {
+}
+
+// NewDefaultScaleUpEnforcer returns the default ScaleUpEnforcer, which is a
+// NoOpScaleUpEnforcer.
+func NewDefaultScaleUpEnforcer() ScaleUpEnforcer {
+	return new(NoOpScaleUpEnforcer)
+}
+
+// ShouldForceScaleUp always reports false; the given pods are not inspected.
+func (*NoOpScaleUpEnforcer) ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool {
+	return false
+}
diff --git a/cluster-autoscaler/processors/pods/scaleup_enforcer_test.go b/cluster-autoscaler/processors/pods/scaleup_enforcer_test.go
@@ -0,0 +1,34 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package pods
+
+import (
+	"testing"
+
+	apiv1 "k8s.io/api/core/v1"
+	testutils "k8s.io/autoscaler/cluster-autoscaler/utils/test"
+)
+
+// TestDefaultScaleUpEnforcer checks that the default enforcer does not force a
+// scale up when handed an unschedulable pod.
+func TestDefaultScaleUpEnforcer(t *testing.T) {
+	pendingPods := []*apiv1.Pod{testutils.BuildTestPod("p1", 40, 0)}
+	enforcer := NewDefaultScaleUpEnforcer()
+	if enforcer.ShouldForceScaleUp(pendingPods) {
+		t.Errorf("Error: scaleUpEnforcer should not force scale up by default")
+	}
+}
diff --git a/cluster-autoscaler/processors/processors.go b/cluster-autoscaler/processors/processors.go
@@ -74,6 +74,8 @@ type AutoscalingProcessors struct {
 	ScaleStateNotifier *nodegroupchange.NodeGroupChangeObserversList
 	// AsyncNodeGroupChecker checks if node group is upcoming or not
 	AsyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker
+	// ScaleUpEnforcer can force scale up even if all pods are new or MaxNodesTotal was achieved.
+	ScaleUpEnforcer pods.ScaleUpEnforcer
 }
 
 // DefaultProcessors returns default set of processors.
@@ -100,6 +102,7 @@ func DefaultProcessors(options config.AutoscalingOptions) *AutoscalingProcessors
 		TemplateNodeInfoProvider:    nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false),
 		ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(),
 		ScaleStateNotifier:          nodegroupchange.NewNodeGroupChangeObserversList(),
+		ScaleUpEnforcer:             pods.NewDefaultScaleUpEnforcer(),
 	}
 }
 

diff --git a/cluster-autoscaler/processors/provreq/pods_filter.go b/cluster-autoscaler/processors/provreq/pods_filter.go
@@ -22,7 +22,7 @@ import (
 
 	apiv1 "k8s.io/api/core/v1"
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
+	v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/pods"
 	provreqpods "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods"

diff --git a/cluster-autoscaler/processors/provreq/scaleup_enforcer.go b/cluster-autoscaler/processors/provreq/scaleup_enforcer.go
@@ -0,0 +1,41 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package provreq
+
+import (
+	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/processors/pods"
+)
+
+// ProvisioningRequestScaleUpEnforcer forces scale up if there is any unschedulable pod that belongs to ProvisioningRequest.
+type ProvisioningRequestScaleUpEnforcer struct {
+}
+
+// NewProvisioningRequestScaleUpEnforcer returns a ScaleUpEnforcer backed by a
+// ProvisioningRequestScaleUpEnforcer.
+func NewProvisioningRequestScaleUpEnforcer() pods.ScaleUpEnforcer {
+	return new(ProvisioningRequestScaleUpEnforcer)
+}
+
+// ShouldForceScaleUp reports whether any of the unschedulable pods belongs to a
+// ProvisioningRequest, as determined by provisioningRequestName. It returns true
+// on the first such pod and false when there is none (including an empty list).
+func (p *ProvisioningRequestScaleUpEnforcer) ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool {
+	for i := range unschedulablePods {
+		_, belongsToProvReq := provisioningRequestName(unschedulablePods[i])
+		if belongsToProvReq {
+			return true
+		}
+	}
+	return false
+}
diff --git a/cluster-autoscaler/processors/provreq/scaleup_enforcer_test.go b/cluster-autoscaler/processors/provreq/scaleup_enforcer_test.go
@@ -0,0 +1,66 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package provreq
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	apiv1 "k8s.io/api/core/v1"
+	corev1 "k8s.io/api/core/v1"
+	v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods"
+	testutils "k8s.io/autoscaler/cluster-autoscaler/utils/test"
+)
+
+// TestProvisioningRequestScaleUpEnforcer checks that ShouldForceScaleUp returns true
+// when at least one unschedulable pod carries a ProvisioningRequest annotation
+// (current or deprecated key) and false otherwise, including for an empty pod list.
+func TestProvisioningRequestScaleUpEnforcer(t *testing.T) {
+	prPod1 := testutils.BuildTestPod("pr-pod-1", 500, 10)
+	prPod1.Annotations[v1.ProvisioningRequestPodAnnotationKey] = "pr-class"
+
+	prPod2 := testutils.BuildTestPod("pr-pod-2", 500, 10)
+	prPod2.Annotations[pods.DeprecatedProvisioningRequestPodAnnotationKey] = "pr-class-2"
+
+	pod1 := testutils.BuildTestPod("pod-1", 500, 10)
+	pod2 := testutils.BuildTestPod("pod-2", 500, 10)
+
+	testCases := map[string]struct {
+		unschedulablePods []*apiv1.Pod
+		want              bool
+	}{
+		"Any pod with ProvisioningRequest annotation key forces scale up": {
+			unschedulablePods: []*corev1.Pod{prPod1, pod1},
+			want:              true,
+		},
+		"Any pod with ProvisioningRequest deprecated annotation key forces scale up": {
+			unschedulablePods: []*corev1.Pod{prPod2, pod1},
+			want:              true,
+		},
+		"Pod without ProvisioningRequest annotation key don't force scale up": {
+			unschedulablePods: []*corev1.Pod{pod1, pod2},
+			want:              false,
+		},
+		"No pods don't force scale up": {
+			unschedulablePods: []*corev1.Pod{},
+			want:              false,
+		},
+	}
+	for name, test := range testCases {
+		// Run every case as a named subtest so a failure identifies the scenario.
+		t.Run(name, func(t *testing.T) {
+			scaleUpEnforcer := NewProvisioningRequestScaleUpEnforcer()
+			got := scaleUpEnforcer.ShouldForceScaleUp(test.unschedulablePods)
+			// assert.Equal takes (expected, actual); this order keeps failure output labelled correctly.
+			assert.Equal(t, test.want, got)
+		})
+	}
+}
diff --git a/cluster-autoscaler/processors/test/common.go b/cluster-autoscaler/processors/test/common.go
@@ -31,6 +31,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodes"
+	"k8s.io/autoscaler/cluster-autoscaler/processors/pods"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling"
@@ -56,5 +57,6 @@ func NewTestProcessors(context *context.AutoscalingContext) *processors.Autoscal
 		ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(),
 		ScaleStateNotifier:          nodegroupchange.NewNodeGroupChangeObserversList(),
 		AsyncNodeGroupStateChecker:  asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(),
+		ScaleUpEnforcer:             pods.NewDefaultScaleUpEnforcer(),
 	}
 }