| 1 | +/* |
| 2 | +Copyright 2022 The Kubernetes Authors. |
| 3 | + |
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | + |
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + |
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package eligibility |
| 18 | + |
| 19 | +import ( |
| 20 | + "reflect" |
| 21 | + "time" |
| 22 | + |
| 23 | + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" |
| 24 | + "k8s.io/autoscaler/cluster-autoscaler/context" |
| 25 | + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/actuation" |
| 26 | + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable" |
| 27 | + "k8s.io/autoscaler/cluster-autoscaler/simulator" |
| 28 | + "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization" |
| 29 | + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" |
| 30 | + "k8s.io/autoscaler/cluster-autoscaler/utils/klogx" |
| 31 | + |
| 32 | + apiv1 "k8s.io/api/core/v1" |
| 33 | + klog "k8s.io/klog/v2" |
| 34 | + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" |
| 35 | +) |
| 36 | + |
| 37 | +const ( |
| 38 | + // ScaleDownDisabledKey is the name of the annotation marking a node as not eligible for scale down. |
| 39 | + ScaleDownDisabledKey = "cluster-autoscaler.kubernetes.io/scale-down-disabled" |
| 40 | +) |
| 41 | + |
| 42 | +// Checker is responsible for deciding which nodes pass the criteria for scale down. |
| 43 | +type Checker struct { |
| 44 | + thresholdGetter utilizationThresholdGetter |
| 45 | +} |
| 46 | + |
| 47 | +type utilizationThresholdGetter interface { |
| 48 | + // GetScaleDownUtilizationThreshold returns ScaleDownUtilizationThreshold value that should be used for a given NodeGroup. |
| 49 | + GetScaleDownUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) |
| 50 | + // GetScaleDownGpuUtilizationThreshold returns ScaleDownGpuUtilizationThreshold value that should be used for a given NodeGroup. |
| 51 | + GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error) |
| 52 | +} |
| 53 | + |
| 54 | +// NewChecker creates a new Checker object. |
| 55 | +func NewChecker(thresholdGetter utilizationThresholdGetter) *Checker { |
| 56 | + return &Checker{ |
| 57 | + thresholdGetter: thresholdGetter, |
| 58 | + } |
| 59 | +} |
| 60 | + |
| 61 | +// FilterOutUnremovable accepts a list of nodes that are candidates for |
| 62 | +// scale down and filters out nodes that cannot be removed, along with node |
| 63 | +// utilization info. |
| 64 | +// TODO(x13n): Node utilization could actually be calculated independently for |
| 65 | +// all nodes and just used here. Next refactor... |
| 66 | +func (c *Checker) FilterOutUnremovable(context *context.AutoscalingContext, scaleDownCandidates []*apiv1.Node, timestamp time.Time, unremovableNodes *unremovable.Nodes) ([]string, map[string]utilization.Info) { |
| 67 | + unremovableNodes.Update(context.ClusterSnapshot.NodeInfos(), timestamp) |
| 68 | + |
| 69 | + skipped := 0 |
| 70 | + utilizationMap := make(map[string]utilization.Info) |
| 71 | + currentlyUnneededNodeNames := make([]string, 0, len(scaleDownCandidates)) |
| 72 | + utilLogsQuota := klogx.NewLoggingQuota(20) |
| 73 | + |
| 74 | + for _, node := range scaleDownCandidates { |
| 75 | + nodeInfo, err := context.ClusterSnapshot.NodeInfos().Get(node.Name) |
| 76 | + if err != nil { |
| 77 | + klog.Errorf("Can't retrieve scale-down candidate %s from snapshot, err: %v", node.Name, err) |
| 78 | + unremovableNodes.AddReason(node, simulator.UnexpectedError) |
| 79 | + continue |
| 80 | + } |
| 81 | + |
| 82 | + // Skip nodes that were recently checked. |
| 83 | + if unremovableNodes.IsRecent(node.Name) { |
| 84 | + unremovableNodes.AddReason(node, simulator.RecentlyUnremovable) |
| 85 | + skipped++ |
| 86 | + continue |
| 87 | + } |
| 88 | + |
| 89 | + reason, utilInfo := c.unremovableReasonAndNodeUtilization(context, timestamp, nodeInfo, utilLogsQuota) |
| 90 | + if utilInfo != nil { |
| 91 | + utilizationMap[node.Name] = *utilInfo |
| 92 | + } |
| 93 | + if reason != simulator.NoReason { |
| 94 | + unremovableNodes.AddReason(node, reason) |
| 95 | + continue |
| 96 | + } |
| 97 | + |
| 98 | + currentlyUnneededNodeNames = append(currentlyUnneededNodeNames, node.Name) |
| 99 | + } |
| 100 | + |
| 101 | + klogx.V(4).Over(utilLogsQuota).Infof("Skipped logging utilization for %d other nodes", -utilLogsQuota.Left()) |
| 102 | + if skipped > 0 { |
| 103 | + klog.V(1).Infof("Scale-down calculation: ignoring %v nodes unremovable in the last %v", skipped, context.AutoscalingOptions.UnremovableNodeRecheckTimeout) |
| 104 | + } |
| 105 | + return currentlyUnneededNodeNames, utilizationMap |
| 106 | +} |
| 107 | + |
| 108 | +func (c *Checker) unremovableReasonAndNodeUtilization(context *context.AutoscalingContext, timestamp time.Time, nodeInfo *schedulerframework.NodeInfo, utilLogsQuota *klogx.Quota) (simulator.UnremovableReason, *utilization.Info) { |
| 109 | + node := nodeInfo.Node() |
| 110 | + |
| 111 | + // Skip nodes marked to be deleted, if they were marked recently. |
| 112 | + // Nodes marked a long time ago become eligible for deletion again - something went wrong with them |
| 113 | + // and they have not been deleted. |
| 114 | + if actuation.IsNodeBeingDeleted(node, timestamp) { |
| 115 | + klog.V(1).Infof("Skipping %s from delete consideration - the node is currently being deleted", node.Name) |
| 116 | + return simulator.CurrentlyBeingDeleted, nil |
| 117 | + } |
| 118 | + |
| 119 | + // Skip nodes annotated as not eligible for scale down. |
| 120 | + if HasNoScaleDownAnnotation(node) { |
| 121 | + klog.V(1).Infof("Skipping %s from delete consideration - the node is marked as no scale down", node.Name) |
| 122 | + return simulator.ScaleDownDisabledAnnotation, nil |
| 123 | + } |
| 124 | + |
| 125 | + utilInfo, err := utilization.Calculate(nodeInfo, context.IgnoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, context.CloudProvider.GPULabel(), timestamp) |
| 126 | + if err != nil { |
| 127 | + klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err) |
| 128 | + } |
| 129 | + |
| 130 | + nodeGroup, err := context.CloudProvider.NodeGroupForNode(node) |
| 131 | + if err != nil { |
| 132 | + klog.Warningf("Node group not found for node %v: %v", node.Name, err) |
| 133 | + return simulator.UnexpectedError, nil |
| 134 | + } |
| 135 | + if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() { |
| 136 | + // We should never get here as non-autoscaled nodes should not be included in scaleDownCandidates list |
| 137 | + // (and the default PreFilteringScaleDownNodeProcessor would indeed filter them out). |
| 138 | + klog.Warningf("Skipped %s from delete consideration - the node is not autoscaled", node.Name) |
| 139 | + return simulator.NotAutoscaled, nil |
| 140 | + } |
| 141 | + |
| 142 | + underutilized, err := c.isNodeBelowUtilizationThreshold(context, node, nodeGroup, utilInfo) |
| 143 | + if err != nil { |
| 144 | + klog.Warningf("Failed to check utilization thresholds for %s: %v", node.Name, err) |
| 145 | + return simulator.UnexpectedError, nil |
| 146 | + } |
| 147 | + if !underutilized { |
| 148 | + klog.V(4).Infof("Node %s is not suitable for removal - %s utilization too big (%f)", node.Name, utilInfo.ResourceName, utilInfo.Utilization) |
| 149 | + return simulator.NotUnderutilized, &utilInfo |
| 150 | + } |
| 151 | + |
| 152 | + klogx.V(4).UpTo(utilLogsQuota).Infof("Node %s - %s utilization %f", node.Name, utilInfo.ResourceName, utilInfo.Utilization) |
| 153 | + |
| 154 | + return simulator.NoReason, &utilInfo |
| 155 | +} |
| 156 | + |
| 157 | +// isNodeBelowUtilizationThreshold determines if a given node utilization is below threshold. |
| 158 | +func (c *Checker) isNodeBelowUtilizationThreshold(context *context.AutoscalingContext, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup, utilInfo utilization.Info) (bool, error) { |
| 159 | + var threshold float64 |
| 160 | + var err error |
| 161 | + if gpu.NodeHasGpu(context.CloudProvider.GPULabel(), node) { |
| 162 | + threshold, err = c.thresholdGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup) |
| 163 | + if err != nil { |
| 164 | + return false, err |
| 165 | + } |
| 166 | + } else { |
| 167 | + threshold, err = c.thresholdGetter.GetScaleDownUtilizationThreshold(context, nodeGroup) |
| 168 | + if err != nil { |
| 169 | + return false, err |
| 170 | + } |
| 171 | + } |
| 172 | + if utilInfo.Utilization >= threshold { |
| 173 | + return false, nil |
| 174 | + } |
| 175 | + return true, nil |
| 176 | +} |
| 177 | + |
| 178 | +// HasNoScaleDownAnnotation checks whether the node has an annotation blocking it from being scaled down. |
| 179 | +func HasNoScaleDownAnnotation(node *apiv1.Node) bool { |
| 180 | + return node.Annotations[ScaleDownDisabledKey] == "true" |
| 181 | +} |
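
Below is a minimal, hypothetical usage sketch, not part of the change above: a `staticThresholdGetter` stands in for the real per-node-group configuration source and satisfies the unexported `utilizationThresholdGetter` interface, showing how `NewChecker` receives its threshold dependency. The import paths are the ones used in the file; the type, field names, and values are illustrative assumptions.

```go
// Hypothetical sketch only - staticThresholdGetter and this wiring are not part of the change above.
package main

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility"
)

// staticThresholdGetter returns fixed thresholds, ignoring the node group.
type staticThresholdGetter struct {
	cpuMem float64
	gpu    float64
}

func (g *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
	return g.cpuMem, nil
}

func (g *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
	return g.gpu, nil
}

func main() {
	// Nodes whose utilization stays below 0.5 remain scale-down candidates.
	checker := eligibility.NewChecker(&staticThresholdGetter{cpuMem: 0.5, gpu: 0.5})
	_ = checker // checker.FilterOutUnremovable(...) would then run on each scale-down loop iteration.
}
```

Hiding the threshold lookup behind the small interface keeps Checker easy to test: a fixed getter like the one above can replace the full per-node-group configuration machinery.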