From 719acb3f1186bce5498eb2afb773b690a89cca71 Mon Sep 17 00:00:00 2001 From: Jiawen Liu Date: Thu, 31 Oct 2024 00:05:14 +0800 Subject: [PATCH] feat: scheduler and binder support interPodAffinity and podTopologySpread plugins --- pkg/binder/cache/cache.go | 15 + .../commonstores/node_store/node_store.go | 10 +- pkg/binder/cache/fake/fake_cache.go | 4 + pkg/binder/cache/interface.go | 2 + pkg/binder/factory.go | 7 +- pkg/binder/framework/handle/handle.go | 2 + .../interpodaffinity/interpodaffinity.go | 85 + .../interpodaffinity/interpodaffinity_test.go | 1746 +++++++++++++ .../podtopologyspread/podtopologyspread.go | 136 + .../podtopologyspread_test.go | 613 +++++ pkg/binder/framework/registry.go | 4 + pkg/binder/godel_binder_frameworkhandle.go | 4 + pkg/binder/testing/framework_helpers.go | 4 + pkg/framework/api/nodeinfo_hashslice.go | 46 + pkg/framework/api/nodeinfo_podinfo_test.go | 2 +- pkg/plugins/helper/node_affinity.go | 85 + pkg/plugins/helper/node_affinity_test.go | 716 +++++ pkg/plugins/helper/node_info.go | 66 + pkg/plugins/interpodaffinity/util.go | 294 +++ pkg/plugins/interpodaffinity/util_test.go | 240 ++ pkg/plugins/podtopologyspread/util.go | 359 +++ pkg/scheduler/cache/cache_test.go | 24 +- .../commonstores/node_store/node_store.go | 4 +- pkg/scheduler/cache/snapshot.go | 67 +- pkg/scheduler/factory.go | 6 + .../plugins/interpodaffinity/filtering.go | 130 + .../interpodaffinity/filtering_test.go | 2315 +++++++++++++++++ .../plugins/interpodaffinity/plugin.go | 77 + .../plugins/interpodaffinity/scoring.go | 272 ++ .../plugins/interpodaffinity/scoring_test.go | 916 +++++++ .../plugins/podtopologyspread/common.go | 61 + .../plugins/podtopologyspread/filtering.go | 136 + .../podtopologyspread/filtering_test.go | 1794 +++++++++++++ .../plugins/podtopologyspread/plugin.go | 94 + .../plugins/podtopologyspread/scoring.go | 321 +++ .../plugins/podtopologyspread/scoring_test.go | 988 +++++++ pkg/scheduler/framework/registry.go | 4 + pkg/scheduler/util/topologies.go | 84 + .../framework-helper/wrappers.go | 47 + pkg/util/parallelize/parallelism.go | 17 +- pkg/util/pod/podstate.go | 3 +- test/e2e/scheduling/hard_constraints.go | 989 ++++++- test/e2e/scheduling/soft_constraints.go | 371 ++- 43 files changed, 12987 insertions(+), 173 deletions(-) create mode 100644 pkg/binder/framework/plugins/interpodaffinity/interpodaffinity.go create mode 100644 pkg/binder/framework/plugins/interpodaffinity/interpodaffinity_test.go create mode 100644 pkg/binder/framework/plugins/podtopologyspread/podtopologyspread.go create mode 100644 pkg/binder/framework/plugins/podtopologyspread/podtopologyspread_test.go create mode 100644 pkg/plugins/helper/node_affinity.go create mode 100644 pkg/plugins/helper/node_affinity_test.go create mode 100644 pkg/plugins/helper/node_info.go create mode 100644 pkg/plugins/interpodaffinity/util.go create mode 100644 pkg/plugins/interpodaffinity/util_test.go create mode 100644 pkg/plugins/podtopologyspread/util.go create mode 100644 pkg/scheduler/framework/plugins/interpodaffinity/filtering.go create mode 100644 pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go create mode 100644 pkg/scheduler/framework/plugins/interpodaffinity/plugin.go create mode 100644 pkg/scheduler/framework/plugins/interpodaffinity/scoring.go create mode 100644 pkg/scheduler/framework/plugins/interpodaffinity/scoring_test.go create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/common.go create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/filtering.go 
create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/plugin.go create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/scoring.go create mode 100644 pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go create mode 100644 pkg/scheduler/util/topologies.go diff --git a/pkg/binder/cache/cache.go b/pkg/binder/cache/cache.go index da175b61..f45f37c1 100644 --- a/pkg/binder/cache/cache.go +++ b/pkg/binder/cache/cache.go @@ -53,18 +53,27 @@ type binderCache struct { handler commoncache.CacheHandler mu *sync.RWMutex + + nodeSlices *framework.NodeSlices } func newBinderCache(handler commoncache.CacheHandler) *binderCache { + nodeSlices := framework.NewNodeSlices() + bc := &binderCache{ CommonStoresSwitch: commonstore.MakeStoreSwitch(handler, commonstore.Cache, commonstores.GlobalRegistries, orderedStoreNames), handler: handler, mu: handler.Mutex(), + + nodeSlices: nodeSlices, } // NodeStore and PodStore are mandatory, so we don't care if they are nil. nodeStore, podStore := bc.CommonStoresSwitch.Find(nodestore.Name), bc.CommonStoresSwitch.Find(podstore.Name) + nodeStore.(*nodestore.NodeStore).AfterAdd = func(n framework.NodeInfo) { nodeSlices.Update(n, true) } + nodeStore.(*nodestore.NodeStore).AfterDelete = func(n framework.NodeInfo) { nodeSlices.Update(n, false) } + handler.SetNodeHandler(nodeStore.(*nodestore.NodeStore).GetNodeInfo) handler.SetPodHandler(podStore.(*podstore.PodStore).GetPodState) @@ -217,3 +226,9 @@ func (cache *binderCache) FindStore(storeName commonstore.StoreName) commonstore defer cache.mu.RUnlock() return cache.CommonStoresSwitch.Find(storeName) } + +func (cache *binderCache) List() []framework.NodeInfo { + cache.mu.RLock() + defer cache.mu.RUnlock() + return append(cache.nodeSlices.InPartitionNodeSlice.Nodes(), cache.nodeSlices.OutOfPartitionNodeSlice.Nodes()...) +} diff --git a/pkg/binder/cache/commonstores/node_store/node_store.go b/pkg/binder/cache/commonstores/node_store/node_store.go index f1dc7f3f..2f613ee5 100644 --- a/pkg/binder/cache/commonstores/node_store/node_store.go +++ b/pkg/binder/cache/commonstores/node_store/node_store.go @@ -61,7 +61,9 @@ type NodeStore struct { // `Deleted` holds all the nodes: // 1. that have been deleted but still have residual pods. // 2. that its pod comes before its own, so we can't use it to schedule. - Deleted sets.String + Deleted sets.String + AfterAdd func(framework.NodeInfo) // Triggered by a call to the NodeStore.Add function, used to maintain additional information about the node. + AfterDelete func(framework.NodeInfo) // Triggered by a call to the NodeStore.Delete function, used to maintain additional information about the node. // A map from image name to its imageState. // ATTENTION: Like `Deleted` field, it will only be modified and used in the Cache. @@ -426,11 +428,17 @@ func (s *NodeStore) Set(nodeName string, nodeInfo framework.NodeInfo) { // Add will Store the node and trigger the AfterAdd. func (s *NodeStore) Add(nodeName string, nodeInfo framework.NodeInfo) { s.Store.Set(nodeName, nodeInfo) + if s.AfterAdd != nil { + s.AfterAdd(nodeInfo) + } } // Delete will delete the node and trigger the AfterDelete. func (s *NodeStore) Delete(nodeName string, nodeInfo framework.NodeInfo) { s.Store.Delete(nodeName) + if s.AfterDelete != nil { + s.AfterDelete(nodeInfo) + } } // AllNodesClone return all nodes's deepcopy and organize them in map. 
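
Note on the hunks above: the binder's NodeStore gains optional AfterAdd/AfterDelete hooks, and cache.go wires them to a framework.NodeSlices index so that the new binderCache.List() can return all NodeInfos without walking the store. The standalone Go sketch below illustrates this callback pattern; SimpleStore, simpleNodeInfo and the slice maintenance are simplified stand-ins for illustration only, not the project's actual framework types.

package main

import "fmt"

// simpleNodeInfo stands in for framework.NodeInfo in this sketch.
type simpleNodeInfo struct{ name string }

// SimpleStore mirrors the hook pattern added to NodeStore: optional
// callbacks fire after Add/Delete so a side index can stay in sync.
type SimpleStore struct {
	nodes       map[string]*simpleNodeInfo
	AfterAdd    func(*simpleNodeInfo) // maintains additional information about the node
	AfterDelete func(*simpleNodeInfo)
}

func NewSimpleStore() *SimpleStore {
	return &SimpleStore{nodes: map[string]*simpleNodeInfo{}}
}

func (s *SimpleStore) Add(name string, n *simpleNodeInfo) {
	s.nodes[name] = n
	if s.AfterAdd != nil { // the hook is optional, exactly like NodeStore.AfterAdd
		s.AfterAdd(n)
	}
}

func (s *SimpleStore) Delete(name string, n *simpleNodeInfo) {
	delete(s.nodes, name)
	if s.AfterDelete != nil {
		s.AfterDelete(n)
	}
}

func main() {
	// The cache keeps a flat slice of nodes (standing in for NodeSlices)
	// and lets the store hooks maintain it, as binderCache does.
	var nodeSlice []*simpleNodeInfo
	store := NewSimpleStore()
	store.AfterAdd = func(n *simpleNodeInfo) { nodeSlice = append(nodeSlice, n) }
	store.AfterDelete = func(n *simpleNodeInfo) {
		for i, m := range nodeSlice {
			if m == n {
				nodeSlice = append(nodeSlice[:i], nodeSlice[i+1:]...)
				break
			}
		}
	}

	n1 := &simpleNodeInfo{name: "node-1"}
	store.Add("node-1", n1)
	fmt.Println(len(nodeSlice)) // 1
	store.Delete("node-1", n1)
	fmt.Println(len(nodeSlice)) // 0
}

In the actual change, binderCache.List() then simply concatenates the in-partition and out-of-partition node slices under the read lock instead of iterating the whole store.
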
diff --git a/pkg/binder/cache/fake/fake_cache.go b/pkg/binder/cache/fake/fake_cache.go index 2db22721..8930efe6 100644 --- a/pkg/binder/cache/fake/fake_cache.go +++ b/pkg/binder/cache/fake/fake_cache.go @@ -248,3 +248,7 @@ func (c *Cache) GetAvailablePlaceholderPod( } return nil, fmt.Errorf("empty store") } + +func (c *Cache) List() []framework.NodeInfo { + return nil +} diff --git a/pkg/binder/cache/interface.go b/pkg/binder/cache/interface.go index 8f1fb64d..6681a2a8 100644 --- a/pkg/binder/cache/interface.go +++ b/pkg/binder/cache/interface.go @@ -103,4 +103,6 @@ type BinderCache interface { // for resource reservation GetAvailablePlaceholderPod(pod *v1.Pod) (*v1.Pod, error) + + List() []framework.NodeInfo } diff --git a/pkg/binder/factory.go b/pkg/binder/factory.go index 3d252391..e4749d77 100644 --- a/pkg/binder/factory.go +++ b/pkg/binder/factory.go @@ -31,10 +31,12 @@ import ( "github.com/kubewharf/godel-scheduler/pkg/binder/apis" godelcache "github.com/kubewharf/godel-scheduler/pkg/binder/cache" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/defaultbinder" + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/interpodaffinity" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nodeports" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/noderesources" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nodevolumelimits" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nonnativeresource" + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/podtopologyspread" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/volumebinding" "github.com/kubewharf/godel-scheduler/pkg/binder/queue" "github.com/kubewharf/godel-scheduler/pkg/features" @@ -67,7 +69,10 @@ func DefaultUnitQueueSortFunc() framework.UnitLessFunc { func NewBasePlugins(victimsCheckingPlugins []*framework.VictimCheckingPluginCollectionSpec) *apis.BinderPluginCollection { // TODO add some default plugins later basicPlugins := apis.BinderPluginCollection{ - CheckTopology: []string{}, + CheckTopology: []string{ + interpodaffinity.Name, + podtopologyspread.Name, + }, CheckConflicts: []string{ noderesources.ConflictCheckName, nodevolumelimits.CSIName, diff --git a/pkg/binder/framework/handle/handle.go b/pkg/binder/framework/handle/handle.go index b871734b..3e167290 100644 --- a/pkg/binder/framework/handle/handle.go +++ b/pkg/binder/framework/handle/handle.go @@ -45,4 +45,6 @@ type BinderFrameworkHandle interface { FindStore(storeName commonstore.StoreName) commonstore.Store GetNodeInfo(string) framework.NodeInfo + + ListNodeInfos() []framework.NodeInfo } diff --git a/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity.go b/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity.go new file mode 100644 index 00000000..5823a009 --- /dev/null +++ b/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity.go @@ -0,0 +1,85 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package interpodaffinity + +import ( + "context" + + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/interpodaffinity" + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + Name = "InterPodAffinityCheck" + ErrorReasonWhenFilterNodeWithSameTopology = "failed to get nodes with same topology labels" +) + +type InterPodAffinity struct { + frameworkHandle handle.BinderFrameworkHandle +} + +var _ framework.CheckTopologyPlugin = &InterPodAffinity{} + +func (pl *InterPodAffinity) Name() string { + return Name +} + +func (pl *InterPodAffinity) CheckTopology(_ context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + // Get the nodes with the same topology labels as the node to be scheduled + podLauncher, status := podlauncher.NodeFits(nil, pod, nodeInfo) + if status != nil { + return status + } + + nodeInfos := pl.frameworkHandle.ListNodeInfos() + + existingPodAntiAffinityMap := utils.GetTPMapMatchingExistingAntiAffinity(pod, nodeInfos) + + podInfo := framework.NewPodInfo(pod) + incomingPodAffinityMap, incomingPodAntiAffinityMap := utils.GetTPMapMatchingIncomingAffinityAntiAffinity(podInfo, nodeInfos) + + state := &utils.PreFilterState{ + TopologyToMatchedExistingAntiAffinityTerms: existingPodAntiAffinityMap, + TopologyToMatchedAffinityTerms: incomingPodAffinityMap, + TopologyToMatchedAntiAffinityTerms: incomingPodAntiAffinityMap, + PodInfo: podInfo, + } + + if !utils.SatisfyPodAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.UnschedulableAndUnresolvable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonAffinityRulesNotMatch) + } + + if !utils.SatisfyPodAntiAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.Unschedulable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonAntiAffinityRulesNotMatch) + } + + if !utils.SatisfyExistingPodsAntiAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.Unschedulable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonExistingAntiAffinityRulesNotMatch) + } + + return nil +} + +func New(_ runtime.Object, handle handle.BinderFrameworkHandle) (framework.Plugin, error) { + return &InterPodAffinity{ + frameworkHandle: handle, + }, nil +} diff --git a/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity_test.go b/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity_test.go new file mode 100644 index 00000000..1b9fb378 --- /dev/null +++ b/pkg/binder/framework/plugins/interpodaffinity/interpodaffinity_test.go @@ -0,0 +1,1746 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package interpodaffinity + +import ( + "context" + "reflect" + "testing" + "time" + + "github.com/kubewharf/godel-scheduler/pkg/binder/cache" + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" + pt "github.com/kubewharf/godel-scheduler/pkg/binder/testing" + commoncache "github.com/kubewharf/godel-scheduler/pkg/common/cache" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/interpodaffinity" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" +) + +var ( + defaultNamespace = "" +) + +func initFrameworkHandle(client *fake.Clientset, nodes []*v1.Node, existingPods []*v1.Pod) (handle.BinderFrameworkHandle, error) { + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(client, 0) + informerFactory.Core().V1().Nodes().Informer() + cacheHandler := commoncache.MakeCacheHandlerWrapper(). + Period(10 * time.Second).PodAssumedTTL(30 * time.Second).StopCh(make(chan struct{})). + ComponentName("godel-binder").Obj() + cache := cache.New(cacheHandler) + + for _, node := range nodes { + _, err := client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + if err != nil { + return nil, err + } + cache.AddNode(node) + } + + for _, pod := range existingPods { + pod.UID = types.UID(pod.Name) + cache.AddPod(pod) + } + + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + return pt.NewBinderFrameworkHandle(client, nil, informerFactory, nil, cache) +} + +func initFrameworkHandleWithSingleNode(client *fake.Clientset, nodeInfo *framework.NodeInfo) (handle.BinderFrameworkHandle, error) { + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(client, 0) + informerFactory.Core().V1().Nodes().Informer() + cacheHandler := commoncache.MakeCacheHandlerWrapper(). + Period(10 * time.Second).PodAssumedTTL(30 * time.Second).StopCh(make(chan struct{})). 
+ ComponentName("godel-binder").Obj() + cache := cache.New(cacheHandler) + + node := (*nodeInfo).GetNode() + _, err := client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + if err != nil { + return nil, err + } + cache.AddNode(node) + + for _, podInfo := range (*nodeInfo).GetPods() { + pod := podInfo.Pod + pod.Spec.NodeName = node.Name + pod.UID = types.UID(pod.Name) + if err := cache.AddPod(pod); err != nil { + return nil, err + } + } + + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + return pt.NewBinderFrameworkHandle(client, nil, informerFactory, nil, cache) +} + +func createPodWithAffinityTerms(namespace, nodeName string, labels map[string]string, affinity, antiAffinity []v1.PodAffinityTerm) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-" + string(uuid.NewUUID()), + Labels: labels, + Namespace: namespace, + }, + Spec: v1.PodSpec{ + NodeName: nodeName, + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: affinity, + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: antiAffinity, + }, + }, + }, + } +} + +func newPodWithLabels(labels map[string]string) *v1.Pod { + return newPodWithLabelsAndNamespace(labels, defaultNamespace) +} + +func newPodWithLabelsAndNamespace(labels map[string]string, namespace string) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-" + string(uuid.NewUUID()), + Namespace: namespace, + Labels: labels, + }, + } +} + +func TestRequiredAffinitySingleNode(t *testing.T) { + podLabel := map[string]string{"service": "securityscan"} + labels1 := map[string]string{ + "region": "r1", + "zone": "z11", + } + podLabel2 := map[string]string{"security": "S1"} + node1 := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labels1}} + tests := []struct { + pod *v1.Pod + nodeInfo framework.NodeInfo + name string + wantStatus *framework.Status + }{ + { + pod: new(v1.Pod), + nodeInfo: framework.NewNodeInfo(), + name: "A pod that has no required pod affinity scheduling rules can schedule onto a node with no existing pods", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "satisfies with requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using In operator that matches the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan3", "value3"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "satisfies the pod with requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using not in operator in labelSelector that matches the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: 
"service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + Namespaces: []string{"DiffNameSpace"}, + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabelsAndNamespace(podLabel, "ns")), + name: "Does not satisfy the PodAffinity with labelSelector because of diff Namespace", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "Doesn't satisfy the PodAffinity because of unmatching labelSelector with the existing pod", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"WrongValue"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "satisfies the PodAffinity with different label Operators in multiple RequiredDuringSchedulingIgnoredDuringExecution ", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan2"}, + }, { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"WrongValue"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "The labelSelector requirements(items of matchExpressions) are ANDed, the pod cannot schedule onto the node because one of the matchExpression item don't match.", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + 
}, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "satisfies the PodAffinity and PodAntiAffinity with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + })), + name: "satisfies the PodAffinity and PodAntiAffinity and PodAntiAffinity symmetry with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "satisfies the PodAffinity but doesn't satisfy the PodAntiAffinity with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + })), + name: "satisfies the 
PodAffinity and PodAntiAffinity but doesn't satisfy PodAntiAffinity symmetry with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + nodeInfo: framework.NewNodeInfo( + newPodWithLabels(podLabel)), + name: "pod matches its own Label in PodAffinity and that matches the existing pod Labels", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabel, + }, + }, + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + })), + name: "verify that PodAntiAffinity from existing pod is respected when pod has no AntiAffinity constraints. doesn't satisfy PodAntiAffinity symmetry with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabel, + }, + }, + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + })), + name: "verify that PodAntiAffinity from existing pod is respected when pod has no AntiAffinity constraints. 
satisfy PodAntiAffinity symmetry with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + })), + name: "satisfies the PodAntiAffinity with existing pod but doesn't satisfy PodAntiAffinity symmetry with incoming pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + })), + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check a1: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + })), + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check a2: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", 
map[string]string{"abc": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", map[string]string{"def": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + })), + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check b1: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"def": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + nodeInfo: framework.NewNodeInfo( + createPodWithAffinityTerms(defaultNamespace, "machine1", map[string]string{"abc": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + })), + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check b2: incoming pod and existing pod partially match each other on AffinityTerms", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.nodeInfo.SetNode(&node1) + + client := fake.NewSimpleClientset() + frameworkHandle, err := initFrameworkHandleWithSingleNode(client, &tt.nodeInfo) + if err != nil { + t.Fatal(err) + } + + p, err := New(nil, frameworkHandle) + if err != nil { + t.Fatal(err) + } + + gotStatus := p.(framework.CheckTopologyPlugin).CheckTopology(context.Background(), nil, tt.pod, tt.nodeInfo) + if !reflect.DeepEqual(gotStatus, tt.wantStatus) { + t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatus) + } + }) + } +} + +func TestRequiredAffinityMultipleNodes(t *testing.T) { + podLabelA := map[string]string{ + "foo": "bar", + } + labelRgChina := map[string]string{ + "region": "China", + } + labelRgChinaAzAz1 := map[string]string{ + "region": "China", + "az": 
"az1", + } + labelRgIndia := map[string]string{ + "region": "India", + } + + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + wantStatuses []*framework.Status + name string + }{ + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + nil, + nil, + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "A pod can be scheduled onto all the nodes that have the same topology key & label value with one of them has an existing pod that matches the affinity rules", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"foo": "bar", "service": "securityscan"}, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}}, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "The affinity rule is to schedule all of the pods of this collection to the same zone. 
The first pod of the collection " + + "should not be blocked from being scheduled onto any node, even there's no existing pod that matches the rule anywhere.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"foo": "bar", "service": "securityscan"}, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}}, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zoneLabel": "az1", "hostname": "h1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zoneLabel": "az2", "hostname": "h2"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "The first pod of the collection can only be scheduled on nodes labelled with the requested topology keys", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. 
The pod can not be scheduled onto nodeA and nodeB.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "This test ensures that anti-affinity matches a pod when any term of the anti-affinity rule matches a pod.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. 
The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC", + }, + { + pod: createPodWithAffinityTerms("NS1", "", map[string]string{"foo": "123"}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"foo": "bar"}, + Namespace: "NS1", + }, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + createPodWithAffinityTerms("NS2", "nodeC", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"123"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "Test existing pod's anti-affinity: if an existing pod has a term with invalid topologyKey, labelSelector of the term is firstly checked, and then topologyKey of the term is also checked", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + 
name: "Test incoming pod's anti-affinity: even if labelSelector matches, we still check if topologyKey matches", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + name: "Test existing pod's anti-affinity: incoming pod wouldn't considered as a fit as it violates each existingPod's terms on all nodes", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeB", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's anti-affinity: incoming pod wouldn't considered as a fit as it at least violates one anti-affinity rule of existingPod", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: 
"invalid-node-label", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test existing pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when one term has invalid topologyKey", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "podA", Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test incoming pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when one term has invalid topologyKey", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + name: "Test existing pod's anti-affinity: only when labelSelector and 
topologyKey both match, it's counted as a single term match - case when all terms have valid topologyKey", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when all terms have valid topologyKey", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "labelA", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + createPodWithAffinityTerms(defaultNamespace, "nodeB", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "labelB", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: map[string]string{"region": "r1", "zone": "z3", "hostname": "nodeC"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test existing pod's anti-affinity: existingPod on nodeA and nodeB has at least one anti-affinity term matches 
incoming pod, so incoming pod can only be scheduled to nodeC", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pod1", Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "Test incoming pod's affinity: firstly check if all affinityTerms match, and then check if all topologyKeys match", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pod1", Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pod2", Labels: map[string]string{"bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeB", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's affinity: firstly check if all affinityTerms match, and then check if all topologyKeys match, and the match logic should be satisfied on the same pod", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := fake.NewSimpleClientset() + frameworkHandle, err := initFrameworkHandle(client, tt.nodes, tt.pods) + if err != nil { + t.Fatal(err) + } + + p, err := New(nil, frameworkHandle) + if err != nil { + t.Fatal(err) + } + + for i, node := range tt.nodes { + nodeInfo := frameworkHandle.GetNodeInfo(node.Name) + gotStatus := p.(framework.CheckTopologyPlugin).CheckTopology(context.Background(), nil, tt.pod, nodeInfo) + if !reflect.DeepEqual(gotStatus, tt.wantStatuses[i]) { + t.Errorf("status does not match: %v, want: %v", gotStatus, tt.wantStatuses[i]) + } + } + }) + } +} diff --git a/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread.go 
b/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread.go new file mode 100644 index 00000000..03f11667 --- /dev/null +++ b/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread.go @@ -0,0 +1,136 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "context" + "fmt" + + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/plugins/helper" + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/validation" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + Name = "PodTopologySpreadCheck" //Name +) + +type TopologySpreadCondition struct { + Constraints []utils.TopologySpreadConstraint + TpKeyToCriticalPaths map[string]*utils.CriticalPaths + TpPairToMatchNum map[utils.TopologyPair]*int32 +} + +type PodTopologySpreadCheck struct { + args config.PodTopologySpreadArgs + frameworkHandle handle.BinderFrameworkHandle +} + +var _ framework.CheckTopologyPlugin = &PodTopologySpreadCheck{} + +func (pl *PodTopologySpreadCheck) Name() string { + return Name +} + +func (pl *PodTopologySpreadCheck) CheckTopology(_ context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + podLauncher, status := podlauncher.NodeFits(cycleState, pod, nodeInfo) + if status != nil { + return status + } + + constraints, err := pl.getConstraints(pod) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + if len(constraints) == 0 { + return nil + } + + nodeInfos := pl.frameworkHandle.ListNodeInfos() + + state := utils.GetPreFilterState(pod, nodeInfos, constraints) + + return utils.IsSatisfyPodTopologySpreadConstraints(&state, pod, nodeInfo, podLauncher) +} + +func New(plArgs runtime.Object, handle handle.BinderFrameworkHandle) (framework.Plugin, error) { + args, err := utils.GetArgs(plArgs) + if err != nil { + return nil, err + } + if err := validation.ValidatePodTopologySpreadArgs(&args); err != nil { + return nil, err + } + pl := &PodTopologySpreadCheck{ + args: args, + frameworkHandle: handle, + } + + if len(pl.args.DefaultConstraints) != 0 { + if handle.SharedInformerFactory() == nil { + return nil, fmt.Errorf("SharedInformerFactory is nil") + } + } + + return pl, nil +} + +// defaultConstraints builds the constraints for a pod using +// .DefaultConstraints and the selectors from the services, replication +// controllers, replica sets and stateful sets that match the pod. 
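// Illustrative sketch (values assumed for illustration, not taken from this
// patch): with PodTopologySpreadArgs carrying a single default constraint such
// as
//
//	DefaultConstraints: []v1.TopologySpreadConstraint{
//		{MaxSkew: 1, TopologyKey: "topology.kubernetes.io/zone", WhenUnsatisfiable: v1.DoNotSchedule},
//	}
//
// and an incoming pod selected by a ReplicaSet whose selector is app=web, the
// expectation is that defaultConstraints returns one utils.TopologySpreadConstraint
// with MaxSkew 1, TopologyKey "topology.kubernetes.io/zone" and Selector app=web.
// If the pod is not selected by any service, replication controller, replica
// set or stateful set, the computed selector is empty and nil is returned, so
// no default spreading is applied.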
+func (pl *PodTopologySpreadCheck) defaultConstraints(p *v1.Pod, action v1.UnsatisfiableConstraintAction) ([]utils.TopologySpreadConstraint, error) { + constraints, err := utils.FilterTopologySpreadConstraints(pl.args.DefaultConstraints, action) + if err != nil || len(constraints) == 0 { + return nil, err + } + selector := helper.DefaultSelector(p, pl.frameworkHandle.SharedInformerFactory().Core().V1().Services().Lister(), + pl.frameworkHandle.SharedInformerFactory().Core().V1().ReplicationControllers().Lister(), + pl.frameworkHandle.SharedInformerFactory().Apps().V1().ReplicaSets().Lister(), pl.frameworkHandle.SharedInformerFactory().Apps().V1().StatefulSets().Lister()) + if selector.Empty() { + return nil, nil + } + for i := range constraints { + constraints[i].Selector = selector + } + return constraints, nil +} + +func (pl *PodTopologySpreadCheck) getConstraints(pod *v1.Pod) ([]utils.TopologySpreadConstraint, error) { + var err error + constraints := []utils.TopologySpreadConstraint{} + if len(pod.Spec.TopologySpreadConstraints) > 0 { + // We have feature gating in APIServer to strip the spec + // so don't need to re-check feature gate, just check length of Constraints. + constraints, err = utils.FilterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.DoNotSchedule) + if err != nil { + return nil, fmt.Errorf("obtaining pod's hard topology spread constraints: %v", err) + } + } else { + constraints, err = pl.defaultConstraints(pod, v1.DoNotSchedule) + if err != nil { + return nil, fmt.Errorf("setting default hard topology spread constraints: %v", err) + } + } + + return constraints, nil +} diff --git a/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread_test.go b/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread_test.go new file mode 100644 index 00000000..dc1ad7e6 --- /dev/null +++ b/pkg/binder/framework/plugins/podtopologyspread/podtopologyspread_test.go @@ -0,0 +1,613 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package podtopologyspread + +import ( + "context" + "reflect" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/kubewharf/godel-scheduler/pkg/binder/cache" + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" + pt "github.com/kubewharf/godel-scheduler/pkg/binder/testing" + commoncache "github.com/kubewharf/godel-scheduler/pkg/common/cache" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + testing_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" +) + +var cmpOpts = []cmp.Option{ + cmp.Comparer(func(s1 labels.Selector, s2 labels.Selector) bool { + return reflect.DeepEqual(s1, s2) + }), + cmp.Comparer(func(p1, p2 utils.CriticalPaths) bool { + p1.Sort() + p2.Sort() + return p1[0] == p2[0] && p1[1] == p2[1] + }), +} + +func mustConvertLabelSelectorAsSelector(t *testing.T, ls *metav1.LabelSelector) labels.Selector { + t.Helper() + s, err := metav1.LabelSelectorAsSelector(ls) + if err != nil { + t.Fatal(err) + } + return s +} + +func initInformerFactoryInformers(informerFactory *informers.SharedInformerFactory) { + (*informerFactory).Core().V1().Nodes().Informer() + (*informerFactory).Core().V1().Pods().Informer() + (*informerFactory).Core().V1().Services().Informer() + (*informerFactory).Apps().V1().ReplicaSets().Informer() + (*informerFactory).Apps().V1().StatefulSets().Informer() +} + +func initFrameworkHandle(client *fake.Clientset, nodes []*v1.Node, existingPods []*v1.Pod) (handle.BinderFrameworkHandle, error) { + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(client, 0) + initInformerFactoryInformers(&informerFactory) + cacheHandler := commoncache.MakeCacheHandlerWrapper(). + Period(10 * time.Second).PodAssumedTTL(30 * time.Second).StopCh(make(chan struct{})). 
+ ComponentName("godel-binder").Obj() + cache := cache.New(cacheHandler) + + for _, node := range nodes { + _, err := client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}) + if err != nil { + return nil, err + } + cache.AddNode(node) + } + + for _, pod := range existingPods { + pod.UID = types.UID(pod.Name) + cache.AddPod(pod) + } + + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + return pt.NewBinderFrameworkHandle(client, nil, informerFactory, nil, cache) +} + +func TestCheckConflictsForSingleConstraint(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + wantStatusCode map[string]framework.Code + }{ + { + name: "no existing pods", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + name: "no existing pods, incoming pod doesn't match itself", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + name: "existing pods in a different namespace do not count", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Namespace("ns2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + 
"node-y": framework.Unschedulable, + }, + }, + { + name: "pods spread across zones as 3/3, all nodes fit", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + // TODO(Huang-Wei): maybe document this to remind users that typos on node labels + // can cause unexpected behavior + name: "pods spread across zones as 1/2 due to absence of label 'zone' on node-b", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zon", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.UnschedulableAndUnresolvable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + name: "pod cannot be scheduled as all nodes don't have label 'rack'", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "rack", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.UnschedulableAndUnresolvable, + "node-x": framework.UnschedulableAndUnresolvable, + }, + }, + { + name: "pods spread across nodes as 2/1/0/3, only node-x fits", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, 
testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + name: "pods spread across nodes as 2/1/0/3, maxSkew is 2, node-b and node-x fit", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 2, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // not a desired case, but it can happen + // TODO(Huang-Wei): document this "pod-not-match-itself" case + // in this case, placement of the new pod doesn't change pod distribution of the cluster + // as the incoming pod doesn't have label "foo" + name: "pods spread across nodes as 2/1/0/3, but pod doesn't match itself", + pod: testing_helper.MakePod().Name("p").Label("bar", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + 
testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // only node-a and node-y are considered, so pods spread as 2/~1~/~0~/3 + // ps: '~num~' is a markdown symbol to denote a crossline through 'num' + // but in this unit test, we don't run NodeAffinity Predicate, so node-b and node-x are + // still expected to be fits; + // the fact that node-a fits can prove the underlying logic works + name: "incoming pod has nodeAffinity, pods spread as 2/~1~/~0~/3, hence node-a fits", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + NodeAffinityIn("node", []string{"node-a", "node-y"}, testing_helper.NodeAffinityWithRequiredReq). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, // in real case, it's false + "node-x": framework.Success, // in real case, it's false + "node-y": framework.Unschedulable, + }, + }, + { + name: "terminating Pods should be excluded", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a").Node("node-a").Label("foo", "").Terminating().Obj(), + testing_helper.MakePod().Name("p-b").Node("node-b").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Unschedulable, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + frameworkHandle, err := initFrameworkHandle(fake.NewSimpleClientset(), tt.nodes, tt.existingPods) + if err != nil { + t.Fatal(err) + } + + pl, err := New(nil, frameworkHandle) + if err != nil { + t.Fatal(err) + } + + for _, 
node := range tt.nodes { + nodeInfo := frameworkHandle.GetNodeInfo(node.Name) + gotStatus := pl.(framework.CheckTopologyPlugin).CheckTopology(context.Background(), nil, tt.pod, nodeInfo) + if len(tt.wantStatusCode) != 0 && gotStatus.Code() != tt.wantStatusCode[node.Name] { + t.Errorf("[%s]: expected error code %v got %v", node.Name, tt.wantStatusCode[node.Name], gotStatus.Code()) + } + } + }) + } +} + +func TestCheckConflictsForMultipleConstraint(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + wantStatusCode map[string]framework.Code + }{ + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on any zone (hence any node) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-x + // intersection of (1) and (2) returns node-x + name: "two Constraints on zone and node, spreads = [3/3, 2/1/0/3]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone1 (node-a or node-b) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-x + // intersection of (1) and (2) returns no node + name: "two Constraints on zone and node, spreads = [3/4, 2/1/0/4]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone2 (node-x or node-y) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a, node-b or node-x + // intersection of (1) and (2) returns node-x + name: "Constraints hold different labelSelectors, spreads = [1/0, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone2 (node-x or node-y) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a or node-b + // intersection of (1) and (2) returns no node + name: "Constraints hold different labelSelectors, spreads = [1/0, 0/0/1/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone1 (node-a or node-b) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-b or node-x + // intersection of (1) and (2) returns node-b + name: "Constraints hold different labelSelectors, spreads = [2/3, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. pod doesn't match itself on "zone" constraint, so it can be put onto any zone + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a or node-b + // intersection of (1) and (2) returns node-a and node-b + name: "Constraints hold different labelSelectors but pod doesn't match itself on 'zone' constraint", + pod: testing_helper.MakePod().Name("p").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on any zone (hence any node) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-b (node-x doesn't have the required label) + // intersection of (1) and (2) returns node-b + name: "two Constraints on zone and node, absence of label 'node' on node-x, spreads = [1/1, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.UnschedulableAndUnresolvable, + "node-y": framework.Unschedulable, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + frameworkHandle, err := initFrameworkHandle(fake.NewSimpleClientset(), tt.nodes, tt.existingPods) + if err != nil { + t.Fatal(err) + } + + pl, err := New(nil, frameworkHandle) + if err != nil { + t.Fatal(err) + } + + for _, node := range tt.nodes { + nodeInfo := frameworkHandle.GetNodeInfo(node.Name) + gotStatus := pl.(framework.CheckTopologyPlugin).CheckTopology(context.Background(), nil, tt.pod, nodeInfo) + if len(tt.wantStatusCode) != 0 && gotStatus.Code() != tt.wantStatusCode[node.Name] { + t.Errorf("[%s]: expected error code %v got %v", node.Name, tt.wantStatusCode[node.Name], gotStatus.Code()) + } + } + }) + } +} diff --git a/pkg/binder/framework/registry.go b/pkg/binder/framework/registry.go index 051a1884..e994283c 100644 --- a/pkg/binder/framework/registry.go +++ b/pkg/binder/framework/registry.go @@ -25,10 +25,12 @@ import ( "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/defaultbinder" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/defaultpreemption" + 
"github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/interpodaffinity" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nodeports" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/noderesources" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nodevolumelimits" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/nonnativeresource" + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/podtopologyspread" "github.com/kubewharf/godel-scheduler/pkg/binder/framework/plugins/volumebinding" framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" ) @@ -57,6 +59,8 @@ func NewInTreeRegistry() Registry { volumebinding.Name: volumebinding.New, nodeports.Name: nodeports.New, nonnativeresource.Name: nonnativeresource.New, + interpodaffinity.Name: interpodaffinity.New, + podtopologyspread.Name: podtopologyspread.New, } } diff --git a/pkg/binder/godel_binder_frameworkhandle.go b/pkg/binder/godel_binder_frameworkhandle.go index fe830e60..4f19f14c 100644 --- a/pkg/binder/godel_binder_frameworkhandle.go +++ b/pkg/binder/godel_binder_frameworkhandle.go @@ -135,3 +135,7 @@ func (h *frameworkHandleImpl) GetNodeInfo(nodename string) framework.NodeInfo { func (h *frameworkHandleImpl) FindStore(storeName commonstore.StoreName) commonstore.Store { return h.binderCache.FindStore(storeName) } + +func (h *frameworkHandleImpl) ListNodeInfos() []framework.NodeInfo { + return h.binderCache.List() +} diff --git a/pkg/binder/testing/framework_helpers.go b/pkg/binder/testing/framework_helpers.go index 6b2c77d8..0070c4f0 100644 --- a/pkg/binder/testing/framework_helpers.go +++ b/pkg/binder/testing/framework_helpers.go @@ -80,6 +80,10 @@ func (mfh *MockBinderFrameworkHandle) GetNodeInfo(nodeName string) framework.Nod return mfh.cache.GetNodeInfo(nodeName) } +func (mfh *MockBinderFrameworkHandle) ListNodeInfos() []framework.NodeInfo { + return mfh.cache.List() +} + func NewBinderFramework(pluginRegistry, preemptionPluginRegistry framework.PluginMap, basePlugins *apis.BinderPluginCollection) framework.BinderFramework { return binderruntime.New(pluginRegistry, preemptionPluginRegistry, basePlugins) } diff --git a/pkg/framework/api/nodeinfo_hashslice.go b/pkg/framework/api/nodeinfo_hashslice.go index 37755795..d7596edf 100644 --- a/pkg/framework/api/nodeinfo_hashslice.go +++ b/pkg/framework/api/nodeinfo_hashslice.go @@ -16,6 +16,8 @@ limitations under the License. package api +var GlobalNodeInfoPlaceHolder = NewNodeInfo() + // NodeHashSlice maintains a linear NodeInfo's slice. The time complexity of all methods is O(1). type NodeHashSlice interface { Add(NodeInfo) bool @@ -82,3 +84,47 @@ func (hs *NodeHashSliceImpl) Nodes() []NodeInfo { func (hs *NodeHashSliceImpl) Len() int { return hs.count } + +// NodeSlices is mainly used to maintain all nodeInfos in the cluster. 
+type NodeSlices struct { + InPartitionNodeSlice NodeHashSlice + OutOfPartitionNodeSlice NodeHashSlice + HavePodsWithAffinityNodeSlice NodeHashSlice + HavePodsWithRequiredAntiAffinityNodeSlice NodeHashSlice +} + +func NewNodeSlices() *NodeSlices { + return &NodeSlices{ + InPartitionNodeSlice: NewNodeHashSlice(), + OutOfPartitionNodeSlice: NewNodeHashSlice(), + HavePodsWithAffinityNodeSlice: NewNodeHashSlice(), + HavePodsWithRequiredAntiAffinityNodeSlice: NewNodeHashSlice(), + } +} + +func op(slice NodeHashSlice, n NodeInfo, isAdd bool) { + if isAdd { + _ = slice.Add(n) + } else { + _ = slice.Del(n) + } +} + +func (s *NodeSlices) Update(n NodeInfo, isAdd bool) { + // ATTENTION: We should ensure that the `globalNodeInfoPlaceHolder` will not be added to nodelice. + if n == GlobalNodeInfoPlaceHolder { + return + } + + if n.GetNodeInSchedulerPartition() || n.GetNMNodeInSchedulerPartition() { + op(s.InPartitionNodeSlice, n, isAdd) + } else { + op(s.OutOfPartitionNodeSlice, n, isAdd) + } + if len(n.GetPodsWithAffinity()) > 0 { + op(s.HavePodsWithAffinityNodeSlice, n, isAdd) + } + if len(n.GetPodsWithRequiredAntiAffinity()) > 0 { + op(s.HavePodsWithRequiredAntiAffinityNodeSlice, n, isAdd) + } +} diff --git a/pkg/framework/api/nodeinfo_podinfo_test.go b/pkg/framework/api/nodeinfo_podinfo_test.go index 5ceed3eb..7c48d03a 100644 --- a/pkg/framework/api/nodeinfo_podinfo_test.go +++ b/pkg/framework/api/nodeinfo_podinfo_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,7 +30,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" - "github.com/google/go-cmp/cmp" "github.com/kubewharf/godel-scheduler/pkg/features" podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" ) diff --git a/pkg/plugins/helper/node_affinity.go b/pkg/plugins/helper/node_affinity.go new file mode 100644 index 00000000..5a27e715 --- /dev/null +++ b/pkg/plugins/helper/node_affinity.go @@ -0,0 +1,85 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/util/helper" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" +) + +// PodMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to +// the requirements in both NodeAffinity and nodeSelector. +func PodMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, nodeInfo framework.NodeInfo, podLanucher podutil.PodLauncher) bool { + nodeLabels := nodeInfo.GetNodeLabels(podLanucher) + + // Check if node.Labels match pod.Spec.NodeSelector. 
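// For example (labels assumed for illustration): a NodeSelector of
// {"disktype": "ssd"} is turned into an ANDed equality selector, so a node
// labelled {"disktype": "ssd", "zone": "z1"} matches, while a node labelled
// only {"zone": "z1"} does not; every key/value pair in the NodeSelector must
// be present on the node with exactly that value.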
+ if len(pod.Spec.NodeSelector) > 0 { + selector := labels.SelectorFromSet(pod.Spec.NodeSelector) + if !selector.Matches(labels.Set(nodeLabels)) { + return false + } + } + + // 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes) + // 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes + // 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity + // 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes + // 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity + // 6. non-nil empty NodeSelectorRequirement is not allowed + nodeAffinityMatches := true + affinity := pod.Spec.Affinity + if affinity != nil && affinity.NodeAffinity != nil { + nodeAffinity := affinity.NodeAffinity + // if no required NodeAffinity requirements, will do no-op, means select all nodes. + // TODO: Replace next line with subsequent commented-out line when implement RequiredDuringSchedulingRequiredDuringExecution. + if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + // if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution == nil && nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + return true + } + + // Match node selector for requiredDuringSchedulingRequiredDuringExecution. + // TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution. + // if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil { + // nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms + // klog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms) + // nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms) + // } + + // Match node selector for requiredDuringSchedulingIgnoredDuringExecution. + if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { + nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + switch podLanucher { + case podutil.Kubelet: + nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(nodeLabels, nodeSelectorTerms, nodeInfo.GetNode().Name) + case podutil.NodeManager: + nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(nodeLabels, nodeSelectorTerms, nodeInfo.GetNMNode().Name) + } + } + + } + return nodeAffinityMatches +} + +// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms, +// terms are ORed, and an empty list of terms will match nothing. +func nodeMatchesNodeSelectorTerms(nodeLabels map[string]string, nodeSelectorTerms []v1.NodeSelectorTerm, nodeName string) bool { + return helper.MatchNodeSelectorTerms(nodeSelectorTerms, nodeLabels, fields.Set{"metadata.name": nodeName}) +} diff --git a/pkg/plugins/helper/node_affinity_test.go b/pkg/plugins/helper/node_affinity_test.go new file mode 100644 index 00000000..823b1e53 --- /dev/null +++ b/pkg/plugins/helper/node_affinity_test.go @@ -0,0 +1,716 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + "testing" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/util" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestPodMatchesNodeSelectorAndAffinityTerms(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + labels map[string]string + nodeName string + want bool + }{ + { + name: "no selector", + pod: &v1.Pod{}, + want: true, + }, + { + name: "missing labels", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + }, + }, + }, + want: false, + }, + { + name: "same labels", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "node labels are superset", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + "baz": "blah", + }, + want: true, + }, + { + name: "node labels are subset", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + "baz": "blah", + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + }, + { + name: "Pod with matchExpressions using In operator that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"bar", "value2"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "Pod with matchExpressions using Gt operator that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "kernel-version", + Operator: v1.NodeSelectorOpGt, + Values: []string{"0204"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + // We use two digit to denote major version and two digit for minor version. 
+ "kernel-version": "0206", + }, + want: true, + }, + { + name: "Pod with matchExpressions using NotIn operator that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "mem-type", + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"DDR", "DDR2"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "mem-type": "DDR3", + }, + want: true, + }, + { + name: "Pod with matchExpressions using Exists operator that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "GPU", + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "GPU": "NVIDIA-GRID-K1", + }, + want: true, + }, + { + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"value1", "value2"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + name: "Pod with affinity that don't match node's labels won't schedule onto the node", + }, + { + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: nil, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + name: "Pod with a nil []NodeSelectorTerm in affinity, can't match the node's labels and won't schedule onto the node", + }, + { + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{}, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + name: "Pod with an empty []NodeSelectorTerm in affinity, can't match the node's labels and won't schedule onto the node", + }, + { + name: "Pod with empty MatchExpressions is not a valid value will match no objects and won't schedule onto the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{}, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + }, + { + name: "Pod with no Affinity will schedule onto a node", + pod: &v1.Pod{}, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "Pod with Affinity but nil NodeSelector will schedule onto a node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: nil, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "Pod with multiple 
matchExpressions ANDed that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "GPU", + Operator: v1.NodeSelectorOpExists, + }, { + Key: "GPU", + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"AMD", "INTER"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "GPU": "NVIDIA-GRID-K1", + }, + want: true, + }, + { + name: "Pod with multiple matchExpressions ANDed that doesn't match the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "GPU", + Operator: v1.NodeSelectorOpExists, + }, { + Key: "GPU", + Operator: v1.NodeSelectorOpIn, + Values: []string{"AMD", "INTER"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "GPU": "NVIDIA-GRID-K1", + }, + want: false, + }, + { + name: "Pod with multiple NodeSelectorTerms ORed in affinity, matches the node's labels and will schedule onto the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"bar", "value2"}, + }, + }, + }, + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "diffkey", + Operator: v1.NodeSelectorOpIn, + Values: []string{"wrong", "value2"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "Pod with an Affinity and a PodSpec.NodeSelector(the old thing that we are deprecating) " + + "both are satisfied, will schedule onto the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + }, + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: true, + }, + { + name: "Pod with an Affinity matches node's labels but the PodSpec.NodeSelector(the old thing that we are deprecating) " + + "is not satisfied, won't schedule onto the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "foo": "bar", + }, + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "barrrrrr", + }, + want: false, + }, + { + name: "Pod with an invalid value in Affinity term won't be scheduled onto the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + 
NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"invalid value: ___@#$%^"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + labels: map[string]string{ + "foo": "bar", + }, + want: false, + }, + { + name: "Pod with matchFields using In operator that matches the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_1", + want: true, + }, + { + name: "Pod with matchFields using In operator that does not match the existing node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_2", + want: false, + }, + { + name: "Pod with two terms: matchFields does not match, but matchExpressions matches", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + }, + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_2", + labels: map[string]string{"foo": "bar"}, + want: true, + }, + { + name: "Pod with one term: matchFields does not match, but matchExpressions matches", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_2", + labels: map[string]string{"foo": "bar"}, + want: false, + }, + { + name: "Pod with one term: both matchFields and matchExpressions match", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_1", + labels: map[string]string{"foo": "bar"}, + want: true, + }, + { + name: "Pod with two terms: both matchFields and 
matchExpressions do not match", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchFields: []v1.NodeSelectorRequirement{ + { + Key: util.ObjectNameField, + Operator: v1.NodeSelectorOpIn, + Values: []string{"node_1"}, + }, + }, + }, + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "foo", + Operator: v1.NodeSelectorOpIn, + Values: []string{"not-match-to-bar"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodeName: "node_2", + labels: map[string]string{"foo": "bar"}, + want: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + node := v1.Node{ObjectMeta: metav1.ObjectMeta{ + Name: test.nodeName, + Labels: test.labels, + }} + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(&node) + got := PodMatchesNodeSelectorAndAffinityTerms(test.pod, nodeInfo, podutil.Kubelet) + if test.want != got { + t.Errorf("expected: %v got %v", test.want, got) + } + }) + } +} diff --git a/pkg/plugins/helper/node_info.go b/pkg/plugins/helper/node_info.go new file mode 100644 index 00000000..6c08069b --- /dev/null +++ b/pkg/plugins/helper/node_info.go @@ -0,0 +1,66 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + "fmt" + + "github.com/kubewharf/godel-scheduler/pkg/binder/framework/handle" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" +) + +const ( + AllNodeInfoKey = "AllNodeInfo" +) + +type AllNodeInfos struct { + NodeInfos []framework.NodeInfo +} + +func (f AllNodeInfos) Clone() framework.StateData { + clonedNodeInfos := make([]framework.NodeInfo, len(f.NodeInfos)) + + copy(clonedNodeInfos, f.NodeInfos) + + return AllNodeInfos{ + NodeInfos: clonedNodeInfos, + } +} + +func WriteAllNodeInfos(cycleState *framework.CycleState, frameworkHandle handle.BinderFrameworkHandle) error { + nodeInfos := frameworkHandle.ListNodeInfos() + + allNodeInfoData := &AllNodeInfos{ + NodeInfos: nodeInfos, + } + cycleState.Write(AllNodeInfoKey, allNodeInfoData) + + return nil +} + +func ReadAllNodeInfos(cycleState *framework.CycleState) ([]framework.NodeInfo, error) { + nodeInfoData, err := cycleState.Read(AllNodeInfoKey) + if err != nil { + return nil, err + } + + allNodeInfos, ok := nodeInfoData.(*AllNodeInfos) + if !ok { + return nil, fmt.Errorf("%+v convert to helper.AllNodeInfos error", nodeInfoData) + } + return allNodeInfos.NodeInfos, nil +} diff --git a/pkg/plugins/interpodaffinity/util.go b/pkg/plugins/interpodaffinity/util.go new file mode 100644 index 00000000..44529c68 --- /dev/null +++ b/pkg/plugins/interpodaffinity/util.go @@ -0,0 +1,294 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package interpodaffinity + +import ( + "context" + "fmt" + "sync/atomic" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + schedutil "github.com/kubewharf/godel-scheduler/pkg/scheduler/util" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" +) + +const ( + // ErrReasonExistingAntiAffinityRulesNotMatch is used for ExistingPodsAntiAffinityRulesNotMatch predicate error. + ErrReasonExistingAntiAffinityRulesNotMatch = "node(s) didn't satisfy existing pods anti-affinity rules" + // ErrReasonAffinityNotMatch is used for MatchInterPodAffinity predicate error. + ErrReasonAffinityNotMatch = "node(s) didn't match pod affinity/anti-affinity" + // ErrReasonAffinityRulesNotMatch is used for PodAffinityRulesNotMatch predicate error. + ErrReasonAffinityRulesNotMatch = "node(s) didn't match pod affinity rules" + // ErrReasonAntiAffinityRulesNotMatch is used for PodAntiAffinityRulesNotMatch predicate error. + ErrReasonAntiAffinityRulesNotMatch = "node(s) didn't match pod anti-affinity rules" +) + +// TODO(Huang-Wei): It might be possible to use "make(map[TopologyPair]*int64)" so that +// we can do atomic additions instead of using a global mutex, however we need to consider +// how to init each TopologyToMatchedTermCount. +type TopologyPair struct { + Key string + Value string +} + +type TopologyToMatchedTermCount map[TopologyPair]int64 + +func (m TopologyToMatchedTermCount) append(toAppend TopologyToMatchedTermCount) { + for pair := range toAppend { + m[pair] += toAppend[pair] + } +} + +func (m TopologyToMatchedTermCount) Clone() TopologyToMatchedTermCount { + copy := make(TopologyToMatchedTermCount, len(m)) + copy.append(m) + return copy +} + +// UpdateWithAffinityTerms updates the topologyToMatchedTermCount map with the specified value +// for each affinity term if "targetPod" matches ALL terms. +func (m TopologyToMatchedTermCount) UpdateWithAffinityTerms(targetPod *v1.Pod, nodeLabels map[string]string, affinityTerms []framework.AffinityTerm, value int64) { + if PodMatchesAllAffinityTerms(targetPod, affinityTerms) { + for _, t := range affinityTerms { + if topologyValue, ok := nodeLabels[t.TopologyKey]; ok { + pair := TopologyPair{Key: t.TopologyKey, Value: topologyValue} + m[pair] += value + // value could be a negative value, hence we delete the entry if + // the entry is down to zero. + if m[pair] == 0 { + delete(m, pair) + } + } + } + } +} + +// UpdateWithAntiAffinityTerms updates the topologyToMatchedTermCount map with the specified value +// for each anti-affinity term that matches the target pod. +func (m TopologyToMatchedTermCount) UpdateWithAntiAffinityTerms(targetPod *v1.Pod, nodeLabels map[string]string, antiAffinityTerms []framework.AffinityTerm, value int64) { + // Check anti-affinity terms.
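+ // Unlike UpdateWithAffinityTerms above, which only counts a pod when it matches ALL of the
+ // affinity terms, each anti-affinity term here is evaluated on its own: a target pod that
+ // matches any single term increments the counter for that term's topology pair.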
+ for _, a := range antiAffinityTerms { + if schedutil.PodMatchesTermsNamespaceAndSelector(targetPod, a.Namespaces, a.Selector) { + if topologyValue, ok := nodeLabels[a.TopologyKey]; ok { + pair := TopologyPair{Key: a.TopologyKey, Value: topologyValue} + m[pair] += value + // value could be a negative value, hence we delete the entry if + // the entry is down to zero. + if m[pair] == 0 { + delete(m, pair) + } + } + } + } +} + +// PreFilterState computed at PreFilter and used at Filter. +type PreFilterState struct { + // A map of topology pairs to the number of existing pods that have anti-affinity terms that match the "pod". + TopologyToMatchedExistingAntiAffinityTerms TopologyToMatchedTermCount + // A map of topology pairs to the number of existing pods that match the affinity terms of the "pod". + TopologyToMatchedAffinityTerms TopologyToMatchedTermCount + // A map of topology pairs to the number of existing pods that match the anti-affinity terms of the "pod". + TopologyToMatchedAntiAffinityTerms TopologyToMatchedTermCount + // PodInfo of the incoming pod. + PodInfo *framework.PodInfo +} + +// Clone the prefilter state. +func (s *PreFilterState) Clone() framework.StateData { + if s == nil { + return nil + } + + copy := PreFilterState{} + copy.TopologyToMatchedAffinityTerms = s.TopologyToMatchedAffinityTerms.Clone() + copy.TopologyToMatchedAntiAffinityTerms = s.TopologyToMatchedAntiAffinityTerms.Clone() + copy.TopologyToMatchedExistingAntiAffinityTerms = s.TopologyToMatchedExistingAntiAffinityTerms.Clone() + // No need to deep copy the podInfo because it shouldn't change. + copy.PodInfo = s.PodInfo + + return &copy +} + +// UpdateWithPod updates the preFilterState counters with the (anti)affinity matches for the given pod. +func (s *PreFilterState) UpdateWithPod(updatedPod *v1.Pod, nodeInfo framework.NodeInfo, multiplier int64) error { + if s == nil { + return nil + } + + podLauncher, err := podutil.GetPodLauncher(updatedPod) + if err != nil { + return fmt.Errorf("error getting pod launcher: %v", err) + } + + nodeLabels := nodeInfo.GetNodeLabels(podLauncher) + + // Update matching existing anti-affinity terms. + // TODO(#91058): AddPod/RemovePod should pass a *framework.PodInfo type instead of *v1.Pod. + updatedPodInfo := framework.NewPodInfo(updatedPod) + s.TopologyToMatchedExistingAntiAffinityTerms.UpdateWithAntiAffinityTerms(s.PodInfo.Pod, nodeLabels, updatedPodInfo.RequiredAntiAffinityTerms, multiplier) + + // Update matching incoming pod (anti)affinity terms. + s.TopologyToMatchedAffinityTerms.UpdateWithAffinityTerms(updatedPod, nodeLabels, s.PodInfo.RequiredAffinityTerms, multiplier) + s.TopologyToMatchedAntiAffinityTerms.UpdateWithAntiAffinityTerms(updatedPod, nodeLabels, s.PodInfo.RequiredAntiAffinityTerms, multiplier) + + return nil +} + +// PodMatchesAllAffinityTerms returns true IFF the given pod matches all the given terms.
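+// An empty term list returns false, so callers never treat a pod as "matching" when there are
+// no terms to match against.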
+func PodMatchesAllAffinityTerms(pod *v1.Pod, terms []framework.AffinityTerm) bool { + if len(terms) == 0 { + return false + } + for _, term := range terms { + if !schedutil.PodMatchesTermsNamespaceAndSelector(pod, term.Namespaces, term.Selector) { + return false + } + } + return true +} + +// GetTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node: +// (1) Whether it has PodAntiAffinity +// (2) Whether any AffinityTerm matches the incoming pod +func GetTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodes []framework.NodeInfo) TopologyToMatchedTermCount { + topoMaps := make([]TopologyToMatchedTermCount, len(nodes)) + index := int32(-1) + processNode := func(i int) { + nodeInfo := nodes[i] + topoMap := make(TopologyToMatchedTermCount) + for _, existingPod := range nodeInfo.GetPodsWithRequiredAntiAffinity() { + topoMap.UpdateWithAntiAffinityTerms(pod, nodeInfo.GetNodeLabels(existingPod.PodLauncher), existingPod.RequiredAntiAffinityTerms, 1) + } + if len(topoMap) != 0 { + topoMaps[atomic.AddInt32(&index, 1)] = topoMap + } + } + parallelize.Until(context.Background(), len(nodes), processNode) + + result := make(TopologyToMatchedTermCount) + for i := 0; i <= int(index); i++ { + result.append(topoMaps[i]) + } + + return result +} + +// GetTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod". +// It returns two TopologyToMatchedTermCount maps that are checked later by the affinity +// predicate. With these maps available, the affinity predicate does not +// need to check all the pods in the cluster. +func GetTPMapMatchingIncomingAffinityAntiAffinity(podInfo *framework.PodInfo, allNodes []framework.NodeInfo) (TopologyToMatchedTermCount, TopologyToMatchedTermCount) { + affinityCounts := make(TopologyToMatchedTermCount) + antiAffinityCounts := make(TopologyToMatchedTermCount) + if len(podInfo.RequiredAffinityTerms) == 0 && len(podInfo.RequiredAntiAffinityTerms) == 0 { + return affinityCounts, antiAffinityCounts + } + + affinityCountsList := make([]TopologyToMatchedTermCount, len(allNodes)) + antiAffinityCountsList := make([]TopologyToMatchedTermCount, len(allNodes)) + index := int32(-1) + processNode := func(i int) { + nodeInfo := allNodes[i] + affinity := make(TopologyToMatchedTermCount) + antiAffinity := make(TopologyToMatchedTermCount) + for _, existingPod := range nodeInfo.GetPods() { + // Check affinity terms. + affinity.UpdateWithAffinityTerms(existingPod.Pod, nodeInfo.GetNodeLabels(existingPod.PodLauncher), podInfo.RequiredAffinityTerms, 1) + + // Check anti-affinity terms. + antiAffinity.UpdateWithAntiAffinityTerms(existingPod.Pod, nodeInfo.GetNodeLabels(existingPod.PodLauncher), podInfo.RequiredAntiAffinityTerms, 1) + } + + if len(affinity) > 0 || len(antiAffinity) > 0 { + k := atomic.AddInt32(&index, 1) + affinityCountsList[k] = affinity + antiAffinityCountsList[k] = antiAffinity + } + } + parallelize.Until(context.Background(), len(allNodes), processNode) + + for i := 0; i <= int(index); i++ { + affinityCounts.append(affinityCountsList[i]) + antiAffinityCounts.append(antiAffinityCountsList[i]) + } + + return affinityCounts, antiAffinityCounts +} + +// Checks if scheduling the pod onto this node would break any anti-affinity +// terms indicated by the existing pods.
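+// For example, if an existing pod on a node labeled zone=z1 has a required anti-affinity term
+// with topologyKey "zone" that matches the incoming pod, every node carrying zone=z1 has a
+// positive count in TopologyToMatchedExistingAntiAffinityTerms and is rejected here.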
+func SatisfyExistingPodsAntiAffinity(state *PreFilterState, nodeInfo framework.NodeInfo, podLauncher podutil.PodLauncher) bool { + if len(state.TopologyToMatchedExistingAntiAffinityTerms) > 0 { + // Iterate over topology pairs to get any of the pods being affected by + // the scheduled pod anti-affinity terms + for topologyKey, topologyValue := range nodeInfo.GetNodeLabels(podLauncher) { + tp := TopologyPair{Key: topologyKey, Value: topologyValue} + if state.TopologyToMatchedExistingAntiAffinityTerms[tp] > 0 { + return false + } + } + } + return true +} + +// Checks if the node satisfies the incoming pod's affinity rules. +func SatisfyPodAffinity(state *PreFilterState, nodeInfo framework.NodeInfo, podLauncher podutil.PodLauncher) bool { + podsExist := true + for _, term := range state.PodInfo.RequiredAffinityTerms { + if topologyValue, ok := nodeInfo.GetNodeLabels(podLauncher)[term.TopologyKey]; ok { + tp := TopologyPair{Key: term.TopologyKey, Value: topologyValue} + if state.TopologyToMatchedAffinityTerms[tp] <= 0 { + podsExist = false + } + } else { + // All topology labels must exist on the node. + return false + } + } + + if !podsExist { + // This pod may be the first pod in a series that have affinity to themselves. In order + // to not leave such pods in pending state forever, we check that if no other pod + // in the cluster matches the namespace and selector of this pod, the pod matches + // its own terms, and the node has all the requested topologies, then we allow the pod + // to pass the affinity check. + podInfo := state.PodInfo + if len(state.TopologyToMatchedAffinityTerms) == 0 && PodMatchesAllAffinityTerms(podInfo.Pod, podInfo.RequiredAffinityTerms) { + return true + } + return false + } + return true +} + +// Checks if the node satisfies the incoming pod's anti-affinity rules. +func SatisfyPodAntiAffinity(state *PreFilterState, nodeInfo framework.NodeInfo, podLauncher podutil.PodLauncher) bool { + if len(state.TopologyToMatchedAntiAffinityTerms) > 0 { + for _, term := range state.PodInfo.RequiredAntiAffinityTerms { + if topologyValue, ok := nodeInfo.GetNodeLabels(podLauncher)[term.TopologyKey]; ok { + tp := TopologyPair{Key: term.TopologyKey, Value: topologyValue} + if state.TopologyToMatchedAntiAffinityTerms[tp] > 0 { + return false + } + } + } + } + return true +} diff --git a/pkg/plugins/interpodaffinity/util_test.go b/pkg/plugins/interpodaffinity/util_test.go new file mode 100644 index 00000000..e3a18aba --- /dev/null +++ b/pkg/plugins/interpodaffinity/util_test.go @@ -0,0 +1,240 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package interpodaffinity + +import ( + "reflect" + "testing" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + framework_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper/framework-helper" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// TestGetTPMapMatchingIncomingAffinityAntiAffinity tests against method getTPMapMatchingIncomingAffinityAntiAffinity +// on Anti Affinity cases +func TestGetTPMapMatchingIncomingAffinityAntiAffinity(t *testing.T) { + newPodAffinityTerms := func(keys ...string) []v1.PodAffinityTerm { + var terms []v1.PodAffinityTerm + for _, key := range keys { + terms = append(terms, v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: key, + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "hostname", + }) + } + return terms + } + newPod := func(labels ...string) *v1.Pod { + labelMap := make(map[string]string) + for _, l := range labels { + labelMap[l] = "" + } + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "normal", Labels: labelMap}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + } + } + normalPodA := newPod("aaa") + normalPodB := newPod("bbb") + normalPodAB := newPod("aaa", "bbb") + nodeA := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"hostname": "nodeA"}}} + + tests := []struct { + name string + existingPods []*v1.Pod + nodes []*v1.Node + pod *v1.Pod + wantAffinityPodsMap TopologyToMatchedTermCount + wantAntiAffinityPodsMap TopologyToMatchedTermCount + }{ + { + name: "nil test", + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"}, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: make(TopologyToMatchedTermCount), + }, + { + name: "incoming pod without affinity/anti-affinity causes a no-op", + existingPods: []*v1.Pod{normalPodA}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"}, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: make(TopologyToMatchedTermCount), + }, + { + name: "no pod has label that violates incoming pod's affinity and anti-affinity", + existingPods: []*v1.Pod{normalPodB}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "aaa-anti"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"), + }, + }, + }, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: make(TopologyToMatchedTermCount), + }, + { + name: "existing pod matches incoming pod's affinity and anti-affinity - single term case", + existingPods: []*v1.Pod{normalPodA}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"), + }, + }, + }, + }, + wantAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 1, + }, + wantAntiAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: 
"nodeA"}: 1, + }, + }, + { + name: "existing pod matches incoming pod's affinity and anti-affinity - multiple terms case", + existingPods: []*v1.Pod{normalPodAB}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"), + }, + }, + }, + }, + wantAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 2, // 2 one for each term. + }, + wantAntiAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 1, + }, + }, + { + name: "existing pod not match incoming pod's affinity but matches anti-affinity", + existingPods: []*v1.Pod{normalPodA}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"), + }, + }, + }, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 1, + }, + }, + { + name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 1", + existingPods: []*v1.Pod{normalPodAB}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "anaffi-antiaffiti"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"), + }, + }, + }, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 1, + }, + }, + { + name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 2", + existingPods: []*v1.Pod{normalPodB}, + nodes: []*v1.Node{nodeA}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"), + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"), + }, + }, + }, + }, + wantAffinityPodsMap: make(TopologyToMatchedTermCount), + wantAntiAffinityPodsMap: TopologyToMatchedTermCount{ + {Key: "hostname", Value: "nodeA"}: 1, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + nodes := snapshot.NodeInfos().List() + gotAffinityPodsMap, gotAntiAffinityPodsMap := GetTPMapMatchingIncomingAffinityAntiAffinity(framework.NewPodInfo(tt.pod), nodes) + if !reflect.DeepEqual(gotAffinityPodsMap, tt.wantAffinityPodsMap) { + t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAffinityPodsMap = %#v, want %#v", gotAffinityPodsMap, tt.wantAffinityPodsMap) + } + if !reflect.DeepEqual(gotAntiAffinityPodsMap, 
tt.wantAntiAffinityPodsMap) { + t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAntiAffinityPodsMap = %#v, want %#v", gotAntiAffinityPodsMap, tt.wantAntiAffinityPodsMap) + } + }) + } +} diff --git a/pkg/plugins/podtopologyspread/util.go b/pkg/plugins/podtopologyspread/util.go new file mode 100644 index 00000000..adaae02d --- /dev/null +++ b/pkg/plugins/podtopologyspread/util.go @@ -0,0 +1,359 @@ +/* +Copyright 2024 The Godel Scheduler Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "context" + "fmt" + "math" + "sync/atomic" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/plugins/helper" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" +) + +const ( + // ErrReasonConstraintsNotMatch is used for PodTopologySpread filter error. + ErrReasonConstraintsNotMatch = "node(s) didn't match pod topology spread constraints" + // ErrReasonNodeLabelNotMatch is used when the node doesn't hold the required label. + ErrReasonNodeLabelNotMatch = ErrReasonConstraintsNotMatch + " (missing required label)" +) + +// PreFilterState computed at PreFilter and used at Filter. +// It combines TpKeyToCriticalPaths and TpPairToMatchNum to represent: +// (1) critical paths where the least pods are matched on each spread constraint. +// (2) number of pods matched on each spread constraint. +// A nil PreFilterState denotes it's not set at all (in PreFilter phase); +// An empty PreFilterState object denotes it's a legit state and is set in PreFilter phase. +// Fields are exported for comparison during testing. +type PreFilterState struct { + Constraints []TopologySpreadConstraint + // We record 2 critical paths instead of all critical paths here. + // criticalPaths[0].MatchNum always holds the minimum matching number. + // criticalPaths[1].MatchNum is always greater or equal to criticalPaths[0].MatchNum, but + // it's not guaranteed to be the 2nd minimum match number. + TpKeyToCriticalPaths map[string]*CriticalPaths + // TpPairToMatchNum is keyed with topologyPair, and valued with the number of matching pods. + TpPairToMatchNum map[TopologyPair]*int32 +} + +// Clone makes a copy of the given state. +func (s *PreFilterState) Clone() framework.StateData { + if s == nil { + return nil + } + copy := PreFilterState{ + // Constraints are shared because they don't change. 
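+ // (Selectors are parsed once in FilterTopologySpreadConstraints and are assumed to be
+ // read-only afterwards, so sharing the slice rather than deep-copying it is safe.)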
+ Constraints: s.Constraints, + TpKeyToCriticalPaths: make(map[string]*CriticalPaths, len(s.TpKeyToCriticalPaths)), + TpPairToMatchNum: make(map[TopologyPair]*int32, len(s.TpPairToMatchNum)), + } + for tpKey, paths := range s.TpKeyToCriticalPaths { + copy.TpKeyToCriticalPaths[tpKey] = &CriticalPaths{paths[0], paths[1]} + } + for tpPair, matchNum := range s.TpPairToMatchNum { + copyPair := TopologyPair{Key: tpPair.Key, Value: tpPair.Value} + copyCount := *matchNum + copy.TpPairToMatchNum[copyPair] = &copyCount + } + return &copy +} + +func (s *PreFilterState) UpdateWithPod(updatedPod, preemptorPod *v1.Pod, nodeInfo framework.NodeInfo, delta int32) { + if s == nil || updatedPod.Namespace != preemptorPod.Namespace || nodeInfo == nil { + return + } + podLauncher, _ := podutil.GetPodLauncher(updatedPod) + nodeLabels := nodeInfo.GetNodeLabels(podLauncher) + + if !NodeLabelsMatchSpreadConstraints(nodeLabels, s.Constraints) { + return + } + + podLabelSet := labels.Set(updatedPod.Labels) + for _, constraint := range s.Constraints { + if !constraint.Selector.Matches(podLabelSet) { + continue + } + + k, v := constraint.TopologyKey, nodeLabels[constraint.TopologyKey] + pair := TopologyPair{Key: k, Value: v} + *s.TpPairToMatchNum[pair] += delta + + s.TpKeyToCriticalPaths[k].Update(v, *s.TpPairToMatchNum[pair]) + } +} + +type TopologyPair struct { + Key string + Value string +} + +// TopologySpreadConstraint is an internal version for v1.TopologySpreadConstraint +// and where the selector is parsed. +// Fields are exported for comparison during testing. +type TopologySpreadConstraint struct { + MaxSkew int32 + TopologyKey string + Selector labels.Selector +} + +// CAVEAT: the reason that `[2]criticalPath` can work is based on the implementation of current +// preemption algorithm, in particular the following 2 facts: +// Fact 1: we only preempt pods on the same node, instead of pods on multiple nodes. +// Fact 2: each node is evaluated on a separate copy of the preFilterState during its preemption cycle. +// If we plan to turn to a more complex algorithm like "arbitrary pods on multiple nodes", this +// structure needs to be revisited. +// Fields are exported for comparison during testing. +type CriticalPaths [2]struct { + // TopologyValue denotes the topology value mapping to topology key. + TopologyValue string + // MatchNum denotes the number of matching pods. + MatchNum int32 +} + +func NewCriticalPaths() *CriticalPaths { + return &CriticalPaths{{MatchNum: math.MaxInt32}, {MatchNum: math.MaxInt32}} +} + +func (p *CriticalPaths) Sort() { + if p[0].MatchNum == p[1].MatchNum && p[0].TopologyValue > p[1].TopologyValue { + // Swap TopologyValue to make them sorted alphabetically.
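+ // (This branch is only reached when both MatchNum values are equal, so ordering the
+ // topology values alone is sufficient to produce a deterministic result.)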
+ p[0].TopologyValue, p[1].TopologyValue = p[1].TopologyValue, p[0].TopologyValue + } +} + +func (p *CriticalPaths) Update(tpVal string, num int32) { + // first verify if `tpVal` exists or not + i := -1 + if tpVal == p[0].TopologyValue { + i = 0 + } else if tpVal == p[1].TopologyValue { + i = 1 + } + + if i >= 0 { + // `tpVal` exists + p[i].MatchNum = num + if p[0].MatchNum > p[1].MatchNum { + // swap paths[0] and paths[1] + p[0], p[1] = p[1], p[0] + } + } else { + // `tpVal` doesn't exist + if num < p[0].MatchNum { + // update paths[1] with paths[0] + p[1] = p[0] + // update paths[0] + p[0].TopologyValue, p[0].MatchNum = tpVal, num + } else if num < p[1].MatchNum { + // update paths[1] + p[1].TopologyValue, p[1].MatchNum = tpVal, num + } + } +} + +func GetArgs(obj runtime.Object) (config.PodTopologySpreadArgs, error) { + if obj == nil { + return config.PodTopologySpreadArgs{}, nil + } + + ptr, ok := obj.(*config.PodTopologySpreadArgs) + if !ok { + return config.PodTopologySpreadArgs{}, fmt.Errorf("want args to be of type PodTopologySpreadArgs, got %T", obj) + } + return *ptr, nil +} + +func FilterTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint, action v1.UnsatisfiableConstraintAction) ([]TopologySpreadConstraint, error) { + var result []TopologySpreadConstraint + for _, c := range constraints { + if c.WhenUnsatisfiable == action { + selector, err := metav1.LabelSelectorAsSelector(c.LabelSelector) + if err != nil { + return nil, err + } + result = append(result, TopologySpreadConstraint{ + MaxSkew: c.MaxSkew, + TopologyKey: c.TopologyKey, + Selector: selector, + }) + } + } + return result, nil +} + +func SizeHeuristic(nodes int, constraints []TopologySpreadConstraint) int { + for _, c := range constraints { + if c.TopologyKey == v1.LabelHostname { + return nodes + } + } + return 0 +} + +// NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread Constraints are present in node labels. +func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []TopologySpreadConstraint) bool { + for _, c := range constraints { + if _, ok := nodeLabels[c.TopologyKey]; !ok { + return false + } + } + return true +} + +func CountPodsMatchSelector(podInfos []*framework.PodInfo, selector labels.Selector, ns string, podLanucher podutil.PodLauncher) int { + count := 0 + for _, p := range podInfos { + // Bypass terminating Pod (see #87621). 
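+ // Pods in a different namespace or launched via a different pod launcher are skipped as
+ // well; only pods in the incoming pod's namespace with the same launcher count toward
+ // the spread.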
+ if p.Pod.DeletionTimestamp != nil || p.Pod.Namespace != ns || p.PodLauncher != podLanucher { + continue + } + if selector.Matches(labels.Set(p.Pod.Labels)) { + count++ + } + } + return count +} + +func IsNodeNil(nodeInfo framework.NodeInfo, podLanucher podutil.PodLauncher) bool { + if nodeInfo == nil { + return true + } + + switch podLanucher { + case podutil.Kubelet: + if nodeInfo.GetNode() != nil { + return false + } + case podutil.NodeManager: + if nodeInfo.GetNMNode() != nil { + return false + } + } + return true +} + +func GetPreFilterState(pod *v1.Pod, allNodes []framework.NodeInfo, constraints []TopologySpreadConstraint) PreFilterState { + state := PreFilterState{ + Constraints: constraints, + TpKeyToCriticalPaths: make(map[string]*CriticalPaths, len(constraints)), + TpPairToMatchNum: make(map[TopologyPair]*int32, SizeHeuristic(len(allNodes), constraints)), + } + for _, nodeInfo := range allNodes { + for _, podLanucher := range podutil.PodLanucherTypes { + if IsNodeNil(nodeInfo, podLanucher) { + continue + } + + // In accordance to design, if NodeAffinity or NodeSelector is defined, + // spreading is applied to nodes that pass those filters. + if !helper.PodMatchesNodeSelectorAndAffinityTerms(pod, nodeInfo, podLanucher) { + continue + } + nodeLabels := nodeInfo.GetNodeLabels(podLanucher) + // Ensure current node's labels contains all topologyKeys in 'Constraints'. + if !NodeLabelsMatchSpreadConstraints(nodeLabels, constraints) { + continue + } + for _, c := range constraints { + pair := TopologyPair{Key: c.TopologyKey, Value: nodeLabels[c.TopologyKey]} + state.TpPairToMatchNum[pair] = new(int32) + } + } + } + + processNode := func(i int) { + nodeInfo := allNodes[i] + + for _, podLanucher := range podutil.PodLanucherTypes { + if IsNodeNil(nodeInfo, podLanucher) { + continue + } + + nodeLabels := nodeInfo.GetNodeLabels(podLanucher) + for _, constraint := range constraints { + pair := TopologyPair{Key: constraint.TopologyKey, Value: nodeLabels[constraint.TopologyKey]} + tpCount := state.TpPairToMatchNum[pair] + if state.TpPairToMatchNum[pair] != nil { + count := CountPodsMatchSelector(nodeInfo.GetPods(), constraint.Selector, pod.Namespace, podLanucher) + atomic.AddInt32(tpCount, int32(count)) + } + } + } + } + parallelize.Until(context.Background(), len(allNodes), processNode) + + // calculate min match for each topology pair + for i := 0; i < len(constraints); i++ { + key := constraints[i].TopologyKey + state.TpKeyToCriticalPaths[key] = NewCriticalPaths() + } + for pair, num := range state.TpPairToMatchNum { + state.TpKeyToCriticalPaths[pair.Key].Update(pair.Value, *num) + } + + return state +} + +func IsSatisfyPodTopologySpreadConstraints(s *PreFilterState, pod *v1.Pod, nodeInfo framework.NodeInfo, podLauncher podutil.PodLauncher) *framework.Status { + nodeLabels := nodeInfo.GetNodeLabels(podLauncher) + podLabelSet := labels.Set(pod.Labels) + for _, c := range s.Constraints { + tpKey := c.TopologyKey + tpVal, ok := nodeLabels[c.TopologyKey] + if !ok { + klog.V(5).Infof("node '%s' doesn't have required label '%s'", nodeInfo.GetNodeName(), tpKey) + return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonNodeLabelNotMatch) + } + + selfMatchNum := int32(0) + if c.Selector.Matches(podLabelSet) { + selfMatchNum = 1 + } + + pair := TopologyPair{Key: tpKey, Value: tpVal} + paths, ok := s.TpKeyToCriticalPaths[tpKey] + if !ok { + // error which should not happen + klog.Errorf("internal error: get paths from key %q of %#v", tpKey, s.TpKeyToCriticalPaths) + continue + } + 
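+ // The check below compares this node's matching-pod count (plus the incoming pod itself if it
+ // matches the selector) against the global minimum for the topology key. For example, with
+ // maxSkew=1, a topology value that already has 3 matching pods while the global minimum is 1
+ // yields a skew of 3 + 1 - 1 = 3, which exceeds maxSkew and makes the node unschedulable.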
// judging criteria: + // 'existing matching num' + 'if self-match (1 or 0)' - 'global min matching num' <= 'maxSkew' + minMatchNum := paths[0].MatchNum + matchNum := int32(0) + if tpCount := s.TpPairToMatchNum[pair]; tpCount != nil { + matchNum = *tpCount + } + skew := matchNum + selfMatchNum - minMatchNum + if skew > c.MaxSkew { + klog.V(5).Infof("node '%s' failed spreadConstraint[%s]: MatchNum(%d) + selfMatchNum(%d) - minMatchNum(%d) > maxSkew(%d)", nodeInfo.GetNodeName(), tpKey, matchNum, selfMatchNum, minMatchNum, c.MaxSkew) + return framework.NewStatus(framework.Unschedulable, ErrReasonConstraintsNotMatch) + } + } + + return nil +} diff --git a/pkg/scheduler/cache/cache_test.go b/pkg/scheduler/cache/cache_test.go index 9d976030..f8d6fd4d 100644 --- a/pkg/scheduler/cache/cache_test.go +++ b/pkg/scheduler/cache/cache_test.go @@ -1438,11 +1438,11 @@ func TestSchedulerCache_UpdateSubClusterSnapshot(t *testing.T) { // Update snapshot and check nodes. for i := 0; i < len(snapshots); i++ { snapshot := snapshots[i] - if snapshot.nodeSlices.inPartitionNodeSlice.Len() > 0 { - t.Errorf("unexpected inPartitionNodeSlice length. expect: %v, got: %v", 0, snapshot.nodeSlices.inPartitionNodeSlice.Len()) + if snapshot.nodeSlices.InPartitionNodeSlice.Len() > 0 { + t.Errorf("unexpected inPartitionNodeSlice length. expect: %v, got: %v", 0, snapshot.nodeSlices.InPartitionNodeSlice.Len()) } - if snapshot.nodeSlices.outOfPartitionNodeSlice.Len() != len(test.subCluster[i]) { - t.Errorf("unexpected subcluster snapshot nodeslice num. expect: %v, got: %v", len(test.subCluster[i]), snapshot.nodeSlices.outOfPartitionNodeSlice.Len()) + if snapshot.nodeSlices.OutOfPartitionNodeSlice.Len() != len(test.subCluster[i]) { + t.Errorf("unexpected subcluster snapshot nodeslice num. expect: %v, got: %v", len(test.subCluster[i]), snapshot.nodeSlices.OutOfPartitionNodeSlice.Len()) } for j := 0; j < len(test.subCluster[i]); j++ { expect := test.subCluster[i][j] @@ -1462,8 +1462,8 @@ func TestSchedulerCache_UpdateSubClusterSnapshot(t *testing.T) { } } if shouldBePlaceHolder { - if !reflect.DeepEqual(nodestore.GlobalNodeInfoPlaceHolder, got) { - t.Errorf("unexpected node, expect: %v, got %v.", nodestore.GlobalNodeInfoPlaceHolder, got) + if !reflect.DeepEqual(framework.GlobalNodeInfoPlaceHolder, got) { + t.Errorf("unexpected node, expect: %v, got %v.", framework.GlobalNodeInfoPlaceHolder, got) } // if diff := cmp.Diff(globalNodeInfoPlaceHolder, got); len(diff) > 0 { // t.Errorf("unexpected node, should be PlaceHolder. diff: %v", diff) @@ -1828,8 +1828,8 @@ func TestSchedulerCache_UpdateSnapshot(t *testing.T) { } // Check number of nodes with pods with affinity - if snapshot.nodeSlices.havePodsWithAffinityNodeSlice.Len() != test.expectedHavePodsWithAffinity { - t.Errorf("unexpected number of HavePodsWithAffinity nodes. Expected: %v, got: %v", test.expectedHavePodsWithAffinity, snapshot.nodeSlices.havePodsWithAffinityNodeSlice.Len()) + if snapshot.nodeSlices.HavePodsWithAffinityNodeSlice.Len() != test.expectedHavePodsWithAffinity { + t.Errorf("unexpected number of HavePodsWithAffinity nodes. Expected: %v, got: %v", test.expectedHavePodsWithAffinity, snapshot.nodeSlices.HavePodsWithAffinityNodeSlice.Len()) } // Always update the snapshot at the end of operations and compare it. @@ -1856,8 +1856,8 @@ func compareCacheWithNodeInfoSnapshot(cache *schedulerCache, snapshot *Snapshot) } // Compare the lists. 
- if snapshot.nodeSlices.outOfPartitionNodeSlice.Len() != cache.CommonStoresSwitch.Find(nodestore.Name).(*nodestore.NodeStore).Len() { - return fmt.Errorf("unexpected number of nodes in NodeInfoList. Expected: %v, got: %v", cache.CommonStoresSwitch.Find(nodestore.Name).(*nodestore.NodeStore).Len(), snapshot.nodeSlices.outOfPartitionNodeSlice.Len()) + if snapshot.nodeSlices.OutOfPartitionNodeSlice.Len() != cache.CommonStoresSwitch.Find(nodestore.Name).(*nodestore.NodeStore).Len() { + return fmt.Errorf("unexpected number of nodes in NodeInfoList. Expected: %v, got: %v", cache.CommonStoresSwitch.Find(nodestore.Name).(*nodestore.NodeStore).Len(), snapshot.nodeSlices.OutOfPartitionNodeSlice.Len()) } expectedNodeInfoList := make([]framework.NodeInfo, 0, cache.CommonStoresSwitch.Find(nodestore.Name).(*nodestore.NodeStore).Len()) @@ -1888,7 +1888,7 @@ func compareCacheWithNodeInfoSnapshot(cache *schedulerCache, snapshot *Snapshot) for _, expected := range expectedHavePodsWithAffinityNodeInfoList { find := false - for _, got := range snapshot.nodeSlices.havePodsWithAffinityNodeSlice.Nodes() { + for _, got := range snapshot.nodeSlices.HavePodsWithAffinityNodeSlice.Nodes() { if got == expected { find = true } @@ -2031,7 +2031,7 @@ func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) { t.Error(err) } nodeNames := sets.NewString() - for _, nodeInfo := range snapshot.nodeSlices.outOfPartitionNodeSlice.Nodes() { + for _, nodeInfo := range snapshot.nodeSlices.OutOfPartitionNodeSlice.Nodes() { nodeNames.Insert(nodeInfo.GetNodeName()) } if !test.expected.Equal(nodeNames) { diff --git a/pkg/scheduler/cache/commonstores/node_store/node_store.go b/pkg/scheduler/cache/commonstores/node_store/node_store.go index 2f2bfd90..c13b6ffa 100644 --- a/pkg/scheduler/cache/commonstores/node_store/node_store.go +++ b/pkg/scheduler/cache/commonstores/node_store/node_store.go @@ -54,8 +54,6 @@ func init() { // --------------------------------------------------------------------------------------- -var GlobalNodeInfoPlaceHolder = framework.NewNodeInfo() - func nodeInfoBelongToSubCluster(n framework.NodeInfo, matchedSubCluster string) bool { return matchedSubCluster == framework.DefaultSubCluster || n.GetNode() != nil && n.GetNode().Labels[framework.GetGlobalSubClusterKey()] == matchedSubCluster || @@ -413,7 +411,7 @@ func (cacheStore *NodeStore) UpdateSnapshot(store commonstore.Store) error { if subClusterConcurrentSchedulingEnabled && !nodeInfoBelongToSubCluster(nodeInfo, subCluster) { // ATTENTION: We should ensure that the `globalNodeInfoPlaceHolder` will not be added to nodeslice. - snapshotStore.Add(nodeName, GlobalNodeInfoPlaceHolder) + snapshotStore.Add(nodeName, framework.GlobalNodeInfoPlaceHolder) } else { // Use `s.Add` instead of `snapshot.Add` to maintain nodeSlices. snapshotStore.Add(nodeName, nodeInfo.Clone()) diff --git a/pkg/scheduler/cache/snapshot.go b/pkg/scheduler/cache/snapshot.go index 60cf4c58..36f4a38e 100644 --- a/pkg/scheduler/cache/snapshot.go +++ b/pkg/scheduler/cache/snapshot.go @@ -36,14 +36,14 @@ type Snapshot struct { handler commoncache.CacheHandler - nodeSlices *nodeSlices + nodeSlices *framework.NodeSlices } var _ framework.SharedLister = &Snapshot{} // NewEmptySnapshot initializes a Snapshot struct and returns it. 
func NewEmptySnapshot(handler commoncache.CacheHandler) *Snapshot { - nodeSlices := newNodeSlices() + nodeSlices := framework.NewNodeSlices() s := &Snapshot{ CommonStoresSwitch: commonstore.MakeStoreSwitch(handler, commonstore.Snapshot, commonstores.GlobalRegistries, orderedStoreNames), @@ -53,8 +53,8 @@ func NewEmptySnapshot(handler commoncache.CacheHandler) *Snapshot { nodeSlices: nodeSlices, } nodeStore := s.CommonStoresSwitch.Find(nodestore.Name) - nodeStore.(*nodestore.NodeStore).AfterAdd = func(n framework.NodeInfo) { nodeSlices.update(n, true) } - nodeStore.(*nodestore.NodeStore).AfterDelete = func(n framework.NodeInfo) { nodeSlices.update(n, false) } + nodeStore.(*nodestore.NodeStore).AfterAdd = func(n framework.NodeInfo) { nodeSlices.Update(n, true) } + nodeStore.(*nodestore.NodeStore).AfterDelete = func(n framework.NodeInfo) { nodeSlices.Update(n, false) } handler.SetNodeHandler(nodeStore.(*nodestore.NodeStore).GetNodeInfo) handler.SetPodOpFunc(podOpFunc(s.CommonStoresSwitch)) @@ -89,7 +89,7 @@ func (s *Snapshot) NodeInfos() framework.NodeInfoLister { // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. func (s *Snapshot) NumNodes() int { - return s.nodeSlices.inPartitionNodeSlice.Len() + s.nodeSlices.outOfPartitionNodeSlice.Len() + return s.nodeSlices.InPartitionNodeSlice.Len() + s.nodeSlices.OutOfPartitionNodeSlice.Len() } // List returns the list of nodes in the snapshot. @@ -97,14 +97,14 @@ func (s *Snapshot) NumNodes() int { // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. func (s *Snapshot) List() []framework.NodeInfo { - return append(s.nodeSlices.inPartitionNodeSlice.Nodes(), s.nodeSlices.outOfPartitionNodeSlice.Nodes()...) + return append(s.nodeSlices.InPartitionNodeSlice.Nodes(), s.nodeSlices.OutOfPartitionNodeSlice.Nodes()...) } // InPartitionList returns the list of nodes which are in the partition of the scheduler // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. func (s *Snapshot) InPartitionList() []framework.NodeInfo { - return s.nodeSlices.inPartitionNodeSlice.Nodes() + return s.nodeSlices.InPartitionNodeSlice.Nodes() } // OutOfPartitionList returns the list of nodes which are out of the partition of the scheduler @@ -112,7 +112,7 @@ func (s *Snapshot) InPartitionList() []framework.NodeInfo { // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. func (s *Snapshot) OutOfPartitionList() []framework.NodeInfo { - return s.nodeSlices.outOfPartitionNodeSlice.Nodes() + return s.nodeSlices.OutOfPartitionNodeSlice.Nodes() } // HavePodsWithAffinityList returns the list of nodes with at least one pod with inter-pod affinity @@ -120,7 +120,7 @@ func (s *Snapshot) OutOfPartitionList() []framework.NodeInfo { // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. 
func (s *Snapshot) HavePodsWithAffinityList() []framework.NodeInfo { - return s.nodeSlices.havePodsWithAffinityNodeSlice.Nodes() + return s.nodeSlices.HavePodsWithAffinityNodeSlice.Nodes() } // HavePodsWithRequiredAntiAffinityList returns the list of nodes with at least one pod with @@ -129,11 +129,11 @@ func (s *Snapshot) HavePodsWithAffinityList() []framework.NodeInfo { // Note: Snapshot operations are lock-free. Our premise for removing lock: even if read operations // are concurrent, write operations(AssumePod/ForgetPod/AddOneVictim) should always be serial. func (s *Snapshot) HavePodsWithRequiredAntiAffinityList() []framework.NodeInfo { - return s.nodeSlices.havePodsWithRequiredAntiAffinityNodeSlice.Nodes() + return s.nodeSlices.HavePodsWithRequiredAntiAffinityNodeSlice.Nodes() } func (s *Snapshot) Len() int { - return len(s.nodeSlices.inPartitionNodeSlice.Nodes()) + len(s.nodeSlices.outOfPartitionNodeSlice.Nodes()) + return len(s.nodeSlices.InPartitionNodeSlice.Nodes()) + len(s.nodeSlices.OutOfPartitionNodeSlice.Nodes()) } // Get returns the NodeInfo of the given node name. @@ -168,48 +168,3 @@ func (s *Snapshot) ForgetPod(podInfo *framework.CachePodInfo) error { func (s *Snapshot) FindStore(storeName commonstore.StoreName) commonstore.Store { return s.CommonStoresSwitch.Find(storeName) } - -// -------------------------------------- node slice for snapshot -------------------------------------- - -type nodeSlices struct { - inPartitionNodeSlice framework.NodeHashSlice - outOfPartitionNodeSlice framework.NodeHashSlice - havePodsWithAffinityNodeSlice framework.NodeHashSlice - havePodsWithRequiredAntiAffinityNodeSlice framework.NodeHashSlice -} - -func newNodeSlices() *nodeSlices { - return &nodeSlices{ - inPartitionNodeSlice: framework.NewNodeHashSlice(), - outOfPartitionNodeSlice: framework.NewNodeHashSlice(), - havePodsWithAffinityNodeSlice: framework.NewNodeHashSlice(), - havePodsWithRequiredAntiAffinityNodeSlice: framework.NewNodeHashSlice(), - } -} - -func op(slice framework.NodeHashSlice, n framework.NodeInfo, isAdd bool) { - if isAdd { - _ = slice.Add(n) - } else { - _ = slice.Del(n) - } -} - -func (s *nodeSlices) update(n framework.NodeInfo, isAdd bool) { - // ATTENTION: We should ensure that the `globalNodeInfoPlaceHolder` will not be added to nodelice. 
- if n == nodestore.GlobalNodeInfoPlaceHolder { - return - } - - if n.GetNodeInSchedulerPartition() || n.GetNMNodeInSchedulerPartition() { - op(s.inPartitionNodeSlice, n, isAdd) - } else { - op(s.outOfPartitionNodeSlice, n, isAdd) - } - if len(n.GetPodsWithAffinity()) > 0 { - op(s.havePodsWithAffinityNodeSlice, n, isAdd) - } - if len(n.GetPodsWithRequiredAntiAffinity()) > 0 { - op(s.havePodsWithRequiredAntiAffinityNodeSlice, n, isAdd) - } -} diff --git a/pkg/scheduler/factory.go b/pkg/scheduler/factory.go index fa40fb39..1b447b37 100644 --- a/pkg/scheduler/factory.go +++ b/pkg/scheduler/factory.go @@ -30,11 +30,13 @@ import ( "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" godelcache "github.com/kubewharf/godel-scheduler/pkg/scheduler/cache" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/coscheduling" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/interpodaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeports" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/noderesources" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeunschedulable" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podlauncher" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podtopologyspread" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/tainttoleration" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/volumebinding" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/preemption-plugins/searching/newlystartedprotectionchecker" @@ -58,6 +60,8 @@ func basePluginsForKubelet() *framework.PluginCollection { framework.NewPluginSpec(nodeports.Name), framework.NewPluginSpec(volumebinding.Name), framework.NewPluginSpec(nodeaffinity.Name), + framework.NewPluginSpec(interpodaffinity.Name), + framework.NewPluginSpec(podtopologyspread.Name), framework.NewPluginSpec(tainttoleration.Name), }, Searchings: []*framework.VictimSearchingPluginCollectionSpec{ @@ -122,6 +126,8 @@ func basePluginsForNodeManager() *framework.PluginCollection { framework.NewPluginSpec(nodeports.Name), framework.NewPluginSpec(volumebinding.Name), framework.NewPluginSpec(nodeaffinity.Name), + framework.NewPluginSpec(interpodaffinity.Name), + framework.NewPluginSpec(podtopologyspread.Name), framework.NewPluginSpec(tainttoleration.Name), }, } diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/filtering.go b/pkg/scheduler/framework/plugins/interpodaffinity/filtering.go new file mode 100644 index 00000000..fc03eac4 --- /dev/null +++ b/pkg/scheduler/framework/plugins/interpodaffinity/filtering.go @@ -0,0 +1,130 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package interpodaffinity + +import ( + "context" + "fmt" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/interpodaffinity" + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + v1 "k8s.io/api/core/v1" +) + +const ( + // preFilterStateKey is the key in CycleState to InterPodAffinity pre-computed data for Filtering. + // Using the name of the plugin will likely help us avoid collisions with other plugins. + preFilterStateKey = "PreFilter" + Name +) + +// PreFilter invoked at the prefilter extension point. +func (pl *InterPodAffinity) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status { + var allNodes []framework.NodeInfo + var nodesWithRequiredAntiAffinityPods []framework.NodeInfo + allNodes = pl.sharedLister.NodeInfos().List() + nodesWithRequiredAntiAffinityPods = pl.sharedLister.NodeInfos().HavePodsWithRequiredAntiAffinityList() + + podInfo := framework.NewPodInfo(pod) + if podInfo.ParseError != nil { + return framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("parsing pod: %+v", podInfo.ParseError)) + } + + // existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity + existingPodAntiAffinityMap := utils.GetTPMapMatchingExistingAntiAffinity(pod, nodesWithRequiredAntiAffinityPods) + + // incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity + // incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity + incomingPodAffinityMap, incomingPodAntiAffinityMap := utils.GetTPMapMatchingIncomingAffinityAntiAffinity(podInfo, allNodes) + + s := &utils.PreFilterState{ + TopologyToMatchedAffinityTerms: incomingPodAffinityMap, + TopologyToMatchedAntiAffinityTerms: incomingPodAntiAffinityMap, + TopologyToMatchedExistingAntiAffinityTerms: existingPodAntiAffinityMap, + PodInfo: podInfo, + } + + cycleState.Write(preFilterStateKey, s) + return nil +} + +// PreFilterExtensions returns prefilter extensions, pod add and remove. +func (pl *InterPodAffinity) PreFilterExtensions() framework.PreFilterExtensions { + return pl +} + +// AddPod from pre-computed data in cycleState. +func (pl *InterPodAffinity) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + state, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + state.UpdateWithPod(podToAdd, nodeInfo, 1) + return nil +} + +// RemovePod from pre-computed data in cycleState. +func (pl *InterPodAffinity) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToRemove *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + state, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + state.UpdateWithPod(podToRemove, nodeInfo, -1) + return nil +} + +func getPreFilterState(cycleState *framework.CycleState) (*utils.PreFilterState, error) { + c, err := cycleState.Read(preFilterStateKey) + if err != nil { + // utils.PreFilterState doesn't exist, likely PreFilter wasn't invoked. 
+ return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err) + } + + s, ok := c.(*utils.PreFilterState) + if !ok { + return nil, fmt.Errorf("%+v convert to interpodaffinity.state error", c) + } + return s, nil +} + +// Filter invoked at the filter extension point. +// It checks if a pod can be scheduled on the specified node with pod affinity/anti-affinity configuration. +func (pl *InterPodAffinity) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + podLauncher, status := podlauncher.NodeFits(cycleState, pod, nodeInfo) + if status != nil { + return status + } + + state, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + + if !utils.SatisfyPodAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.UnschedulableAndUnresolvable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonAffinityRulesNotMatch) + } + + if !utils.SatisfyPodAntiAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.Unschedulable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonAntiAffinityRulesNotMatch) + } + + if !utils.SatisfyExistingPodsAntiAffinity(state, nodeInfo, podLauncher) { + return framework.NewStatus(framework.Unschedulable, utils.ErrReasonAffinityNotMatch, utils.ErrReasonExistingAntiAffinityRulesNotMatch) + } + + return nil +} diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go b/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go new file mode 100644 index 00000000..4a6c51f1 --- /dev/null +++ b/pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go @@ -0,0 +1,2315 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package interpodaffinity + +import ( + "context" + "fmt" + "reflect" + "strings" + "testing" + + nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/interpodaffinity" + + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + godelcache "github.com/kubewharf/godel-scheduler/pkg/scheduler/cache" + framework_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper/framework-helper" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +var ( + defaultNamespace = "" +) + +func createPodWithAffinityTerms(namespace, nodeName string, labels map[string]string, affinity, antiAffinity []v1.PodAffinityTerm) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + Namespace: namespace, + }, + Spec: v1.PodSpec{ + NodeName: nodeName, + Affinity: &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: affinity, + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: antiAffinity, + }, + }, + }, + } + +} + +func TestRequiredAffinitySingleNode(t *testing.T) { + podLabel := map[string]string{"service": "securityscan"} + labels1 := map[string]string{ + "region": "r1", + "zone": "z11", + } + podLabel2 := map[string]string{"security": "S1"} + node1 := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labels1}} + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + node *v1.Node + name string + wantStatus *framework.Status + }{ + { + pod: new(v1.Pod), + node: &node1, + name: "A pod that has no required pod affinity scheduling rules can schedule onto a node with no existing pods", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "satisfies with requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using In operator that matches the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan3", "value3"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "satisfies the pod with requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using not in operator in labelSelector that matches the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + Namespaces: []string{"DiffNameSpace"}, + }, + }, 
nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel, Namespace: "ns"}}}, + node: &node1, + name: "Does not satisfy the PodAffinity with labelSelector because of diff Namespace", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "Doesn't satisfy the PodAffinity because of unmatching labelSelector with the existing pod", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"WrongValue"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "satisfies the PodAffinity with different label Operators in multiple RequiredDuringSchedulingIgnoredDuringExecution ", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan2"}, + }, { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"WrongValue"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "The labelSelector requirements(items of matchExpressions) are ANDed, the pod cannot schedule onto the node because one of the matchExpression item don't match.", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: 
[]string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "satisfies the PodAffinity and PodAntiAffinity with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + }, + node: &node1, + name: "satisfies the PodAffinity and PodAntiAffinity and PodAntiAffinity symmetry with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "satisfies the PodAffinity but doesn't satisfy the PodAntiAffinity with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: 
[]metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + name: "satisfies the PodAffinity and PodAntiAffinity but doesn't satisfy PodAntiAffinity symmetry with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabel}}}, + node: &node1, + name: "pod matches its own Label in PodAffinity and that matches the existing pod Labels", + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabel, + }, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + name: "verify that PodAntiAffinity from existing pod is respected when pod has no AntiAffinity constraints. doesn't satisfy PodAntiAffinity symmetry with the existing pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabel, + }, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"securityscan", "value2"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + name: "verify that PodAntiAffinity from existing pod is respected when pod has no AntiAffinity constraints. 
satisfy PodAntiAffinity symmetry with the existing pod", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + name: "satisfies the PodAntiAffinity with existing pod but doesn't satisfy PodAntiAffinity symmetry with incoming pod", + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check a1: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel2, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", podLabel, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check a2: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, 
"", map[string]string{"abc": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", map[string]string{"def": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check b1: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"def": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "machine1", map[string]string{"abc": "", "xyz": ""}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "abc", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "def", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + node: &node1, + wantStatus: framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + name: "PodAntiAffinity symmetry check b2: incoming pod and existing pod partially match each other on AffinityTerms", + }, + { + name: "PodAffinity fails PreFilter with an invalid affinity label syntax", + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"{{.bad-value.}}"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"antivirusscan", "value2"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + node: &node1, + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + "Invalid value", + ), + }, + { + 
name: "PodAntiAffinity fails PreFilter with an invalid antiaffinity label syntax", + pod: createPodWithAffinityTerms(defaultNamespace, "", podLabel, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"foo"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"{{.bad-value.}}"}, + }, + }, + }, + TopologyKey: "node", + }, + }), + node: &node1, + wantStatus: framework.NewStatus( + framework.UnschedulableAndUnresolvable, + "Invalid value", + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(test.pods, []*v1.Node{test.node}, nil) + + p := &InterPodAffinity{ + sharedLister: snapshot, + } + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, test.pod) + if !preFilterStatus.IsSuccess() { + if !strings.Contains(preFilterStatus.Message(), test.wantStatus.Message()) { + t.Errorf("prefilter failed with status: %v", preFilterStatus) + } + } else { + nodeInfo := mustGetNodeInfo(t, snapshot, test.node.Name) + gotStatus := p.Filter(context.Background(), state, test.pod, nodeInfo) + if !reflect.DeepEqual(gotStatus, test.wantStatus) { + t.Errorf("status does not match: %v, want: %v", gotStatus, test.wantStatus) + } + } + }) + } +} + +func TestRequiredAffinityMultipleNodes(t *testing.T) { + podLabelA := map[string]string{ + "foo": "bar", + } + labelRgChina := map[string]string{ + "region": "China", + } + labelRgChinaAzAz1 := map[string]string{ + "region": "China", + "az": "az1", + } + labelRgIndia := map[string]string{ + "region": "India", + } + + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + wantStatuses []*framework.Status + name string + }{ + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + nil, + nil, + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "A pod can be scheduled onto all the nodes that have the same topology key & label value with one of them has an existing pod that matches the affinity rules", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"foo": "bar", "service": "securityscan"}, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + 
MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}}, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "The affinity rule is to schedule all of the pods of this collection to the same zone. The first pod of the collection " + + "should not be blocked from being scheduled onto any node, even there's no existing pod that matches the rule anywhere.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"foo": "bar", "service": "securityscan"}, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}}, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zoneLabel": "az1", "hostname": "h1"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zoneLabel": "az2", "hostname": "h2"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "The first pod of the collection can only be scheduled on nodes labelled with the requested topology keys", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. 
The pod can not be scheduled onto nodeA and nodeB.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "This test ensures that anti-affinity matches a pod when any term of the anti-affinity rule matches a pod.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc"}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. 
The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC", + }, + { + pod: createPodWithAffinityTerms("NS1", "", map[string]string{"foo": "123"}, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"foo": "bar"}, + Namespace: "NS1", + }, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + createPodWithAffinityTerms("NS2", "nodeC", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"123"}, + }, + }, + }, + TopologyKey: "region", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that matches the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "Test existing pod's anti-affinity: if an existing pod has a term with invalid topologyKey, labelSelector of the term is firstly checked, and then topologyKey of the term is also checked", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + 
name: "Test incoming pod's anti-affinity: even if labelSelector matches, we still check if topologyKey matches", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + name: "Test existing pod's anti-affinity: incoming pod wouldn't considered as a fit as it violates each existingPod's terms on all nodes", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeB", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's anti-affinity: incoming pod wouldn't considered as a fit as it at least violates one anti-affinity rule of existingPod", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: 
"invalid-node-label", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test existing pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when one term has invalid topologyKey", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "invalid-node-label", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "podA", Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test incoming pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when one term has invalid topologyKey", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + }, + name: "Test existing pod's anti-affinity: only when labelSelector and 
topologyKey both match, it's counted as a single term match - case when all terms have valid topologyKey", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAntiAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's anti-affinity: only when labelSelector and topologyKey both match, it's counted as a single term match - case when all terms have valid topologyKey", + }, + { + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "", "bar": ""}}, + }, + pods: []*v1.Pod{ + createPodWithAffinityTerms(defaultNamespace, "nodeA", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "labelA", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + createPodWithAffinityTerms(defaultNamespace, "nodeB", nil, nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "labelB", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: map[string]string{"region": "r1", "zone": "z3", "hostname": "nodeC"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.Unschedulable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonExistingAntiAffinityRulesNotMatch, + ), + nil, + }, + name: "Test existing pod's anti-affinity: existingPod on nodeA and nodeB has at least one anti-affinity term matches 
incoming pod, so incoming pod can only be scheduled to nodeC", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pod1", Labels: map[string]string{"foo": "", "bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{nil, nil}, + name: "Test incoming pod's affinity: firstly check if all affinityTerms match, and then check if all topologyKeys match", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "zone", + }, + }, nil), + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "pod1", Labels: map[string]string{"foo": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "pod2", Labels: map[string]string{"bar": ""}}, + Spec: v1.PodSpec{ + NodeName: "nodeB", + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "Test incoming pod's affinity: firstly check if all affinityTerms match, and then check if all topologyKeys match, and the match logic should be satisfied on the same pod", + }, + } + + for indexTest, test := range tests { + t.Run(test.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(test.pods, test.nodes, nil) + + for indexNode, node := range test.nodes { + p := &InterPodAffinity{ + sharedLister: snapshot, + } + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, test.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("prefilter failed with status: %v", preFilterStatus) + } + nodeInfo := mustGetNodeInfo(t, snapshot, node.Name) + gotStatus := p.Filter(context.Background(), state, test.pod, nodeInfo) + if !reflect.DeepEqual(gotStatus, test.wantStatuses[indexNode]) { + t.Errorf("index: %d status does not match: %v, want: %v", indexTest, 
gotStatus, test.wantStatuses[indexNode]) + } + } + }) + } +} + +func TestNMNodesFilter(t *testing.T) { + podLabelA := map[string]string{ + "foo": "bar", + } + labelRgChina := map[string]string{ + "region": "China", + } + labelRgChinaAzAz1 := map[string]string{ + "region": "China", + "az": "az1", + } + labelRgIndia := map[string]string{ + "region": "India", + } + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + nmNodes []*nodev1alpha1.NMNode + wantStatuses []*framework.Status + name string + }{ + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + nil, + nil, + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "All nodes are of NMNode type, that is, they are managed by the node manager. A pod can be scheduled onto all the nodes that have the same topology key & label value with one of them has an existing pod that matches the affinity rules", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine0"}, ObjectMeta: metav1.ObjectMeta{Name: "p0", Labels: podLabelA}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine0", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + fmt.Sprintf(podlauncher.ErrReasonTemplate, podutil.NodeManager), + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + fmt.Sprintf(podlauncher.ErrReasonTemplate, podutil.NodeManager), + ), + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + nil, + }, + name: "Since the pod required by affinity is on v1.node of machine0, all nodes corresponding to NMNode in the India region can be scheduled. 
However, since both machine0 and machine1 only have v1.Node, they cannot be scheduled.", + }, + { + pod: createPodWithAffinityTerms(defaultNamespace, "", nil, + []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, nil), + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + wantStatuses: []*framework.Status{ + nil, + nil, + framework.NewStatus( + framework.UnschedulableAndUnresolvable, + utils.ErrReasonAffinityNotMatch, + utils.ErrReasonAffinityRulesNotMatch, + ), + }, + name: "Machine 1 has v1.Node and NMNode, the others are of NMNode type. Since the pod required by affinity is on v1.node of machine1, all nodes corresponding to NMNode in the China region can be scheduled.", + }, + } + + for indexTest, test := range tests { + t.Run(test.name, func(t *testing.T) { + test.pod.Annotations = map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)} + snapshot := framework_helper.MakeSnapShot(test.pods, test.nodes, test.nmNodes) + + nodeNames := getNodeNames(test.nodes, test.nmNodes) + for indexNode := 0; indexNode < len(nodeNames); indexNode++ { + p := &InterPodAffinity{ + sharedLister: snapshot, + } + var nodeInfo framework.NodeInfo + nodeInfo = mustGetNodeInfo(t, snapshot, nodeNames[indexNode]) + + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, test.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("prefilter failed with status: %v", preFilterStatus) + } + gotStatus := p.Filter(context.Background(), state, test.pod, nodeInfo) + if !reflect.DeepEqual(gotStatus, test.wantStatuses[indexNode]) { + t.Errorf("index: %d status does not match: %v, want: %v", indexTest, gotStatus, test.wantStatuses[indexNode]) + } + } + }) + } +} + +func getNodeNames(nodes []*v1.Node, nmNodes []*nodev1alpha1.NMNode) []string { + nameSet := sets.NewString() + for _, node := range nodes { + nameSet.Insert(node.Name) + } + for _, nmNode := range nmNodes { + nameSet.Insert(nmNode.Name) + } + return nameSet.List() +} + +func TestPreFilterDisabled(t *testing.T) { + pod := &v1.Pod{} + nodeInfo := framework.NewNodeInfo() + node := v1.Node{} + nodeInfo.SetNode(&node) + p := &InterPodAffinity{} + cycleState := framework.NewCycleState() + gotStatus := p.Filter(context.Background(), cycleState, pod, nodeInfo) + wantStatus := framework.NewStatus(framework.Error, `error reading "PreFilterInterPodAffinity" from cycleState: not found`) + if !reflect.DeepEqual(gotStatus, wantStatus) { + t.Errorf("status does not match: %v, want: %v", gotStatus, wantStatus) + } +} + +func TestPreFilterStateAddRemovePod(t *testing.T) { + var label1 = map[string]string{ + "region": "r1", + "zone": "z11", + } + var label2 = map[string]string{ + "region": "r1", + "zone": "z12", + } + var label3 = map[string]string{ + "region": "r2", + "zone": "z21", + } + selector1 := map[string]string{"foo": "bar"} + antiAffinityFooBar := &v1.PodAntiAffinity{ + 
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + } + antiAffinityComplex := &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar", "buzz"}, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"bar", "security", "test"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, + } + affinityComplex := &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"bar", "buzz"}, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"bar", "security", "test"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, + } + + tests := []struct { + name string + pendingPod *v1.Pod + addedPod *v1.Pod + existingPods []*v1.Pod + nodes []*v1.Node + expectedAntiAffinity utils.TopologyToMatchedTermCount + expectedAffinity utils.TopologyToMatchedTermCount + }{ + { + name: "no affinity exist", + pendingPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1}, + }, + existingPods: []*v1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + {ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{NodeName: "nodeC"}, + }, + }, + addedPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeB"}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}}, + }, + expectedAntiAffinity: utils.TopologyToMatchedTermCount{}, + expectedAffinity: utils.TopologyToMatchedTermCount{}, + }, + { + name: "preFilterState anti-affinity terms are updated correctly after adding and removing a pod", + pendingPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityFooBar, + }, + }, + }, + existingPods: []*v1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + {ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeC", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityFooBar, + }, + }, + }, + }, + addedPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1}, + Spec: v1.PodSpec{ + NodeName: "nodeB", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityFooBar, + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", 
Labels: label2}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}}, + }, + expectedAntiAffinity: utils.TopologyToMatchedTermCount{ + {Key: "region", Value: "r1"}: 2, + }, + expectedAffinity: utils.TopologyToMatchedTermCount{}, + }, + { + name: "preFilterState anti-affinity terms are updated correctly after adding and removing a pod", + pendingPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityComplex, + }, + }, + }, + existingPods: []*v1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + {ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeC", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityFooBar, + }, + }, + }, + }, + addedPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityComplex, + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}}, + }, + expectedAntiAffinity: utils.TopologyToMatchedTermCount{ + {Key: "region", Value: "r1"}: 2, + {Key: "zone", Value: "z11"}: 2, + {Key: "zone", Value: "z21"}: 1, + }, + expectedAffinity: utils.TopologyToMatchedTermCount{}, + }, + { + name: "preFilterState matching pod affinity and anti-affinity are updated correctly after adding and removing a pod", + pendingPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1}, + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAffinity: affinityComplex, + }, + }, + }, + existingPods: []*v1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }, + {ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeC", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityFooBar, + PodAffinity: affinityComplex, + }, + }, + }, + }, + addedPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1}, + Spec: v1.PodSpec{ + NodeName: "nodeA", + Affinity: &v1.Affinity{ + PodAntiAffinity: antiAffinityComplex, + }, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}}, + }, + expectedAntiAffinity: utils.TopologyToMatchedTermCount{}, + expectedAffinity: utils.TopologyToMatchedTermCount{ + {Key: "region", Value: "r1"}: 2, + {Key: "zone", Value: "z11"}: 2, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // getMeta creates predicate meta data given the list of pods. 
+ getState := func(pods []*v1.Pod) (*InterPodAffinity, *framework.CycleState, *utils.PreFilterState, *godelcache.Snapshot) { + snapshot := framework_helper.MakeSnapShot(pods, test.nodes, nil) + + p := &InterPodAffinity{ + sharedLister: snapshot, + } + cycleState := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), cycleState, test.pendingPod) + if !preFilterStatus.IsSuccess() { + t.Errorf("prefilter failed with status: %v", preFilterStatus) + } + + state, err := getPreFilterState(cycleState) + if err != nil { + t.Errorf("failed to get preFilterState from cycleState: %v", err) + } + + return p, cycleState, state, snapshot + } + + // allPodsState is the state produced when all pods, including test.addedPod are given to prefilter. + _, _, allPodsState, _ := getState(append(test.existingPods, test.addedPod)) + + // state is produced for test.existingPods (without test.addedPod). + ipa, cycleState, state, snapshot := getState(test.existingPods) + // clone the state so that we can compare it later when performing Remove. + originalState := state.Clone() + + // Add test.addedPod to state1 and verify it is equal to allPodsState. + nodeInfo := mustGetNodeInfo(t, snapshot, test.addedPod.Spec.NodeName) + if err := ipa.AddPod(context.Background(), cycleState, test.pendingPod, test.addedPod, nodeInfo); err != nil { + t.Errorf("error adding pod to meta: %v", err) + } + + newState, err := getPreFilterState(cycleState) + if err != nil { + t.Errorf("failed to get preFilterState from cycleState: %v", err) + } + + if !reflect.DeepEqual(newState.TopologyToMatchedAntiAffinityTerms, test.expectedAntiAffinity) { + t.Errorf("State is not equal, got: %v, want: %v", newState.TopologyToMatchedAntiAffinityTerms, test.expectedAntiAffinity) + } + + if !reflect.DeepEqual(newState.TopologyToMatchedAffinityTerms, test.expectedAffinity) { + t.Errorf("State is not equal, got: %v, want: %v", newState.TopologyToMatchedAffinityTerms, test.expectedAffinity) + } + + fmt.Printf("name: %v,allPodsState: %v, state: %v ", test.name, allPodsState, state) + if !reflect.DeepEqual(allPodsState, state) { + t.Errorf("State is not equal, got: %v, want: %v", state, allPodsState) + } + + // Remove the added pod pod and make sure it is equal to the original state. 
+ if err := ipa.RemovePod(context.Background(), cycleState, test.pendingPod, test.addedPod, nodeInfo); err != nil { + t.Errorf("error removing pod from meta: %v", err) + } + if !reflect.DeepEqual(originalState, state) { + t.Errorf("State is not equal, got: %v, want: %v", state, originalState) + } + }) + } +} + +func TestPreFilterStateClone(t *testing.T) { + source := &utils.PreFilterState{ + TopologyToMatchedExistingAntiAffinityTerms: utils.TopologyToMatchedTermCount{ + {Key: "name", Value: "machine1"}: 1, + {Key: "name", Value: "machine2"}: 1, + }, + TopologyToMatchedAffinityTerms: utils.TopologyToMatchedTermCount{ + {Key: "name", Value: "nodeA"}: 1, + {Key: "name", Value: "nodeC"}: 2, + }, + TopologyToMatchedAntiAffinityTerms: utils.TopologyToMatchedTermCount{ + {Key: "name", Value: "nodeN"}: 3, + {Key: "name", Value: "nodeM"}: 1, + }, + } + + clone := source.Clone() + if clone == source { + t.Errorf("Clone returned the exact same object!") + } + if !reflect.DeepEqual(clone, source) { + t.Errorf("Copy is not equal to source!") + } +} + +func mustGetNodeInfo(t *testing.T, snapshot *godelcache.Snapshot, name string) framework.NodeInfo { + t.Helper() + nodeInfo, err := snapshot.NodeInfos().Get(name) + if err != nil { + t.Fatal(err) + } + return nodeInfo +} diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go new file mode 100644 index 00000000..dc72cfa8 --- /dev/null +++ b/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go @@ -0,0 +1,77 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package interpodaffinity + +import ( + "fmt" + + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/validation" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/handle" + "k8s.io/apimachinery/pkg/runtime" +) + +// Name is the name of the plugin used in the plugin registry and configurations. +const Name = "InterPodAffinity" + +var _ framework.PreFilterPlugin = &InterPodAffinity{} +var _ framework.FilterPlugin = &InterPodAffinity{} +var _ framework.PreScorePlugin = &InterPodAffinity{} +var _ framework.ScorePlugin = &InterPodAffinity{} + +// InterPodAffinity is a plugin that checks inter pod affinity +type InterPodAffinity struct { + args config.InterPodAffinityArgs + sharedLister framework.SharedLister +} + +// Name returns name of the plugin. It is used in logs, etc. +func (pl *InterPodAffinity) Name() string { + return Name +} + +// New initializes a new plugin and returns it. 
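+//
+// A minimal construction sketch (added for illustration; it mirrors how the scoring tests in this
+// patch call New, and "fh" stands for whatever handle.PodFrameworkHandle implementation the caller has):
+//
+//	p, err := New(&config.InterPodAffinityArgs{HardPodAffinityWeight: 1}, fh)
+//	if err != nil {
+//		return err
+//	}
+//	status := p.(framework.PreScorePlugin).PreScore(ctx, cycleState, pod, nodeInfos)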
+func New(plArgs runtime.Object, h handle.PodFrameworkHandle) (framework.Plugin, error) { + if h.SnapshotSharedLister() == nil { + return nil, fmt.Errorf("SnapshotSharedlister is nil") + } + args, err := getArgs(plArgs) + if err != nil { + return nil, err + } + if err := validation.ValidateInterPodAffinityArgs(args); err != nil { + return nil, err + } + return &InterPodAffinity{ + args: args, + sharedLister: h.SnapshotSharedLister(), + }, nil +} + +func getArgs(obj runtime.Object) (config.InterPodAffinityArgs, error) { + if obj == nil { + return config.InterPodAffinityArgs{}, nil + } + + ptr, ok := obj.(*config.InterPodAffinityArgs) + if !ok { + return config.InterPodAffinityArgs{}, fmt.Errorf("want args to be of type InterPodAffinityArgs, got %T", obj) + } + return *ptr, nil +} diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/scoring.go b/pkg/scheduler/framework/plugins/interpodaffinity/scoring.go new file mode 100644 index 00000000..544d7699 --- /dev/null +++ b/pkg/scheduler/framework/plugins/interpodaffinity/scoring.go @@ -0,0 +1,272 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package interpodaffinity + +import ( + "context" + "fmt" + "sync/atomic" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + schedutil "github.com/kubewharf/godel-scheduler/pkg/scheduler/util" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" +) + +// preScoreStateKey is the key in CycleState to InterPodAffinity pre-computed data for Scoring. +const preScoreStateKey = "PreScore" + Name + +type scoreMap map[string]map[string]int64 + +// preScoreState computed at PreScore and used at Score. +type preScoreState struct { + topologyScore scoreMap + podInfo *framework.PodInfo +} + +// Clone implements the mandatory Clone interface. We don't really copy the data since +// there is no need for that. 
+func (s *preScoreState) Clone() framework.StateData {
+	return s
+}
+
+func (m scoreMap) processTerm(
+	term *framework.WeightedAffinityTerm,
+	podToCheck *v1.Pod,
+	nodeLabels map[string]string,
+	multiplier int,
+) {
+	if len(nodeLabels) == 0 {
+		return
+	}
+
+	match := schedutil.PodMatchesTermsNamespaceAndSelector(podToCheck, term.Namespaces, term.Selector)
+	tpValue, tpValueExist := nodeLabels[term.TopologyKey]
+	if match && tpValueExist {
+		if m[term.TopologyKey] == nil {
+			m[term.TopologyKey] = make(map[string]int64)
+		}
+		m[term.TopologyKey][tpValue] += int64(term.Weight * int32(multiplier))
+	}
+}
+
+func (m scoreMap) processTerms(terms []framework.WeightedAffinityTerm, podToCheck *v1.Pod, nodeLabels map[string]string, multiplier int) {
+	for _, term := range terms {
+		m.processTerm(&term, podToCheck, nodeLabels, multiplier)
+	}
+}
+
+func (m scoreMap) append(other scoreMap) {
+	for topology, oScores := range other {
+		scores := m[topology]
+		if scores == nil {
+			m[topology] = oScores
+			continue
+		}
+		for k, v := range oScores {
+			scores[k] += v
+		}
+	}
+}
+
+func (pl *InterPodAffinity) processExistingPod(
+	state *preScoreState,
+	existingPod *framework.PodInfo,
+	nodeLabels map[string]string,
+	incomingPod *v1.Pod,
+	topoScore scoreMap,
+) {
+	// For every soft pod affinity term of the incoming pod, if the existing pod matches the term,
+	// increment topoScore for every node in the cluster with the same term.TopologyKey
+	// value as that of the existing pod's node, by the term's weight.
+	topoScore.processTerms(state.podInfo.PreferredAffinityTerms, existingPod.Pod, nodeLabels, 1)
+
+	// For every soft pod anti-affinity term of the incoming pod, if the existing pod matches the term,
+	// decrement topoScore for every node in the cluster with the same term.TopologyKey
+	// value as that of the existing pod's node, by the term's weight.
+	topoScore.processTerms(state.podInfo.PreferredAntiAffinityTerms, existingPod.Pod, nodeLabels, -1)
+
+	// For every hard pod affinity term of the existing pod, if the incoming pod matches the term,
+	// increment topoScore for every node in the cluster with the same term.TopologyKey
+	// value as that of the existing pod's node, by the constant args.HardPodAffinityWeight.
+	if pl.args.HardPodAffinityWeight > 0 {
+		for _, term := range existingPod.RequiredAffinityTerms {
+			t := framework.WeightedAffinityTerm{AffinityTerm: term, Weight: pl.args.HardPodAffinityWeight}
+			topoScore.processTerm(&t, incomingPod, nodeLabels, 1)
+		}
+	}
+
+	// For every soft pod affinity term of the existing pod, if the incoming pod matches the term,
+	// increment topoScore for every node in the cluster with the same term.TopologyKey
+	// value as that of the existing pod's node, by the term's weight.
+	topoScore.processTerms(existingPod.PreferredAffinityTerms, incomingPod, nodeLabels, 1)
+
+	// For every soft pod anti-affinity term of the existing pod, if the incoming pod matches the term,
+	// decrement topoScore for every node in the cluster with the same term.TopologyKey
+	// value as that of the existing pod's node, by the term's weight.
+	topoScore.processTerms(existingPod.PreferredAntiAffinityTerms, incomingPod, nodeLabels, -1)
+}
+
+// PreScore builds and writes cycle state used by Score and NormalizeScore.
+func (pl *InterPodAffinity) PreScore(
+	pCtx context.Context,
+	cycleState *framework.CycleState,
+	pod *v1.Pod,
+	nodes []framework.NodeInfo,
+) *framework.Status {
+	if len(nodes) == 0 {
+		// No nodes to score.
+ return nil + } + + if pl.sharedLister == nil { + return framework.NewStatus(framework.Error, fmt.Sprintf("InterPodAffinity PreScore with empty shared lister found")) + } + + affinity := pod.Spec.Affinity + hasPreferredAffinityConstraints := affinity != nil && affinity.PodAffinity != nil && len(affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 + hasPreferredAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil && len(affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 + + // Unless the pod being scheduled has preferred affinity terms, we only + // need to process nodes hosting pods with affinity. + var allNodes []framework.NodeInfo + if hasPreferredAffinityConstraints || hasPreferredAntiAffinityConstraints { + allNodes = pl.sharedLister.NodeInfos().List() + } else { + allNodes = pl.sharedLister.NodeInfos().HavePodsWithAffinityList() + } + + podInfo := framework.NewPodInfo(pod) + if podInfo.ParseError != nil { + // Ideally we never reach here, because errors will be caught by PreFilter + return framework.NewStatus(framework.Error, fmt.Sprintf("parsing pod: %+v", podInfo.ParseError)) + } + + state := &preScoreState{ + topologyScore: make(map[string]map[string]int64), + podInfo: podInfo, + } + + topoScores := make([]scoreMap, len(allNodes)) + index := int32(-1) + processNode := func(i int) { + nodeInfo := allNodes[i] + // Unless the pod being scheduled has preferred affinity terms, we only + // need to process pods with affinity in the node. + podsToProcess := nodeInfo.GetPodsWithAffinity() + if hasPreferredAffinityConstraints || hasPreferredAntiAffinityConstraints { + // We need to process all the pods. + podsToProcess = nodeInfo.GetPods() + } + + topoScore := make(scoreMap) + for _, existingPod := range podsToProcess { + pl.processExistingPod(state, existingPod, nodeInfo.GetNodeLabels(existingPod.PodLauncher), pod, topoScore) + } + if len(topoScore) > 0 { + topoScores[atomic.AddInt32(&index, 1)] = topoScore + } + } + parallelize.Until(context.Background(), len(allNodes), processNode) + + for i := 0; i <= int(index); i++ { + state.topologyScore.append(topoScores[i]) + } + + cycleState.Write(preScoreStateKey, state) + return nil +} + +func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) { + c, err := cycleState.Read(preScoreStateKey) + if err != nil { + return nil, fmt.Errorf("failed to read %q from cycleState: %v", preScoreStateKey, err) + } + + s, ok := c.(*preScoreState) + if !ok { + return nil, fmt.Errorf("%+v convert to interpodaffinity.preScoreState error", c) + } + return s, nil +} + +// Score invoked at the Score extension point. +// The "score" returned in this function is the sum of weights got from cycleState which have its topologyKey matching with the node's labels. +// it is normalized later. +// Note: the returned "score" is positive for pod-affinity, and negative for pod-antiaffinity. 
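+//
+// An illustrative example (values invented for documentation, not taken from the code): if PreScore
+// produced topologyScore = {"region": {"China": 10, "India": 5}}, then a node labeled
+// {"region": "China"} scores 10, a node labeled {"region": "India"} scores 5, and a node without a
+// "region" label scores 0. NormalizeScore afterwards rescales the raw sums onto [0, MaxNodeScore]
+// using (score - min) / (max - min).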
+func (pl *InterPodAffinity) Score(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) { + podLauncher, _ := podutil.GetPodLauncher(pod) + nodeInfo, err := pl.sharedLister.NodeInfos().Get(nodeName) + if err != nil { + return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err)) + } + nodeLabels := nodeInfo.GetNodeLabels(podLauncher) + + s, err := getPreScoreState(cycleState) + if err != nil { + return 0, framework.NewStatus(framework.Error, err.Error()) + } + var score int64 + for tpKey, tpValues := range s.topologyScore { + if v, exist := nodeLabels[tpKey]; exist { + score += tpValues[v] + } + } + + return score, nil +} + +// NormalizeScore normalizes the score for each filteredNode. +func (pl *InterPodAffinity) NormalizeScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status { + s, err := getPreScoreState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + if len(s.topologyScore) == 0 { + return nil + } + + var maxCount, minCount int64 + for i := range scores { + score := scores[i].Score + if score > maxCount { + maxCount = score + } + if score < minCount { + minCount = score + } + } + + maxMinDiff := maxCount - minCount + for i := range scores { + fScore := float64(0) + if maxMinDiff > 0 { + fScore = float64(framework.MaxNodeScore) * (float64(scores[i].Score-minCount) / float64(maxMinDiff)) + } + + scores[i].Score = int64(fScore) + } + + return nil +} + +// ScoreExtensions of the Score plugin. +func (pl *InterPodAffinity) ScoreExtensions() framework.ScoreExtensions { + return pl +} diff --git a/pkg/scheduler/framework/plugins/interpodaffinity/scoring_test.go b/pkg/scheduler/framework/plugins/interpodaffinity/scoring_test.go new file mode 100644 index 00000000..4fe0bd74 --- /dev/null +++ b/pkg/scheduler/framework/plugins/interpodaffinity/scoring_test.go @@ -0,0 +1,916 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package interpodaffinity + +import ( + "context" + "reflect" + "strings" + "testing" + "time" + + nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1" + commoncache "github.com/kubewharf/godel-scheduler/pkg/common/cache" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + godelcache "github.com/kubewharf/godel-scheduler/pkg/scheduler/cache" + st "github.com/kubewharf/godel-scheduler/pkg/scheduler/testing" + framework_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper/framework-helper" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +func TestPreferredAffinity(t *testing.T) { + labelRgChina := map[string]string{ + "region": "China", + } + labelRgIndia := map[string]string{ + "region": "India", + } + labelAzAz1 := map[string]string{ + "az": "az1", + } + labelAzAz2 := map[string]string{ + "az": "az2", + } + labelRgChinaAzAz1 := map[string]string{ + "region": "China", + "az": "az1", + } + podLabelSecurityS1 := map[string]string{ + "security": "S1", + } + podLabelSecurityS2 := map[string]string{ + "security": "S2", + } + // considered only preferredDuringSchedulingIgnoredDuringExecution in pod affinity + stayWithS1InRegion := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + } + stayWithS2InRegion := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 6, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + } + affinity3 := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 8, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"S1"}, + }, { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S2"}, + }, + }, + }, + TopologyKey: "region", + }, + }, { + Weight: 2, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + } + hardAffinity := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1", "value2"}, + }, + }, + }, + TopologyKey: "region", + }, { + LabelSelector: 
&metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, { + Key: "wrongkey", + Operator: metav1.LabelSelectorOpDoesNotExist, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + } + awayFromS1InAz := &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1"}, + }, + }, + }, + TopologyKey: "az", + }, + }, + }, + }, + } + // to stay away from security S2 in any az. + awayFromS2InAz := &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S2"}, + }, + }, + }, + TopologyKey: "az", + }, + }, + }, + }, + } + // to stay with security S1 in same region, stay away from security S2 in any az. + stayWithS1InRegionAwayFromS2InAz := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 8, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S2"}, + }, + }, + }, + TopologyKey: "az", + }, + }, + }, + }, + } + + invalidAffinityLabels := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 8, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"{{.bad-value.}}"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + } + invalidAntiAffinityLabels := &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"{{.bad-value.}}"}, + }, + }, + }, + TopologyKey: "az", + }, + }, + }, + }, + } + + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + expectedList framework.NodeScoreList + name string + wantStatus *framework.Status + }{ + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: 
"machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: 0}, {Name: "machine3", Score: 0}}, + name: "all machines are same priority as Affinity is nil", + }, + // the node(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score + // the node(machine3) that don't have the label {"region": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get low score + // the node(machine2) that have the label {"region": "China"} (match the topology key) but that have existing pods that mismatch the labelSelector get low score + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 0}, {Name: "machine3", Score: 0}}, + name: "Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" + + "which doesn't match either pods in nodes or in topology key", + }, + // the node1(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score + // the node2(machine2) that have the label {"region": "China"}, match the topology key and have the same label value with node1, get the same high score with node1 + // the node3(machine3) that have the label {"region": "India"}, match the topology key but have a different label value, don't have existing pods that match the labelSelector, + // get a low score. + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "All the nodes that have the same topology key & label value with one of them has an existing pod that match the affinity rules, have the same score", + }, + // there are 2 regions, say regionChina(machine1,machine3,machine4) and regionIndia(machine2,machine5), both regions have nodes that match the preference. + // But there are more nodes(actually more existing pods) in regionChina that match the preference than regionIndia. 
+		// Then, nodes in regionChina get a higher score than nodes in regionIndia, and all the nodes in regionChina should get the same (high) score,
+		// while all the nodes in regionIndia should get the same (low) score.
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p4", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Name: "p5", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: podLabelSecurityS2}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 50}, {Name: "machine3", Score: framework.MaxNodeScore}, {Name: "machine4", Score: framework.MaxNodeScore}, {Name: "machine5", Score: 50}},
+			name: "Affinity: nodes in one region have more matching pods than the other region, so the region with more matches gets the higher score",
+		},
+		// Test with the different operators and values for pod affinity scheduling preference, including some match failures.
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: affinity3}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}},
+				{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 20}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}},
+			name: "Affinity: different label operators and values for pod affinity scheduling preference, including some match failures",
+		},
+		// Test the symmetry cases for affinity: the difference between affinity and symmetry is that here it is not the pod to schedule that wants to run together with some existing pods;
+		// instead, the existing pods carry the inter pod affinity preference and the pod to schedule satisfies that preference.
+ { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "Affinity symmetry: considered only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry", + }, + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "Affinity symmetry: considered RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry", + }, + + // The pod to schedule prefer to stay away from some existing pods at node level using the pod anti affinity. + // the nodes that have the label {"node": "bar"} (match the topology key) and that have existing pods that match the labelSelector get low score + // the nodes that don't have the label {"node": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get high score + // the nodes that have the label {"node": "bar"} (match the topology key) but that have existing pods that mismatch the labelSelector get high score + // there are 2 nodes, say node1 and node2, both nodes have pods that match the labelSelector and have topology-key in node.Labels. + // But there are more pods on node1 that match the preference than node2. Then, node1 get a lower score than node2. 
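+		// As a rough worked example for the last point (reconstructed from the fixtures below, assuming
+		// MaxNodeScore is 100): with the weight-5 anti-affinity term, a node hosting two matching pods
+		// accumulates a raw score of -10 while a node with no matching pods stays at 0; NormalizeScore
+		// then maps -10 to 0 and 0 to MaxNodeScore, so the node with more matching pods ends up with the lower score.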
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: framework.MaxNodeScore}},
+			name: "Anti Affinity: pod that does not match existing pods in node will get high score",
+		},
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS1}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: framework.MaxNodeScore}},
+			name: "Anti Affinity: pod that does not match the topology key & matches the pods in nodes will get higher score comparing to others",
+		},
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS2}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: framework.MaxNodeScore}},
+			name: "Anti Affinity: one node has more matching pods comparing to the other node, so the node with more mismatches will get the higher score",
+		},
+		// Test the symmetry cases for anti affinity
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "machine1", Affinity: awayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}},
+				{Spec: v1.PodSpec{NodeName: "machine2", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz2}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: framework.MaxNodeScore}},
+			name: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
+		},
+		// Test both affinity and anti-affinity
+		{
+			pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity:
stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS1}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 0}}, + name: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity", + }, + // Combined cases considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels (they are in the same RC/service), + // the pod prefer to run together with its brother pods in the same region, but wants to stay away from them at node level, + // so that all the pods of a RC/service can stay in a same region but trying to separate with each other + // machine-1,machine-3,machine-4 are in ChinaRegion others machin-2,machine-5 are in IndiaRegion + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p4", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p5", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Name: "p7", Labels: podLabelSecurityS1}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChinaAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 40}, {Name: "machine3", Score: framework.MaxNodeScore}, {Name: "machine4", Score: framework.MaxNodeScore}, {Name: "machine5", Score: 40}}, + name: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels", + }, + // Consider Affinity, Anti Affinity and symmetry together. 
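+		// (Illustrative arithmetic, assuming MaxNodeScore is 100: the per-node weights listed below sum to
+		// raw scores of 8, -5, 8 and -5; NormalizeScore rescales them with max = 8 and min = -5, giving
+		// MaxNodeScore, 0, MaxNodeScore and 0, which matches this case's expectedList.)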
+ // for Affinity, the weights are: 8, 0, 0, 0 + // for Anti Affinity, the weights are: 0, -5, 0, 0 + // for Affinity symmetry, the weights are: 0, 0, 8, 0 + // for Anti Affinity symmetry, the weights are: 0, 0, 0, -5 + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2}}, + {Spec: v1.PodSpec{NodeName: "machine3", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Name: "p3"}}, + {Spec: v1.PodSpec{NodeName: "machine4", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Name: "p4"}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelAzAz2}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 0}, {Name: "machine3", Score: framework.MaxNodeScore}, {Name: "machine4", Score: 0}}, + name: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry", + }, + // Cover https://github.com/kubernetes/kubernetes/issues/82796 which panics upon: + // 1. Some nodes in a topology don't have pods with affinity, but other nodes in the same topology have. + // 2. The incoming pod doesn't have affinity. 
+ { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine2", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Name: "p2"}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}}, + name: "Avoid panic when partial nodes in a topology don't have pods with affinity", + }, + { + name: "invalid Affinity fails PreScore", + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: invalidAffinityLabels}}, + wantStatus: framework.NewStatus(framework.Error, "Invalid value"), + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}}, + }, + }, + { + name: "invalid AntiAffinity fails PreScore", + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: invalidAntiAffinityLabels}}, + wantStatus: framework.NewStatus(framework.Error, "Invalid value"), + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}}, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + state := framework.NewCycleState() + snapshot := framework_helper.MakeSnapShot(test.pods, test.nodes, nil) + + p := &InterPodAffinity{ + args: config.InterPodAffinityArgs{ + HardPodAffinityWeight: 1, + }, + sharedLister: snapshot, + } + + nodeInfos := make([]framework.NodeInfo, len(test.nodes)) + for index, node := range test.nodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + + status := p.PreScore(context.Background(), state, test.pod, nodeInfos) + if !status.IsSuccess() { + if !strings.Contains(status.Message(), test.wantStatus.Message()) { + t.Errorf("unexpected error: %v", status) + } + } else { + var gotList framework.NodeScoreList + for _, n := range test.nodes { + nodeName := n.ObjectMeta.Name + score, status := p.Score(context.Background(), state, test.pod, nodeName) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) + } + + status = p.ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + + if !reflect.DeepEqual(test.expectedList, gotList) { + t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, gotList) + } + } + + }) + } +} + +func TestPreferredAffinityWithHardPodAffinitySymmetricWeight(t *testing.T) { + podLabelServiceS1 := map[string]string{ + "service": "S1", + } + labelRgChina := map[string]string{ + "region": "China", + } + labelRgIndia := map[string]string{ + "region": "India", + } + labelAzAz1 := map[string]string{ + "az": "az1", + } + hardPodAffinity := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: 
[]string{"S1"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + } + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + hardPodAffinityWeight int32 + expectedList framework.NodeScoreList + name string + }{ + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p1"}}, + {Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p2"}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "Hard Pod Affinity symmetry: hard pod affinity symmetry weights 1 by default, then nodes that match the hard pod affinity symmetry rules, get a high score", + }, + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p1"}}, + {Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}, ObjectMeta: metav1.ObjectMeta{Name: "p2"}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + hardPodAffinityWeight: 0, + expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: 0}, {Name: "machine3", Score: 0}}, + name: "Hard Pod Affinity symmetry: hard pod affinity symmetry is closed(weights 0), then nodes that match the hard pod affinity symmetry rules, get same score with those not match", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + state := framework.NewCycleState() + cache := godelcache.New(commoncache.MakeCacheHandlerWrapper(). + ComponentName("").SchedulerType("").SubCluster(framework.DefaultSubCluster). + PodAssumedTTL(time.Second).Period(10 * time.Second).StopCh(make(<-chan struct{})). + EnableStore("PreemptionStore"). + Obj()) + snapshot := godelcache.NewEmptySnapshot(commoncache.MakeCacheHandlerWrapper(). + SubCluster(framework.DefaultSubCluster).SwitchType(framework.DefaultSubClusterSwitchType). + EnableStore("PreemptionStore"). 
+ Obj()) + for _, pod := range test.pods { + pod.UID = types.UID(pod.Name) + cache.AddPod(pod) + } + for _, node := range test.nodes { + cache.AddNode(node) + } + cache.UpdateSnapshot(snapshot) + + fh, _ := st.NewPodFrameworkHandle(nil, nil, nil, nil, cache, snapshot, nil, nil, nil, nil) + + args := &config.InterPodAffinityArgs{HardPodAffinityWeight: test.hardPodAffinityWeight} + p, err := New(args, fh) + if err != nil { + t.Fatal(err) + } + + nodeInfos := make([]framework.NodeInfo, len(test.nodes)) + for index, node := range test.nodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + + status := p.(framework.PreScorePlugin).PreScore(context.Background(), state, test.pod, nodeInfos) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + var gotList framework.NodeScoreList + for _, n := range test.nodes { + nodeName := n.ObjectMeta.Name + score, status := p.(framework.ScorePlugin).Score(context.Background(), state, test.pod, nodeName) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) + } + + status = p.(framework.ScorePlugin).ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + + if !reflect.DeepEqual(test.expectedList, gotList) { + t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, gotList) + } + }) + } +} + +func TestNMNodesScore(t *testing.T) { + labelRgChina := map[string]string{ + "region": "China", + } + labelRgIndia := map[string]string{ + "region": "India", + } + labelAzAz1 := map[string]string{ + "az": "az1", + } + podLabelSecurityS1 := map[string]string{ + "security": "S1", + } + podLabelSecurityS2 := map[string]string{ + "security": "S2", + } + // considered only preferredDuringSchedulingIgnoredDuringExecution in pod affinity + stayWithS1InRegion := &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: 5, + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1"}, + }, + }, + }, + TopologyKey: "region", + }, + }, + }, + }, + } + tests := []struct { + pod *v1.Pod + pods []*v1.Pod + nodes []*v1.Node + nmNodes []*nodev1alpha1.NMNode + expectedList framework.NodeScoreList + name string + wantStatus *framework.Status + }{ + // the node(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score + // the node(machine3) that don't have the label {"region": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get low score + // the node(machine2) that have the label {"region": "China"} (match the topology key) but that have existing pods that mismatch the labelSelector get low score + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2, 
Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 0}, {Name: "machine3", Score: 0}}, + name: "All nodes are of NMNode type, that is, they are managed by the node manager. Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" + + "which doesn't match either pods in nodes or in topology key", + }, + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine0"}, ObjectMeta: metav1.ObjectMeta{Name: "p0", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine0", Labels: labelRgIndia}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine0", Score: 0}, {Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "The first node machine0 is v1.node and the others are of NMNode type. 
Although the node type of machine0 is v1.Node, its pods with the S1 label are also counted, so the number of pods with affinity for all nodes with region India is 1, the same as the nodes with region China.However, since the node type of machine0 is v1.Node, its score is 0.", + }, + { + pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}}, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "machine0"}, ObjectMeta: metav1.ObjectMeta{Name: "p0", Labels: podLabelSecurityS1}}, + {Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: podLabelSecurityS2, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + {Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Name: "p3", Labels: podLabelSecurityS1, Annotations: map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}}}, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine0", Labels: labelRgIndia}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine0", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}}, + }, + expectedList: []framework.NodeScore{{Name: "machine0", Score: framework.MaxNodeScore}, {Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}, {Name: "machine3", Score: 0}}, + name: "Machine0 has both v1.Node and NMNode. Although the pod with the S1 label is on v1.Node, it is also counted. 
Therefore, the number of pods with affinity for all nodes with region India is 1, the same as the nodes with region China.", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + test.pod.Annotations = map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)} + + state := framework.NewCycleState() + snapshot := framework_helper.MakeSnapShot(test.pods, test.nodes, test.nmNodes) + + p := &InterPodAffinity{ + args: config.InterPodAffinityArgs{ + HardPodAffinityWeight: 1, + }, + sharedLister: snapshot, + } + nodeNames := getNodeNames(test.nodes, test.nmNodes) + nodeInfos := make([]framework.NodeInfo, len(nodeNames)) + for indexNode := 0; indexNode < len(nodeInfos); indexNode++ { + nodeInfos[indexNode] = mustGetNodeInfo(t, snapshot, nodeNames[indexNode]) + } + + status := p.PreScore(context.Background(), state, test.pod, nodeInfos) + if !status.IsSuccess() { + if !strings.Contains(status.Message(), test.wantStatus.Message()) { + t.Errorf("unexpected error: %v", status) + } + } else { + var gotList framework.NodeScoreList + for _, nodeInfo := range nodeInfos { + nodeName := nodeInfo.GetNodeName() + score, status := p.Score(context.Background(), state, test.pod, nodeName) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) + } + + status = p.ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + + if !reflect.DeepEqual(test.expectedList, gotList) { + t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, gotList) + } + } + + }) + } +} diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/common.go b/pkg/scheduler/framework/plugins/podtopologyspread/common.go new file mode 100644 index 00000000..027c764c --- /dev/null +++ b/pkg/scheduler/framework/plugins/podtopologyspread/common.go @@ -0,0 +1,61 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/plugins/helper" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" +) + +// defaultConstraints builds the constraints for a pod using +// .DefaultConstraints and the selectors from the services, replication +// controllers, replica sets and stateful sets that match the pod. 
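+// For example, given a DefaultConstraints entry
+//   {MaxSkew: 3, TopologyKey: "node", WhenUnsatisfiable: DoNotSchedule}
+// and a Service whose selector {"foo": "bar"} matches the incoming pod, the returned
+// hard constraint spreads the pod over the "node" topology with a Selector of foo=bar;
+// ScheduleAnyway defaults are filtered out when the requested action is DoNotSchedule.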
+func (pl *PodTopologySpread) defaultConstraints(p *v1.Pod, action v1.UnsatisfiableConstraintAction) ([]utils.TopologySpreadConstraint, error) { + constraints, err := utils.FilterTopologySpreadConstraints(pl.args.DefaultConstraints, action) + if err != nil || len(constraints) == 0 { + return nil, err + } + selector := helper.DefaultSelector(p, pl.services, pl.replicationCtrls, pl.replicaSets, pl.statefulSets) + if selector.Empty() { + return nil, nil + } + for i := range constraints { + constraints[i].Selector = selector + } + return constraints, nil +} + +func getNodeNameByPodLauncher(nodeInfo framework.NodeInfo, podLanucher podutil.PodLauncher) string { + if nodeInfo == nil { + return "" + } + + switch podLanucher { + case podutil.Kubelet: + if nodeInfo.GetNode() != nil { + return nodeInfo.GetNode().Name + } + case podutil.NodeManager: + if nodeInfo.GetNMNode() != nil { + return nodeInfo.GetNMNode().Name + } + } + return "" +} diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go b/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go new file mode 100644 index 00000000..6225ed10 --- /dev/null +++ b/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go @@ -0,0 +1,136 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "context" + "fmt" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + v1 "k8s.io/api/core/v1" +) + +const preFilterStateKey = "PreFilter" + Name + +// PreFilter invoked at the prefilter extension point. +func (pl *PodTopologySpread) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status { + s, err := pl.calPreFilterState(pod) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + cycleState.Write(preFilterStateKey, s) + return nil +} + +// PreFilterExtensions returns prefilter extensions, pod add and remove. +func (pl *PodTopologySpread) PreFilterExtensions() framework.PreFilterExtensions { + return pl +} + +// AddPod from pre-computed data in cycleState. +func (pl *PodTopologySpread) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + s, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + + s.UpdateWithPod(podToAdd, podToSchedule, nodeInfo, 1) + return nil +} + +// RemovePod from pre-computed data in cycleState. 
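+// It mirrors AddPod but applies a delta of -1 via UpdateWithPod, so callers such as the
+// preemption logic can account for a victim pod being removed from nodeInfo without
+// recomputing the whole preFilterState.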
+func (pl *PodTopologySpread) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToRemove *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + s, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + + s.UpdateWithPod(podToRemove, podToSchedule, nodeInfo, -1) + return nil +} + +// getPreFilterState fetches a pre-computed preFilterState. +func getPreFilterState(cycleState *framework.CycleState) (*utils.PreFilterState, error) { + c, err := cycleState.Read(preFilterStateKey) + if err != nil { + // preFilterState doesn't exist, likely PreFilter wasn't invoked. + return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err) + } + + s, ok := c.(*utils.PreFilterState) + if !ok { + return nil, fmt.Errorf("%+v convert to podtopologyspread.preFilterState error", c) + } + return s, nil +} + +func (pl *PodTopologySpread) getConstraints(pod *v1.Pod) ([]utils.TopologySpreadConstraint, error) { + var constraints []utils.TopologySpreadConstraint + var err error + if len(pod.Spec.TopologySpreadConstraints) > 0 { + // We have feature gating in APIServer to strip the spec + // so don't need to re-check feature gate, just check length of Constraints. + constraints, err = utils.FilterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.DoNotSchedule) + if err != nil { + return nil, fmt.Errorf("obtaining pod's hard topology spread constraints: %v", err) + } + } else { + constraints, err = pl.defaultConstraints(pod, v1.DoNotSchedule) + if err != nil { + return nil, fmt.Errorf("setting default hard topology spread constraints: %v", err) + } + } + + return constraints, nil +} + +// calPreFilterState computes preFilterState describing how pods are spread on topologies. +func (pl *PodTopologySpread) calPreFilterState(pod *v1.Pod) (*utils.PreFilterState, error) { + allNodes := pl.sharedLister.NodeInfos().List() + constraints, err := pl.getConstraints(pod) + if err != nil { + return nil, err + } + if len(constraints) == 0 { + return &utils.PreFilterState{}, nil + } + + state := utils.GetPreFilterState(pod, allNodes, constraints) + return &state, nil +} + +// Filter invoked at the filter extension point. +func (pl *PodTopologySpread) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo framework.NodeInfo) *framework.Status { + podLauncher, status := podlauncher.NodeFits(cycleState, pod, nodeInfo) + if status != nil { + return status + } + + s, err := getPreFilterState(cycleState) + if err != nil { + return framework.NewStatus(framework.Error, err.Error()) + } + + // However, "empty" preFilterState is legit which tolerates every toSchedule Pod. + if len(s.Constraints) == 0 { + return nil + } + + return utils.IsSatisfyPodTopologySpreadConstraints(s, pod, nodeInfo, podLauncher) +} diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go new file mode 100644 index 00000000..5e1e92b2 --- /dev/null +++ b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go @@ -0,0 +1,1794 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "context" + "reflect" + "testing" + + "github.com/google/go-cmp/cmp" + nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + testing_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper" + framework_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper/framework-helper" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/utils/pointer" +) + +var cmpOpts = []cmp.Option{ + cmp.Comparer(func(s1 labels.Selector, s2 labels.Selector) bool { + return reflect.DeepEqual(s1, s2) + }), + cmp.Comparer(func(p1, p2 utils.CriticalPaths) bool { + p1.Sort() + p2.Sort() + return p1[0] == p2[0] && p1[1] == p2[1] + }), +} + +func TestPreFilterState(t *testing.T) { + fooSelector := testing_helper.MakeLabelSelector().Exists("foo").Obj() + barSelector := testing_helper.MakeLabelSelector().Exists("bar").Obj() + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + objs []runtime.Object + defaultConstraints []v1.TopologySpreadConstraint + want *utils.PreFilterState + }{ + { + name: "clean cluster with one spreadConstraint", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 5, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Label("foo", "bar").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 5, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Label("foo", "bar").Obj()), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 0}, {"zone2", 0}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(0), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(0), + }, + }, + }, + { + name: "normal case with one spreadConstraint", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, fooSelector, + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", 
"zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 2}, {"zone1", 3}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "normal case with one spreadConstraint, on a 3-zone cluster", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + testing_helper.MakeNode().Name("node-o").Label("zone", "zone3").Label("node", "node-o").Obj(), + testing_helper.MakeNode().Name("node-p").Label("zone", "zone3").Label("node", "node-p").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone3", 0}, {"zone2", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(2), + {Key: "zone", Value: "zone3"}: pointer.Int32Ptr(0), + }, + }, + }, + { + name: "namespace mismatch doesn't count", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, fooSelector, + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + 
existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Namespace("ns2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 1}, {"zone1", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(2), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "normal case with two spreadConstraints", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector). + SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 3}, {"zone2", 4}}, + "node": {{"node-x", 0}, {"node-b", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(4), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(2), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-y"}: pointer.Int32Ptr(4), + }, + }, + }, + { + name: "soft spreadConstraints should be bypassed", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, fooSelector). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector). + SpreadConstraint(1, "node", v1.ScheduleAnyway, fooSelector). + SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 3}, {"zone2", 4}}, + "node": {{"node-b", 1}, {"node-a", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(4), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(2), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-y"}: pointer.Int32Ptr(4), + }, + }, + }, + { + name: "different labelSelectors - simple version", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b").Node("node-b").Label("bar", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, barSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 0}, {"zone1", 1}}, + "node": {{"node-a", 0}, {"node-y", 0}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-y"}: pointer.Int32Ptr(0), + }, + }, + }, + { + name: "different labelSelectors - complex pods", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector). + SpreadConstraint(1, "node", v1.DoNotSchedule, barSelector). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, barSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 3}, {"zone2", 4}}, + "node": {{"node-b", 0}, {"node-a", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(4), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-y"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "two spreadConstraints, and with podAffinity", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + NodeAffinityNotIn("node", []string{"node-x"}, testing_helper.NodeAffinityWithRequiredReq). // exclude node-x + SpreadConstraint(1, "zone", v1.DoNotSchedule, fooSelector). + SpreadConstraint(1, "node", v1.DoNotSchedule, fooSelector). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, fooSelector), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 3}, {"zone2", 4}}, + "node": {{"node-b", 1}, {"node-a", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(4), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(2), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-y"}: pointer.Int32Ptr(4), + }, + }, + }, + { + name: "default constraints and a service", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Label("baz", "kar").Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 3, TopologyKey: "node", WhenUnsatisfiable: v1.DoNotSchedule}, + {MaxSkew: 2, TopologyKey: "node", WhenUnsatisfiable: v1.ScheduleAnyway}, + {MaxSkew: 5, TopologyKey: "rack", WhenUnsatisfiable: v1.DoNotSchedule}, + }, + objs: []runtime.Object{ + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": "bar"}}}, + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 3, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Label("foo", "bar").Obj()), + }, + { + MaxSkew: 5, + TopologyKey: "rack", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Label("foo", "bar").Obj()), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "node": utils.NewCriticalPaths(), + "rack": utils.NewCriticalPaths(), + }, + TpPairToMatchNum: make(map[utils.TopologyPair]*int32), + }, + }, + { + name: "default constraints and a service that doesn't match", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 3, TopologyKey: "node", WhenUnsatisfiable: v1.DoNotSchedule}, + }, + objs: []runtime.Object{ + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "kep"}}}, + }, + want: &utils.PreFilterState{}, + }, + { + name: "default constraints and a service, but pod has constraints", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Label("baz", "tar"). 
+ SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Label("baz", "tar").Obj()). + SpreadConstraint(2, "planet", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Label("fot", "rok").Obj()).Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 2, TopologyKey: "node", WhenUnsatisfiable: v1.DoNotSchedule}, + }, + objs: []runtime.Object{ + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": "bar"}}}, + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Label("baz", "tar").Obj()), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": utils.NewCriticalPaths(), + }, + TpPairToMatchNum: make(map[utils.TopologyPair]*int32), + }, + }, + { + name: "default soft constraints and a service", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 2, TopologyKey: "node", WhenUnsatisfiable: v1.ScheduleAnyway}, + }, + objs: []runtime.Object{ + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": "bar"}}}, + }, + want: &utils.PreFilterState{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(fake.NewSimpleClientset(tt.objs...), 0) + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + pl := PodTopologySpread{ + sharedLister: snapshot, + args: config.PodTopologySpreadArgs{ + DefaultConstraints: tt.defaultConstraints, + }, + } + pl.setListers(informerFactory) + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + cs := framework.NewCycleState() + if s := pl.PreFilter(ctx, cs, tt.pod); !s.IsSuccess() { + t.Fatal(s.AsError()) + } + got, err := getPreFilterState(cs) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tt.want, got, cmpOpts...); diff != "" { + t.Errorf("PodTopologySpread#PreFilter() returned diff (-want,+got):\n%s", diff) + } + }) + } +} + +func TestPreFilterStateAddPod(t *testing.T) { + nodeConstraint := utils.TopologySpreadConstraint{ + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + } + zoneConstraint := nodeConstraint + zoneConstraint.TopologyKey = "zone" + tests := []struct { + name string + preemptor *v1.Pod + addedPod *v1.Pod + existingPods []*v1.Pod + nodeIdx int // denotes which node 'addedPod' belongs to + nodes []*v1.Node + want *utils.PreFilterState + }{ + { + name: "node a and b both impact current min match", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: nil, // it's an empty cluster + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "node": {{"node-b", 0}, {"node-a", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(0), + }, + }, + }, + { + name: "only node a impacts current min match", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "node": {{"node-a", 1}, {"node-b", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "add a pod in a different namespace doesn't change topologyKeyToMinPodsMap", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "node": {{"node-a", 0}, {"node-b", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "add pod on non-critical node won't trigger re-calculation", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + addedPod: testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodeIdx: 1, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "node": {{"node-a", 0}, {"node-b", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "node a and x both impact topologyKeyToMinPodsMap on zone and node", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: nil, // it's an empty cluster + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint, nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 0}, {"zone1", 1}}, + "node": {{"node-x", 0}, {"node-a", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(0), + }, + }, + }, + { + name: "only node a impacts topologyKeyToMinPodsMap on zone and node", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint, nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 1}, {"zone2", 1}}, + "node": {{"node-a", 1}, {"node-x", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "node a impacts topologyKeyToMinPodsMap on node, node x impacts topologyKeyToMinPodsMap on zone", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint, nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 1}, {"zone1", 3}}, + "node": {{"node-a", 1}, {"node-x", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(2), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "Constraints hold different labelSelectors, node a impacts topologyKeyToMinPodsMap on zone", + preemptor: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-x2").Node("node-x").Label("bar", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + zoneConstraint, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("bar").Obj()), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 1}, {"zone1", 2}}, + "node": {{"node-a", 0}, {"node-b", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(2), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(0), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "Constraints hold different labelSelectors, node a impacts topologyKeyToMinPodsMap on both zone and node", + preemptor: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + addedPod: testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Label("bar", "").Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-x2").Node("node-x").Label("bar", "").Obj(), + }, + nodeIdx: 0, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{ + zoneConstraint, + { + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("bar").Obj()), + }, + }, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 1}, {"zone2", 1}}, + "node": {{"node-a", 1}, {"node-b", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(2), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + pl := PodTopologySpread{ + sharedLister: snapshot, + } + cs := framework.NewCycleState() + ctx := context.Background() + if s := pl.PreFilter(ctx, cs, tt.preemptor); !s.IsSuccess() { + t.Fatal(s.AsError()) + } + nodeInfo, err := snapshot.Get(tt.nodes[tt.nodeIdx].Name) + if err != nil { + t.Fatal(err) + } + if s := pl.AddPod(ctx, cs, tt.preemptor, tt.addedPod, nodeInfo); !s.IsSuccess() { + t.Fatal(s.AsError()) + } + state, err := getPreFilterState(cs) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(state, tt.want, cmpOpts...); diff != "" { + t.Errorf("PodTopologySpread.AddPod() returned diff (-want,+got):\n%s", diff) + } + }) + } +} + +func TestPreFilterStateRemovePod(t *testing.T) { + nodeConstraint := utils.TopologySpreadConstraint{ + MaxSkew: 1, + TopologyKey: "node", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + } + zoneConstraint := nodeConstraint + zoneConstraint.TopologyKey = "zone" + tests := []struct { + name string + preemptor *v1.Pod // preemptor pod + nodes []*v1.Node + existingPods []*v1.Pod + deletedPodIdx int // need to reuse *Pod of existingPods[i] + deletedPod *v1.Pod // this field is used only when deletedPodIdx is -1 + nodeIdx int // denotes which node "deletedPod" belongs to + want *utils.PreFilterState + }{ + { + // A high priority pod may not be scheduled due to node taints or resource shortage. + // So preemption is triggered. + name: "one spreadConstraint on zone, topologyKeyToMinPodsMap unchanged", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + }, + deletedPodIdx: 0, // remove pod "p-a1" + nodeIdx: 0, // node-a + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 1}, {"zone2", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + }, + }, + }, + { + name: "one spreadConstraint on node, topologyKeyToMinPodsMap changed", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + deletedPodIdx: 0, // remove pod "p-a1" + nodeIdx: 0, // node-a + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 1}, {"zone2", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(1), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "delete an irrelevant pod won't help", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a0").Node("node-a").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + deletedPodIdx: 0, // remove pod "p-a0" + nodeIdx: 0, // node-a + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 2}, {"zone2", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(2), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "delete a non-existing pod won't help", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + deletedPodIdx: -1, + deletedPod: testing_helper.MakePod().Name("p-a0").Node("node-a").Label("bar", "").Obj(), + nodeIdx: 0, // node-a + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone1", 2}, {"zone2", 2}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(2), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(2), + }, + }, + }, + { + name: "two spreadConstraints", + preemptor: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x2").Node("node-x").Label("foo", "").Obj(), + }, + deletedPodIdx: 3, // remove pod "p-x1" + nodeIdx: 2, // node-x + want: &utils.PreFilterState{ + Constraints: []utils.TopologySpreadConstraint{zoneConstraint, nodeConstraint}, + TpKeyToCriticalPaths: map[string]*utils.CriticalPaths{ + "zone": {{"zone2", 1}, {"zone1", 3}}, + "node": {{"node-b", 1}, {"node-x", 1}}, + }, + TpPairToMatchNum: map[utils.TopologyPair]*int32{ + {Key: "zone", Value: "zone1"}: pointer.Int32Ptr(3), + {Key: "zone", Value: "zone2"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-a"}: pointer.Int32Ptr(2), + {Key: "node", Value: "node-b"}: pointer.Int32Ptr(1), + {Key: "node", Value: "node-x"}: pointer.Int32Ptr(1), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + pl := PodTopologySpread{ + sharedLister: snapshot, + } + cs := framework.NewCycleState() + ctx := context.Background() + s := pl.PreFilter(ctx, cs, tt.preemptor) + if !s.IsSuccess() { + t.Fatal(s.AsError()) + } + + deletedPod := tt.deletedPod + if tt.deletedPodIdx < len(tt.existingPods) && tt.deletedPodIdx >= 0 { + deletedPod = tt.existingPods[tt.deletedPodIdx] + } + + nodeInfo, err := snapshot.Get(tt.nodes[tt.nodeIdx].Name) + if err != nil { + t.Fatal(err) + } + if s := pl.RemovePod(ctx, cs, tt.preemptor, deletedPod, nodeInfo); !s.IsSuccess() { + t.Fatal(s.AsError()) + } + + state, err := getPreFilterState(cs) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(state, tt.want, cmpOpts...); diff != "" { + t.Errorf("PodTopologySpread.RemovePod() returned diff (-want,+got):\n%s", diff) + } + }) + } +} + +func BenchmarkFilter(b *testing.B) { + tests := []struct { + name string + pod *v1.Pod + existingPodsNum int + allNodesNum int + filteredNodesNum int + }{ + { + name: "1000nodes/single-constraint-zone", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelZoneFailureDomain, v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/single-constraint-node", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/two-Constraints-zone-node", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, v1.LabelZoneFailureDomain, v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + } + for _, tt := range tests { + var state *framework.CycleState + b.Run(tt.name, func(b *testing.B) { + existingPods, allNodes, _ := testing_helper.MakeNodesAndPodsForEvenPodsSpread(tt.pod.Labels, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum) + snapshot := framework_helper.MakeSnapShot(existingPods, allNodes, nil) + + pl := PodTopologySpread{ + sharedLister: snapshot, + } + ctx := context.Background() + b.ResetTimer() + for i := 0; i < b.N; i++ { + state = framework.NewCycleState() + s := pl.PreFilter(ctx, state, tt.pod) + if !s.IsSuccess() { + b.Fatal(s.AsError()) + } + filterNode := func(i int) { + n, _ := pl.sharedLister.NodeInfos().Get(allNodes[i].Name) + pl.Filter(ctx, state, tt.pod, n) + } + parallelize.Until(ctx, len(allNodes), filterNode) + } + }) + b.Run(tt.name+"/Clone", func(b *testing.B) { + for i := 0; i < b.N; i++ { + state.Clone() + } + }) + } +} + +func mustConvertLabelSelectorAsSelector(t *testing.T, ls *metav1.LabelSelector) labels.Selector { + t.Helper() + s, err := metav1.LabelSelectorAsSelector(ls) + if err != nil { + t.Fatal(err) + } + return s +} + +func TestSingleConstraint(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + wantStatusCode map[string]framework.Code + }{ + { + name: "no existing pods", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + name: "no existing pods, incoming pod doesn't match itself", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + name: "existing pods in a different namespace do not count", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", 
"node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Namespace("ns2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + name: "pods spread across zones as 3/3, all nodes fit", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Success, + }, + }, + { + // TODO(Huang-Wei): maybe document this to remind users that typos on node labels + // can cause unexpected behavior + name: "pods spread across zones as 1/2 due to absence of label 'zone' on node-b", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zon", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.UnschedulableAndUnresolvable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + name: "pod cannot be scheduled as all nodes don't have label 'rack'", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "rack", 
v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.UnschedulableAndUnresolvable, + "node-x": framework.UnschedulableAndUnresolvable, + }, + }, + { + name: "pods spread across nodes as 2/1/0/3, only node-x fits", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + name: "pods spread across nodes as 2/1/0/3, maxSkew is 2, node-b and node-x fit", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 2, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // not a desired case, but it can happen + // TODO(Huang-Wei): document this "pod-not-match-itself" case + // in this case, placement of the new pod doesn't change pod distribution of the cluster + // as the incoming pod doesn't have label "foo" + name: "pods spread across nodes as 2/1/0/3, but pod doesn't match itself", + pod: 
testing_helper.MakePod().Name("p").Label("bar", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // only node-a and node-y are considered, so pods spread as 2/~1~/~0~/3 + // ps: '~num~' is a markdown symbol to denote a crossline through 'num' + // but in this unit test, we don't run NodeAffinity Predicate, so node-b and node-x are + // still expected to be fits; + // the fact that node-a fits can prove the underlying logic works + name: "incoming pod has nodeAffinity, pods spread as 2/~1~/~0~/3, hence node-a fits", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + NodeAffinityIn("node", []string{"node-a", "node-y"}, testing_helper.NodeAffinityWithRequiredReq). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, // in real case, it's false + "node-x": framework.Success, // in real case, it's false + "node-y": framework.Unschedulable, + }, + }, + { + name: "terminating Pods should be excluded", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a").Node("node-a").Label("foo", "").Terminating().Obj(), + testing_helper.MakePod().Name("p-b").Node("node-b").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Unschedulable, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + p := &PodTopologySpread{sharedLister: snapshot} + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, tt.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("preFilter failed with status: %v", preFilterStatus) + } + + for _, node := range tt.nodes { + nodeInfo, _ := snapshot.NodeInfos().Get(node.Name) + status := p.Filter(context.Background(), state, tt.pod, nodeInfo) + if len(tt.wantStatusCode) != 0 && status.Code() != tt.wantStatusCode[node.Name] { + t.Errorf("[%s]: expected status code %v got %v", node.Name, tt.wantStatusCode[node.Name], status.Code()) + } + } + }) + } +} + +func TestMultipleConstraints(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + wantStatusCode map[string]framework.Code + }{ + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on any zone (hence any node) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-x + // intersection of (1) and (2) returns node-x + name: "two Constraints on zone and node, spreads = [3/3, 2/1/0/3]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone1 (node-a or node-b) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-x + // intersection of (1) and (2) returns no node + name: "two Constraints on zone and node, spreads = [3/4, 2/1/0/4]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone2 (node-x or node-y) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a, node-b or node-x + // intersection of (1) and (2) returns node-x + name: "Constraints hold different labelSelectors, spreads = [1/0, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone2 (node-x or node-y) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a or node-b + // intersection of (1) and (2) returns no node + name: "Constraints hold different labelSelectors, spreads = [1/0, 0/0/1/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on zone1 (node-a or node-b) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-b or node-x + // intersection of (1) and (2) returns node-b + name: "Constraints hold different labelSelectors, spreads = [2/3, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. pod doesn't match itself on "zone" constraint, so it can be put onto any zone + // 2. to fulfil "node" constraint, incoming pod can be placed on node-a or node-b + // intersection of (1) and (2) returns node-a and node-b + name: "Constraints hold different labelSelectors but pod doesn't match itself on 'zone' constraint", + pod: testing_helper.MakePod().Name("p").Label("bar", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("bar", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Success, + "node-b": framework.Success, + "node-x": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + // 1. to fulfil "zone" constraint, incoming pod can be placed on any zone (hence any node) + // 2. to fulfil "node" constraint, incoming pod can be placed on node-b (node-x doesn't have the required label) + // intersection of (1) and (2) returns node-b + name: "two Constraints on zone and node, absence of label 'node' on node-x, spreads = [1/1, 1/0/0/1]", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Success, + "node-x": framework.UnschedulableAndUnresolvable, + "node-y": framework.Unschedulable, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, nil) + + p := &PodTopologySpread{sharedLister: snapshot} + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, tt.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("preFilter failed with status: %v", preFilterStatus) + } + + for _, node := range tt.nodes { + nodeInfo, _ := snapshot.NodeInfos().Get(node.Name) + status := p.Filter(context.Background(), state, tt.pod, nodeInfo) + if len(tt.wantStatusCode) != 0 && status.Code() != tt.wantStatusCode[node.Name] { + t.Errorf("[%s]: expected error code %v got %v", node.Name, tt.wantStatusCode[node.Name], status.Code()) + } + } + }) + } +} + +func TestPreFilterDisabled(t *testing.T) { + pod := &v1.Pod{} + nodeInfo := framework.NewNodeInfo() + node := v1.Node{} + nodeInfo.SetNode(&node) + p := &PodTopologySpread{} + cycleState := framework.NewCycleState() + gotStatus := p.Filter(context.Background(), cycleState, pod, nodeInfo) + wantStatus := framework.NewStatus(framework.Error, `error reading "PreFilterPodTopologySpread" from cycleState: not found`) + if !reflect.DeepEqual(gotStatus, wantStatus) { + t.Errorf("status does not match: %v, want: %v", gotStatus, wantStatus) + } +} + +func TestNMNodesFilter(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + nmNodes []*nodev1alpha1.NMNode + existingPods []*v1.Pod + wantStatusCode map[string]framework.Code + }{ + { + name: "All nodes are of NMNode type, that is, they are managed by the node manager. 
Pods spread across nodes as 2/1/0/3, only node-x fits", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{"node": "node-a"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{"node": "node-b"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-x", Labels: map[string]string{"node": "node-x"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-y", Labels: map[string]string{"node": "node-y"}}}, + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-x": framework.Success, + "node-y": framework.Unschedulable, + }, + }, + { + name: "The first node-x is v1.node and the others are of NMNode type. Although node-x matches the smallest number of pods, it cannot be scheduled to it because it is a v1.Node. It can only be scheduled to node-b. 
Pods spread across nodes as 0/2/1/3.", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-x", Labels: map[string]string{"node": "node-x"}}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{"node": "node-a"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{"node": "node-b"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-y", Labels: map[string]string{"node": "node-y"}}}, + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-x": framework.UnschedulableAndUnresolvable, + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + { + name: "The first node-x has v1.node and NMNode, the others are of NMNode type. Node-x matches the smallest number of pods and has NMNode, it cannot be scheduled. 
Pods spread across nodes as 0/2/1/3.", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-x", Labels: map[string]string{"node": "node-x"}}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{"node": "node-a"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{"node": "node-b"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-x", Labels: map[string]string{"node": "node-x"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-y", Labels: map[string]string{"node": "node-y"}}}, + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + }, + wantStatusCode: map[string]framework.Code{ + "node-x": framework.Success, + "node-a": framework.Unschedulable, + "node-b": framework.Unschedulable, + "node-y": framework.Unschedulable, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.pod.Annotations = map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)} + snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, tt.nmNodes) + + p := &PodTopologySpread{sharedLister: snapshot} + state := framework.NewCycleState() + preFilterStatus := p.PreFilter(context.Background(), state, tt.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("preFilter failed with status: %v", preFilterStatus) + } + + nodeInfos := snapshot.NodeInfos().List() + for _, nodeInfo := range nodeInfos { + nodeName := nodeInfo.GetNodeName() + status := p.Filter(context.Background(), state, tt.pod, nodeInfo) + if len(tt.wantStatusCode) != 0 && status.Code() != tt.wantStatusCode[nodeName] { + t.Errorf("[%s]: expected status code %v got %v", nodeName, tt.wantStatusCode[nodeName], status.Code()) + } + } + }) + } +} diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go new file mode 100644 index 00000000..5f45e72b --- /dev/null +++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go @@ -0,0 +1,94 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "fmt" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/validation" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/handle" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/informers" + appslisters "k8s.io/client-go/listers/apps/v1" + corelisters "k8s.io/client-go/listers/core/v1" +) + +// PodTopologySpread is a plugin that ensures pod's topologySpreadConstraints is satisfied. +type PodTopologySpread struct { + args config.PodTopologySpreadArgs + sharedLister framework.SharedLister + services corelisters.ServiceLister + replicationCtrls corelisters.ReplicationControllerLister + replicaSets appslisters.ReplicaSetLister + statefulSets appslisters.StatefulSetLister +} + +var _ framework.PreFilterPlugin = &PodTopologySpread{} +var _ framework.FilterPlugin = &PodTopologySpread{} +var _ framework.PreScorePlugin = &PodTopologySpread{} +var _ framework.ScorePlugin = &PodTopologySpread{} + +const ( + // Name is the name of the plugin used in the plugin registry and configurations. + Name = "PodTopologySpread" +) + +// Name returns name of the plugin. It is used in logs, etc. +func (pl *PodTopologySpread) Name() string { + return Name +} + +// BuildArgs returns the arguments used to build the plugin. +func (pl *PodTopologySpread) BuildArgs() interface{} { + return pl.args +} + +// TODO: this is implemented by k8s 1.19, consider updating the code +// New initializes a new plugin and returns it. +func New(plArgs runtime.Object, h handle.PodFrameworkHandle) (framework.Plugin, error) { + if h.SnapshotSharedLister() == nil { + return nil, fmt.Errorf("SnapshotSharedlister is nil") + } + args, err := utils.GetArgs(plArgs) + if err != nil { + return nil, err + } + if err := validation.ValidatePodTopologySpreadArgs(&args); err != nil { + return nil, err + } + pl := &PodTopologySpread{ + sharedLister: h.SnapshotSharedLister(), + args: args, + } + if len(pl.args.DefaultConstraints) != 0 { + if h.SharedInformerFactory() == nil { + return nil, fmt.Errorf("SharedInformerFactory is nil") + } + pl.setListers(h.SharedInformerFactory()) + } + return pl, nil +} + +func (pl *PodTopologySpread) setListers(factory informers.SharedInformerFactory) { + pl.services = factory.Core().V1().Services().Lister() + pl.replicationCtrls = factory.Core().V1().ReplicationControllers().Lister() + pl.replicaSets = factory.Apps().V1().ReplicaSets().Lister() + pl.statefulSets = factory.Apps().V1().StatefulSets().Lister() +} diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go new file mode 100644 index 00000000..9b6444ee --- /dev/null +++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go @@ -0,0 +1,321 @@ +/* +Copyright 2019 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtopologyspread + +import ( + "context" + "fmt" + "math" + "sync/atomic" + + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + "github.com/kubewharf/godel-scheduler/pkg/plugins/helper" + "github.com/kubewharf/godel-scheduler/pkg/plugins/podlauncher" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +const preScoreStateKey = "PreScore" + Name + +// preScoreState computed at PreScore and used at Score. +// Fields are exported for comparison during testing. +type preScoreState struct { + Constraints []utils.TopologySpreadConstraint + // IgnoredNodes records a set of node names which match the scheduling pod's launcher type but miss some Constraints[*].topologyKey. + // It also records the node names that don't match the scheduling pod's launcher type + IgnoredNodes sets.String + // TopologyPairToPodCounts is keyed with topologyPair, and valued with the number of matching pods. + TopologyPairToPodCounts map[utils.TopologyPair]*int64 + // TopologyNormalizingWeight is the weight we give to the counts per topology. + // This allows the pod counts of smaller topologies to not be watered down by + // bigger ones. + TopologyNormalizingWeight []float64 +} + +// Clone implements the mandatory Clone interface. We don't really copy the data since +// there is no need for that. +func (s *preScoreState) Clone() framework.StateData { + return s +} + +// initPreScoreState iterates "filteredNodes" to filter out the nodes which +// don't have required topologyKey(s), and initialize: +// 1) s.TopologyPairToPodCounts: keyed with both eligible topology pair and node names. +// 2) s.IgnoredNodes: the set of nodes that shouldn't be scored. +// 3) s.TopologyNormalizingWeight: The weight to be given to each constraint based on the number of values in a topology. 
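+//
+// For example, with a "zone" constraint spanning two zones among filteredNodes and a
+// kubernetes.io/hostname constraint over ten scoreable nodes, the normalizing weights are
+// log(2+2) and log(10+2), so the small per-host pod counts still carry comparable weight
+// next to the much larger per-zone counts.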
+func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []framework.NodeInfo) error { + var err error + if len(pod.Spec.TopologySpreadConstraints) > 0 { + s.Constraints, err = utils.FilterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.ScheduleAnyway) + if err != nil { + return fmt.Errorf("obtaining pod's soft topology spread constraints: %v", err) + } + } else { + s.Constraints, err = pl.defaultConstraints(pod, v1.ScheduleAnyway) + if err != nil { + return fmt.Errorf("setting default soft topology spread constraints: %v", err) + } + } + if len(s.Constraints) == 0 { + return nil + } + + scheduledPodLauncher, err := podutil.GetPodLauncher(pod) + if err != nil { + return err + } + + topoSize := make([]int, len(s.Constraints)) + for _, nodeInfo := range filteredNodes { + for _, podLanucher := range podutil.PodLanucherTypes { + if utils.IsNodeNil(nodeInfo, podLanucher) { + continue + } + + nodeLabels := nodeInfo.GetNodeLabels(podLanucher) + if !utils.NodeLabelsMatchSpreadConstraints(nodeLabels, s.Constraints) { + // Nodes which don't have all required topologyKeys present are ignored + // when scoring later. + if podLanucher == scheduledPodLauncher { + s.IgnoredNodes.Insert(getNodeNameByPodLauncher(nodeInfo, podLanucher)) + } + continue + } + for i, constraint := range s.Constraints { + // per-node counts are calculated during Score. + if constraint.TopologyKey == v1.LabelHostname { + continue + } + pair := utils.TopologyPair{Key: constraint.TopologyKey, Value: nodeLabels[constraint.TopologyKey]} + if s.TopologyPairToPodCounts[pair] == nil { + s.TopologyPairToPodCounts[pair] = new(int64) + topoSize[i]++ + } + } + } + if utils.IsNodeNil(nodeInfo, scheduledPodLauncher) { + s.IgnoredNodes.Insert(nodeInfo.GetNodeName()) + } + } + + s.TopologyNormalizingWeight = make([]float64, len(s.Constraints)) + for i, c := range s.Constraints { + sz := topoSize[i] + if c.TopologyKey == v1.LabelHostname { + sz = len(filteredNodes) - len(s.IgnoredNodes) + } + s.TopologyNormalizingWeight[i] = topologyNormalizingWeight(sz) + } + return nil +} + +// PreScore builds and writes cycle state used by Score and NormalizeScore. +func (pl *PodTopologySpread) PreScore( + ctx context.Context, + cycleState *framework.CycleState, + pod *v1.Pod, + filteredNodes []framework.NodeInfo, +) *framework.Status { + allNodes := pl.sharedLister.NodeInfos().List() + + if len(filteredNodes) == 0 || len(allNodes) == 0 { + // No nodes to score. + return nil + } + + state := &preScoreState{ + IgnoredNodes: sets.NewString(), + TopologyPairToPodCounts: make(map[utils.TopologyPair]*int64), + } + err := pl.initPreScoreState(state, pod, filteredNodes) + if err != nil { + return framework.NewStatus(framework.Error, fmt.Sprintf("error when calculating preScoreState: %v", err)) + } + + // return if incoming pod doesn't have soft topology spread Constraints. 
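+	// The empty state is still written below so that Score and NormalizeScore can read it
+	// via getPreScoreState instead of failing on a missing cycleState key.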
+	if len(state.Constraints) == 0 {
+		cycleState.Write(preScoreStateKey, state)
+		return nil
+	}
+
+	processAllNode := func(i int) {
+		nodeInfo := allNodes[i]
+		for _, podLanucher := range podutil.PodLanucherTypes {
+			if utils.IsNodeNil(nodeInfo, podLanucher) {
+				continue
+			}
+
+			nodeLabels := nodeInfo.GetNodeLabels(podLanucher)
+			// (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity
+			// (2) All topologyKeys need to be present in `node`
+			if !helper.PodMatchesNodeSelectorAndAffinityTerms(pod, nodeInfo, podLanucher) ||
+				!utils.NodeLabelsMatchSpreadConstraints(nodeLabels, state.Constraints) {
+				return
+			}
+
+			for _, c := range state.Constraints {
+				pair := utils.TopologyPair{Key: c.TopologyKey, Value: nodeLabels[c.TopologyKey]}
+				// If current topology pair is not associated with any candidate node,
+				// continue to avoid unnecessary calculation.
+				// Per-node counts are also skipped, as they are done during Score.
+				tpCount := state.TopologyPairToPodCounts[pair]
+				if tpCount == nil {
+					continue
+				}
+				count := utils.CountPodsMatchSelector(nodeInfo.GetPods(), c.Selector, pod.Namespace, podLanucher)
+				atomic.AddInt64(tpCount, int64(count))
+			}
+		}
+	}
+	parallelize.Until(ctx, len(allNodes), processAllNode)
+
+	cycleState.Write(preScoreStateKey, state)
+	return nil
+}
+
+// Score invoked at the Score extension point.
+// The "score" returned in this function is the matching number of pods on the `nodeName`,
+// it is normalized later.
+func (pl *PodTopologySpread) Score(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
+	nodeInfo, err := pl.sharedLister.NodeInfos().Get(nodeName)
+	if err != nil {
+		return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v,", nodeName, err))
+	}
+	podLauncher, status := podlauncher.NodeFits(cycleState, pod, nodeInfo)
+	if status != nil {
+		return 0, nil
+	}
+
+	nodeLabels := nodeInfo.GetNodeLabels(podLauncher)
+
+	s, err := getPreScoreState(cycleState)
+	if err != nil {
+		return 0, framework.NewStatus(framework.Error, err.Error())
+	}
+
+	// Return if the node is not qualified.
+	if s.IgnoredNodes.Has(getNodeNameByPodLauncher(nodeInfo, podLauncher)) {
+		return 0, nil
+	}
+
+	// For each present <pair>, current node gets a credit of <matchSum>.
+	// And we sum up <matchSum> and return it as this node's score.
+	var score float64
+	for i, c := range s.Constraints {
+		if tpVal, ok := nodeLabels[c.TopologyKey]; ok {
+			var cnt int64
+			if c.TopologyKey == v1.LabelHostname {
+				cnt = int64(utils.CountPodsMatchSelector(nodeInfo.GetPods(), c.Selector, pod.Namespace, podLauncher))
+			} else {
+				pair := utils.TopologyPair{Key: c.TopologyKey, Value: tpVal}
+				cnt = *s.TopologyPairToPodCounts[pair]
+			}
+			score += scoreForCount(cnt, c.MaxSkew, s.TopologyNormalizingWeight[i])
+		}
+	}
+	return int64(score), nil
+}
+
+// NormalizeScore invoked after scoring all nodes.
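+// A lower raw score means fewer matching pods in the node's topology domains, which is the
+// preferred placement for spreading, so the final score is inverted and rescaled:
+//
+//	normalized = framework.MaxNodeScore * (maxScore + minScore - raw) / maxScore
+//
+// For example, with raw scores {node-a: 4, node-b: 1}, node-a gets 100*(4+1-4)/4 = 25 and
+// node-b gets 100*(4+1-1)/4 = 100.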
+func (pl *PodTopologySpread) NormalizeScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
+	s, err := getPreScoreState(cycleState)
+	if err != nil {
+		return framework.NewStatus(framework.Error, err.Error())
+	}
+	if s == nil {
+		return nil
+	}
+
+	// Calculate <minScore> and <maxScore>
+	var minScore int64 = math.MaxInt64
+	var maxScore int64
+	for _, score := range scores {
+		// it's mandatory to check if <score.Name> is present in s.IgnoredNodes
+		if s.IgnoredNodes.Has(score.Name) {
+			continue
+		}
+		if score.Score < minScore {
+			minScore = score.Score
+		}
+		if score.Score > maxScore {
+			maxScore = score.Score
+		}
+	}
+
+	for i := range scores {
+		nodeInfo, err := pl.sharedLister.NodeInfos().Get(scores[i].Name)
+		if err != nil {
+			return framework.NewStatus(framework.Error, err.Error())
+		}
+
+		if s.IgnoredNodes.Has(nodeInfo.GetNodeName()) {
+			scores[i].Score = 0
+			continue
+		}
+
+		if maxScore == 0 {
+			scores[i].Score = framework.MaxNodeScore
+			continue
+		}
+
+		s := scores[i].Score
+		scores[i].Score = framework.MaxNodeScore * (maxScore + minScore - s) / maxScore
+	}
+	return nil
+}
+
+// ScoreExtensions of the Score plugin.
+func (pl *PodTopologySpread) ScoreExtensions() framework.ScoreExtensions {
+	return pl
+}
+
+func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
+	c, err := cycleState.Read(preScoreStateKey)
+	if err != nil {
+		return nil, fmt.Errorf("error reading %q from cycleState: %v", preScoreStateKey, err)
+	}
+
+	s, ok := c.(*preScoreState)
+	if !ok {
+		return nil, fmt.Errorf("%+v convert to podtopologyspread.preScoreState error", c)
+	}
+	return s, nil
+}
+
+// topologyNormalizingWeight calculates the weight for the topology, based on
+// the number of values that exist for a topology.
+// Since <size> is at least 1 (all nodes that passed the Filters are in the
+// same topology), and k8s supports 5k nodes, the result is in the interval
+// <1.09, 8.52>.
+//
+// Note: <size> could also be zero when no nodes have the required topologies,
+// however we don't care about topology weight in this case as we return a 0
+// score for all nodes.
+func topologyNormalizingWeight(size int) float64 {
+	return math.Log(float64(size + 2))
+}
+
+// scoreForCount calculates the score based on number of matching pods in a
+// topology domain, the constraint's maxSkew and the topology weight.
+// `maxSkew-1` is added to the score so that differences between topology
+// domains get watered down, controlling the tolerance of the score to skews.
+func scoreForCount(cnt int64, maxSkew int32, tpWeight float64) float64 {
+	return float64(cnt)*tpWeight + float64(maxSkew-1)
+}
diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go
new file mode 100644
index 00000000..be43f5b9
--- /dev/null
+++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go
@@ -0,0 +1,988 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package podtopologyspread + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1" + framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + utils "github.com/kubewharf/godel-scheduler/pkg/plugins/podtopologyspread" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/apis/config" + testing_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper" + framework_helper "github.com/kubewharf/godel-scheduler/pkg/testing-helper/framework-helper" + "github.com/kubewharf/godel-scheduler/pkg/util/parallelize" + podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/utils/pointer" +) + +func TestPreScoreStateEmptyNodes(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + objs []runtime.Object + defaultConstraints []v1.TopologySpreadConstraint + want *preScoreState + }{ + { + name: "normal case", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + }, + want: &preScoreState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + }, + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + }, + }, + IgnoredNodes: sets.NewString(), + TopologyPairToPodCounts: map[utils.TopologyPair]*int64{ + {Key: "zone", Value: "zone1"}: pointer.Int64Ptr(0), + {Key: "zone", Value: "zone2"}: pointer.Int64Ptr(0), + }, + TopologyNormalizingWeight: []float64{topologyNormalizingWeight(2), topologyNormalizingWeight(3)}, + }, + }, + { + name: "node-x doesn't have label zone", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label(v1.LabelHostname, "node-x").Obj(), + }, + want: &preScoreState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + }, + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("bar").Obj()), + }, + }, + IgnoredNodes: sets.NewString("node-x"), + TopologyPairToPodCounts: map[utils.TopologyPair]*int64{ + {Key: "zone", Value: "zone1"}: pointer.Int64Ptr(0), + }, + TopologyNormalizingWeight: []float64{topologyNormalizingWeight(1), topologyNormalizingWeight(2)}, + }, + }, + { + name: "defaults constraints and a replica set", + pod: testing_helper.MakePod().Name("p").Label("foo", "tar").Label("baz", "sup").Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 1, TopologyKey: v1.LabelHostname, WhenUnsatisfiable: v1.ScheduleAnyway}, + {MaxSkew: 2, TopologyKey: "rack", WhenUnsatisfiable: v1.DoNotSchedule}, + {MaxSkew: 2, TopologyKey: "planet", WhenUnsatisfiable: v1.ScheduleAnyway}, + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("rack", "rack1").Label(v1.LabelHostname, "node-a").Label("planet", "mars").Obj(), + }, + objs: []runtime.Object{ + &appsv1.ReplicaSet{Spec: appsv1.ReplicaSetSpec{Selector: testing_helper.MakeLabelSelector().Exists("foo").Obj()}}, + }, + want: &preScoreState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + }, + { + MaxSkew: 2, + TopologyKey: "planet", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Exists("foo").Obj()), + }, + }, + IgnoredNodes: sets.NewString(), + TopologyPairToPodCounts: map[utils.TopologyPair]*int64{ + {Key: "planet", Value: "mars"}: pointer.Int64Ptr(0), + }, + TopologyNormalizingWeight: []float64{topologyNormalizingWeight(1), topologyNormalizingWeight(1)}, + }, + }, + { + name: "defaults constraints and a replica set that doesn't match", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Label("baz", "sup").Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 2, TopologyKey: "planet", WhenUnsatisfiable: v1.ScheduleAnyway}, + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("planet", "mars").Obj(), + }, + objs: []runtime.Object{ + &appsv1.ReplicaSet{Spec: appsv1.ReplicaSetSpec{Selector: testing_helper.MakeLabelSelector().Exists("tar").Obj()}}, + }, + want: &preScoreState{ + TopologyPairToPodCounts: make(map[utils.TopologyPair]*int64), + }, + }, + { + name: "defaults constraints and a replica set, but pod has constraints", + pod: testing_helper.MakePod().Name("p").Label("foo", "bar").Label("baz", "sup"). + SpreadConstraint(1, "zone", v1.DoNotSchedule, testing_helper.MakeLabelSelector().Label("foo", "bar").Obj()). 
+ SpreadConstraint(2, "planet", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Label("baz", "sup").Obj()).Obj(), + defaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 2, TopologyKey: "galaxy", WhenUnsatisfiable: v1.ScheduleAnyway}, + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("planet", "mars").Label("galaxy", "andromeda").Obj(), + }, + objs: []runtime.Object{ + &appsv1.ReplicaSet{Spec: appsv1.ReplicaSetSpec{Selector: testing_helper.MakeLabelSelector().Exists("foo").Obj()}}, + }, + want: &preScoreState{ + Constraints: []utils.TopologySpreadConstraint{ + { + MaxSkew: 2, + TopologyKey: "planet", + Selector: mustConvertLabelSelectorAsSelector(t, testing_helper.MakeLabelSelector().Label("baz", "sup").Obj()), + }, + }, + IgnoredNodes: sets.NewString(), + TopologyPairToPodCounts: map[utils.TopologyPair]*int64{ + {"planet", "mars"}: pointer.Int64Ptr(0), + }, + TopologyNormalizingWeight: []float64{topologyNormalizingWeight(1)}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(fake.NewSimpleClientset(tt.objs...), 0) + snapshot := framework_helper.MakeSnapShot(nil, tt.nodes, nil) + + pl := PodTopologySpread{ + sharedLister: snapshot, + args: config.PodTopologySpreadArgs{ + DefaultConstraints: tt.defaultConstraints, + }, + } + pl.setListers(informerFactory) + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + cs := framework.NewCycleState() + nodeInfos := make([]framework.NodeInfo, len(tt.nodes)) + for index, node := range tt.nodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + + if s := pl.PreScore(context.Background(), cs, tt.pod, nodeInfos); !s.IsSuccess() { + t.Fatal(s.AsError()) + } + + got, err := getPreScoreState(cs) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(tt.want, got, cmpOpts...); diff != "" { + t.Errorf("PodTopologySpread#PreScore() returned (-want, +got):\n%s", diff) + } + }) + } +} + +func TestPodTopologySpreadScore(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + existingPods []*v1.Pod + nodes []*v1.Node + failedNodes []*v1.Node // nodes + failedNodes = all nodes + want framework.NodeScoreList + }{ + // Explanation on the Legend: + // a) X/Y means there are X matching pods on node1 and Y on node2, both nodes are candidates + // (i.e. they have passed all predicates) + // b) X/~Y~ means there are X matching pods on node1 and Y on node2, but node Y is NOT a candidate + // c) X/?Y? means there are X matching pods on node1 and Y on node2, both nodes are candidates + // but node2 either i) doesn't have all required topologyKeys present, or ii) doesn't match + // incoming pod's nodeSelector/nodeAffinity + { + name: "one constraint on node, no existing pods", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 100}, + }, + }, + { + // if there is only one candidate node, it should be scored to 100 + name: "one constraint on node, only one node is candidate", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). 
+ SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + }, + }, + { + name: "one constraint on node, all nodes have the same number of matching pods", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 100}, + }, + }, + { + // matching pods spread as 2/1/0/3. + name: "one constraint on node, all 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(), + testing_helper.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []framework.NodeScore{ + {Name: "node-a", Score: 40}, + {Name: "node-b", Score: 80}, + {Name: "node-c", Score: 100}, + {Name: "node-d", Score: 0}, + }, + }, + { + name: "one constraint on node, all 4 nodes are candidates, maxSkew=2", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(2, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + // matching pods spread as 2/1/0/3. 
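+			// Roughly speaking, a larger maxSkew tolerates more of the existing 2/1/0/3 imbalance, so every node
+			// except the already-least-loaded node-c is expected to score a bit higher than in the maxSkew=1 case
+			// (see the deltas noted inline in `want` below).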
+ existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(), + testing_helper.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []framework.NodeScore{ + {Name: "node-a", Score: 50}, // +10, compared to maxSkew=1 + {Name: "node-b", Score: 83}, // +3, compared to maxSkew=1 + {Name: "node-c", Score: 100}, + {Name: "node-d", Score: 16}, // +16, compared to maxSkew=1 + }, + }, + { + name: "one constraint on node, all 4 nodes are candidates, maxSkew=3", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(3, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + // matching pods spread as 4/3/2/1. + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b3").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-c1").Node("node-c").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-c2").Node("node-c").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(), + testing_helper.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []framework.NodeScore{ + {Name: "node-a", Score: 33}, // +19 compared to maxSkew=1 + {Name: "node-b", Score: 55}, // +13 compared to maxSkew=1 + {Name: "node-c", Score: 77}, // +6 compared to maxSkew=1 + {Name: "node-d", Score: 100}, + }, + }, + { + // matching pods spread as 4/2/1/~3~ (node4 is not a candidate) + name: "one constraint on node, 3 out of 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label(v1.LabelHostname, "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-y").Label(v1.LabelHostname, "node-y").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 16}, + {Name: "node-b", Score: 66}, + {Name: "node-x", Score: 100}, + }, + }, + { + // matching pods spread as 4/?2?/1/~3~, total = 4+?+1 = 5 (as node2 is problematic) + name: "one constraint on node, 3 out of 4 nodes are candidates, one node doesn't match topology key", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("n", "node-b").Obj(), // label `n` doesn't match topologyKey + testing_helper.MakeNode().Name("node-x").Label(v1.LabelHostname, "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-y").Label(v1.LabelHostname, "node-y").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 20}, + {Name: "node-b", Score: 0}, + {Name: "node-x", Score: 100}, + }, + }, + { + // matching pods spread as 4/2/1/~3~ + name: "one constraint on zone, 3 out of 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label(v1.LabelHostname, "node-y").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 62}, + {Name: "node-b", Score: 62}, + {Name: "node-x", Score: 100}, + }, + }, + { + // matching pods spread as 2/~1~/2/~4~. + name: "two Constraints on zone and node, 2 out of 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-x2").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label(v1.LabelHostname, "node-y").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-x", Score: 54}, + }, + }, + { + // If Constraints hold different labelSelectors, it's a little complex. 
+ // +----------------------+------------------------+ + // | zone1 | zone2 | + // +----------------------+------------------------+ + // | node-a | node-b | node-x | node-y | + // +--------+-------------+--------+---------------+ + // | P{foo} | P{foo, bar} | | P{foo} P{bar} | + // +--------+-------------+--------+---------------+ + // For the first constraint (zone): the matching pods spread as 2/2/1/1 + // For the second constraint (node): the matching pods spread as 0/1/0/1 + name: "two Constraints on zone and node, with different labelSelectors", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label(v1.LabelHostname, "node-y").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []framework.NodeScore{ + {Name: "node-a", Score: 75}, + {Name: "node-b", Score: 25}, + {Name: "node-x", Score: 100}, + {Name: "node-y", Score: 50}, + }, + }, + { + // For the first constraint (zone): the matching pods spread as 0/0/2/2 + // For the second constraint (node): the matching pods spread as 0/1/0/1 + name: "two Constraints on zone and node, with different labelSelectors, some nodes have 0 pods", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("bar").Obj()). 
+ Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label(v1.LabelHostname, "node-y").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 75}, + {Name: "node-x", Score: 50}, + {Name: "node-y", Score: 0}, + }, + }, + { + // For the first constraint (zone): the matching pods spread as 2/2/1/~1~ + // For the second constraint (node): the matching pods spread as 0/1/0/~1~ + name: "two Constraints on zone and node, with different labelSelectors, 3 out of 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(), + testing_helper.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label("zone", "zone1").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label("zone", "zone1").Label(v1.LabelHostname, "node-b").Obj(), + testing_helper.MakeNode().Name("node-x").Label("zone", "zone2").Label(v1.LabelHostname, "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-y").Label("zone", "zone2").Label(v1.LabelHostname, "node-y").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 75}, + {Name: "node-b", Score: 25}, + {Name: "node-x", Score: 100}, + }, + }, + { + name: "existing pods in a different namespace do not count", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 50}, + }, + }, + { + name: "terminating Pods should be excluded", + pod: testing_helper.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + testing_helper.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + testing_helper.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + }, + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a").Node("node-a").Label("foo", "").Terminating().Obj(), + testing_helper.MakePod().Name("p-b").Node("node-b").Label("foo", "").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 0}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + allNodes := append([]*v1.Node{}, tt.nodes...) + allNodes = append(allNodes, tt.failedNodes...) + state := framework.NewCycleState() + + snapshot := framework_helper.MakeSnapShot(tt.existingPods, allNodes, nil) + + p := &PodTopologySpread{sharedLister: snapshot} + + nodeInfos := make([]framework.NodeInfo, len(tt.nodes)) + for index, node := range tt.nodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + + status := p.PreScore(context.Background(), state, tt.pod, nodeInfos) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + + var gotList framework.NodeScoreList + for _, n := range tt.nodes { + nodeName := n.Name + score, status := p.Score(context.Background(), state, tt.pod, nodeName) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) + } + + status = p.NormalizeScore(context.Background(), state, tt.pod, gotList) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + if diff := cmp.Diff(tt.want, gotList, cmpOpts...); diff != "" { + t.Errorf("unexpected scores (-want,+got):\n%s", diff) + } + }) + } +} + +func TestNMNodesScore(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + existingPods []*v1.Pod + nodes []*v1.Node + nmNodes []*nodev1alpha1.NMNode + want framework.NodeScoreList + }{ + { + // matching pods spread as 2/1/0/3. + name: "All nodes are of NMNode type, that is, they are managed by the node manager. One constraint on node, all 4 nodes are candidates", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). 
+ Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{v1.LabelHostname: "node-a"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{v1.LabelHostname: "node-b"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-c", Labels: map[string]string{v1.LabelHostname: "node-c"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-d", Labels: map[string]string{v1.LabelHostname: "node-d"}}}, + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 40}, + {Name: "node-b", Score: 80}, + {Name: "node-c", Score: 100}, + {Name: "node-d", Score: 0}, + }, + }, + { + // matching pods spread as 2/1/0/3. + name: "The first node-a is v1.node and the others are of NMNode type. Since node-a is of v1.Node type, it does not meet the requirements of the pod to be scheduled, so the score is 0.", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + testing_helper.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(), + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{v1.LabelHostname: "node-a"}}}, + }, + nmNodes: []*nodev1alpha1.NMNode{ + {ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{v1.LabelHostname: "node-b"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-c", Labels: map[string]string{v1.LabelHostname: "node-c"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node-d", Labels: map[string]string{v1.LabelHostname: "node-d"}}}, + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 0}, + {Name: "node-b", Score: 75}, + {Name: "node-c", Score: 100}, + {Name: "node-d", Score: 0}, + }, + }, + { + // matching pods spread as 2-0/1/0/3. The first node-a has v1.node and NMNode. 
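+			// (In this legend, 2-0 means node-a hosts 2 matching pods launched via kubelet on its v1.Node and
+			// 0 launched via the node manager on its NMNode.)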
+			// When counting the pods of each topology, we will record all the pods of node-a.
+			// However, when calculating its score, since the podLauncher of the pod to be scheduled is the node manager,
+			// only the pods on the NMNode of node-a will be considered. Since the number of pods on the NMNode is 0, the score is 100.
+			name: "The first node-a has a v1.Node and an NMNode, and the others are of NMNode type. The score of node-a is 100",
+			pod: testing_helper.MakePod().Name("p").Label("foo", "").
+				SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()).
+				Obj(),
+			existingPods: []*v1.Pod{
+				testing_helper.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
+				testing_helper.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(),
+				testing_helper.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(),
+				testing_helper.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(),
+				testing_helper.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(),
+				testing_helper.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Annotation(podutil.PodLauncherAnnotationKey, string(podutil.NodeManager)).Obj(),
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{v1.LabelHostname: "node-a"}}},
+			},
+			nmNodes: []*nodev1alpha1.NMNode{
+				{ObjectMeta: metav1.ObjectMeta{Name: "node-a", Labels: map[string]string{v1.LabelHostname: "node-a"}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "node-b", Labels: map[string]string{v1.LabelHostname: "node-b"}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "node-c", Labels: map[string]string{v1.LabelHostname: "node-c"}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "node-d", Labels: map[string]string{v1.LabelHostname: "node-d"}}},
+			},
+			want: []framework.NodeScore{
+				{Name: "node-a", Score: 100},
+				{Name: "node-b", Score: 80},
+				{Name: "node-c", Score: 100},
+				{Name: "node-d", Score: 0},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.pod.Annotations = map[string]string{podutil.PodLauncherAnnotationKey: string(podutil.NodeManager)}
+			state := framework.NewCycleState()
+
+			snapshot := framework_helper.MakeSnapShot(tt.existingPods, tt.nodes, tt.nmNodes)
+
+			p := &PodTopologySpread{sharedLister: snapshot}
+
+			nodeNames := getNodeNames(tt.nodes, tt.nmNodes)
+			nodeInfos := make([]framework.NodeInfo, len(nodeNames))
+			for indexNode := 0; indexNode < len(nodeNames); indexNode++ {
+				nodeInfos[indexNode], _ = snapshot.NodeInfos().Get(nodeNames[indexNode])
+			}
+
+			status := p.PreScore(context.Background(), state, tt.pod, nodeInfos)
+			if !status.IsSuccess() {
+				t.Errorf("unexpected error: %v", status)
+			}
+
+			var gotList framework.NodeScoreList
+			for _, nodeInfo := range nodeInfos {
+				nodeName := nodeInfo.GetNodeName()
+				score, status := p.Score(context.Background(), state, tt.pod, nodeName)
+				if !status.IsSuccess() {
+					t.Errorf("unexpected error: %v", status)
+				}
+				gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score})
+			}
+
+			status = p.NormalizeScore(context.Background(), state, tt.pod, gotList)
+			if !status.IsSuccess() {
+				t.Errorf("unexpected error: %v", status)
+			}
+			if diff := cmp.Diff(tt.want, gotList, cmpOpts...); diff != "" {
+				t.Errorf("unexpected scores (-want,+got):\n%s", diff)
+ } + }) + } +} + +func getNodeNames(nodes []*v1.Node, nmNodes []*nodev1alpha1.NMNode) []string { + nameSet := sets.NewString() + for _, node := range nodes { + nameSet.Insert(node.Name) + } + for _, nmNode := range nmNodes { + nameSet.Insert(nmNode.Name) + } + return nameSet.List() +} + +func BenchmarkTestPodTopologySpreadScore(b *testing.B) { + tests := []struct { + name string + pod *v1.Pod + existingPodsNum int + allNodesNum int + filteredNodesNum int + }{ + { + name: "1000nodes/single-constraint-zone", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelZoneFailureDomain, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/single-constraint-node", + pod: testing_helper.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/two-Constraints-zone-node", + pod: testing_helper.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, v1.LabelZoneFailureDomain, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, v1.LabelHostname, v1.ScheduleAnyway, testing_helper.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + } + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + existingPods, allNodes, filteredNodes := testing_helper.MakeNodesAndPodsForEvenPodsSpread(tt.pod.Labels, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum) + state := framework.NewCycleState() + snapshot := framework_helper.MakeSnapShot(existingPods, allNodes, nil) + + p := &PodTopologySpread{sharedLister: snapshot} + + nodeInfos := make([]framework.NodeInfo, len(filteredNodes)) + for index, node := range filteredNodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + status := p.PreScore(context.Background(), state, tt.pod, nodeInfos) + if !status.IsSuccess() { + b.Fatalf("unexpected error: %v", status) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + var gotList framework.NodeScoreList + for _, n := range filteredNodes { + nodeName := n.Name + score, status := p.Score(context.Background(), state, tt.pod, nodeName) + if !status.IsSuccess() { + b.Fatalf("unexpected error: %v", status) + } + gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) + } + + status = p.NormalizeScore(context.Background(), state, tt.pod, gotList) + if !status.IsSuccess() { + b.Fatal(status) + } + } + }) + } +} + +// The following test allows to compare PodTopologySpread.Score with +// SelectorSpread.Score by using a similar rule. +// See pkg/scheduler/framework/plugins/selectorspread/selector_spread_perf_test.go +// for the equivalent test. 
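+// The shared `tests` table below parameterizes BenchmarkTestDefaultEvenPodsSpreadPriority
+// with the number of existing pods and nodes in the synthetic cluster.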
+ +var ( + tests = []struct { + name string + existingPodsNum int + allNodesNum int + }{ + { + name: "100nodes", + existingPodsNum: 1000, + allNodesNum: 100, + }, + { + name: "1000nodes", + existingPodsNum: 10000, + allNodesNum: 1000, + }, + } +) + +func BenchmarkTestDefaultEvenPodsSpreadPriority(b *testing.B) { + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + pod := testing_helper.MakePod().Name("p").Label("foo", "").Obj() + existingPods, allNodes, filteredNodes := testing_helper.MakeNodesAndPodsForEvenPodsSpread(pod.Labels, tt.existingPodsNum, tt.allNodesNum, tt.allNodesNum) + state := framework.NewCycleState() + snapshot := framework_helper.MakeSnapShot(existingPods, allNodes, nil) + + p := &PodTopologySpread{ + sharedLister: snapshot, + args: config.PodTopologySpreadArgs{ + DefaultConstraints: []v1.TopologySpreadConstraint{ + {MaxSkew: 1, TopologyKey: v1.LabelHostname, WhenUnsatisfiable: v1.ScheduleAnyway}, + {MaxSkew: 1, TopologyKey: v1.LabelZoneFailureDomain, WhenUnsatisfiable: v1.ScheduleAnyway}, + }, + }, + } + client := fake.NewSimpleClientset( + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": ""}}}, + ) + ctx := context.Background() + informerFactory := informers.NewSharedInformerFactory(client, 0) + p.setListers(informerFactory) + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + + b.ResetTimer() + + nodeInfos := make([]framework.NodeInfo, len(filteredNodes)) + for index, node := range filteredNodes { + nodeInfos[index] = framework_helper.WithNode(node) + } + + for i := 0; i < b.N; i++ { + status := p.PreScore(ctx, state, pod, nodeInfos) + if !status.IsSuccess() { + b.Fatalf("unexpected error: %v", status) + } + gotList := make(framework.NodeScoreList, len(filteredNodes)) + scoreNode := func(i int) { + n := filteredNodes[i] + score, _ := p.Score(ctx, state, pod, n.Name) + gotList[i] = framework.NodeScore{Name: n.Name, Score: score} + } + parallelize.Until(ctx, len(filteredNodes), scoreNode) + status = p.NormalizeScore(ctx, state, pod, gotList) + if !status.IsSuccess() { + b.Fatal(status) + } + } + }) + } +} diff --git a/pkg/scheduler/framework/registry.go b/pkg/scheduler/framework/registry.go index 20373b35..a679d99a 100644 --- a/pkg/scheduler/framework/registry.go +++ b/pkg/scheduler/framework/registry.go @@ -26,6 +26,7 @@ import ( "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/handle" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/coscheduling" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/imagelocality" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/interpodaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/loadaware" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodelabel" @@ -36,6 +37,7 @@ import ( "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodevolumelimits" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nonnativeresource" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podlauncher" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podtopologyspread" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/tainttoleration" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/volumebinding" 
"github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/preemption-plugins/searching/newlystartedprotectionchecker" @@ -101,6 +103,8 @@ func NewInTreeRegistry() Registry { nodepreferavoidpods.Name: nodepreferavoidpods.New, tainttoleration.Name: tainttoleration.New, nodeaffinity.Name: nodeaffinity.New, + interpodaffinity.Name: interpodaffinity.New, + podtopologyspread.Name: podtopologyspread.New, nodelabel.Name: nodelabel.New, nodeports.Name: nodeports.New, podlauncher.Name: podlauncher.New, diff --git a/pkg/scheduler/util/topologies.go b/pkg/scheduler/util/topologies.go new file mode 100644 index 00000000..8f622ff5 --- /dev/null +++ b/pkg/scheduler/util/topologies.go @@ -0,0 +1,84 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" +) + +// GetNamespacesFromPodAffinityTerm returns a set of names +// according to the namespaces indicated in podAffinityTerm. +// If namespaces is empty it considers the given pod's namespace. +func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String { + names := sets.String{} + if len(podAffinityTerm.Namespaces) == 0 { + names.Insert(pod.Namespace) + } else { + names.Insert(podAffinityTerm.Namespaces...) + } + return names +} + +// PodMatchesTermsNamespaceAndSelector returns true if the given +// matches the namespace and selector defined by `s . +func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool { + if selector == nil { + return false + } + if !namespaces.Has(pod.Namespace) { + return false + } + + if !selector.Matches(labels.Set(pod.Labels)) { + return false + } + return true +} + +// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key. +// Returns false if topologyKey is empty. +func NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool { + if len(topologyKey) == 0 { + return false + } + + if nodeA.Labels == nil || nodeB.Labels == nil { + return false + } + + nodeALabel, okA := nodeA.Labels[topologyKey] + nodeBLabel, okB := nodeB.Labels[topologyKey] + + // If found label in both nodes, check the label + if okB && okA { + return nodeALabel == nodeBLabel + } + + return false +} + +// Topologies contains topologies information of nodes. +type Topologies struct { + DefaultKeys []string +} + +// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key. 
+func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool { + return NodesHaveSameTopologyKey(nodeA, nodeB, topologyKey) +} diff --git a/pkg/testing-helper/framework-helper/wrappers.go b/pkg/testing-helper/framework-helper/wrappers.go index da224b5a..0b061fb8 100644 --- a/pkg/testing-helper/framework-helper/wrappers.go +++ b/pkg/testing-helper/framework-helper/wrappers.go @@ -17,11 +17,17 @@ limitations under the License. package framework_helper import ( + "time" + + nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1" katalystv1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" + commoncache "github.com/kubewharf/godel-scheduler/pkg/common/cache" framework "github.com/kubewharf/godel-scheduler/pkg/framework/api" + godelcache "github.com/kubewharf/godel-scheduler/pkg/scheduler/cache" ) type NodeInfoWrapper struct { @@ -86,3 +92,44 @@ func (n *NodeInfoWrapper) CNRCapacity(resources map[v1.ResourceName]string) *Nod n.SetCNR(cnr) return n } + +func WithNode(node *v1.Node) framework.NodeInfo { + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(node) + return nodeInfo +} + +func WithNMNode(nmNode *nodev1alpha1.NMNode) framework.NodeInfo { + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNMNode(nmNode) + return nodeInfo +} + +func MakeSnapShot(existingPods []*v1.Pod, nodes []*v1.Node, nmNodes []*nodev1alpha1.NMNode) *godelcache.Snapshot { + cache := godelcache.New(commoncache.MakeCacheHandlerWrapper(). + ComponentName("").SchedulerType("").SubCluster(framework.DefaultSubCluster). + PodAssumedTTL(time.Second).Period(10 * time.Second).StopCh(make(<-chan struct{})). + EnableStore("PreemptionStore"). + Obj()) + snapshot := godelcache.NewEmptySnapshot(commoncache.MakeCacheHandlerWrapper(). + SubCluster(framework.DefaultSubCluster).SwitchType(framework.DefaultSubClusterSwitchType). + EnableStore("PreemptionStore"). + Obj()) + + for _, pod := range existingPods { + pod.UID = types.UID(pod.Name) + cache.AddPod(pod) + } + for _, node := range nodes { + // WithNode(node) + cache.AddNode(node) + } + for _, nmNode := range nmNodes { + // WithNMNode(nmNode) + cache.AddNMNode(nmNode) + } + + cache.UpdateSnapshot(snapshot) + + return snapshot +} diff --git a/pkg/util/parallelize/parallelism.go b/pkg/util/parallelize/parallelism.go index ae1cc370..f70a9bc3 100644 --- a/pkg/util/parallelize/parallelism.go +++ b/pkg/util/parallelize/parallelism.go @@ -18,13 +18,28 @@ package parallelize import ( "context" + "math" "k8s.io/client-go/util/workqueue" ) const parallelism = 16 +// chunkSizeFor returns a chunk size for the given number of items to use for +// parallel work. The size aims to produce good CPU utilization. +// returns max(1, min(sqrt(n), n/Parallelism)) +func chunkSizeFor(n, parallelism int) int { + s := int(math.Sqrt(float64(n))) + + if r := n/parallelism + 1; s > r { + s = r + } else if s < 1 { + s = 1 + } + return s +} + // Until is a wrapper around workqueue.ParallelizeUntil to use in scheduling algorithms. 
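+// The work is chunked via chunkSizeFor, roughly max(1, min(⌊√pieces⌋, pieces/parallelism+1));
+// for example, with the default parallelism of 16 and 1000 pieces the chunk size works out to 31.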
 func Until(ctx context.Context, pieces int, doWorkPiece workqueue.DoWorkPieceFunc) {
-	workqueue.ParallelizeUntil(ctx, parallelism, pieces, doWorkPiece)
+	workqueue.ParallelizeUntil(ctx, parallelism, pieces, doWorkPiece, workqueue.WithChunkSize(chunkSizeFor(pieces, parallelism)))
 }
diff --git a/pkg/util/pod/podstate.go b/pkg/util/pod/podstate.go
index 84335261..39897383 100644
--- a/pkg/util/pod/podstate.go
+++ b/pkg/util/pod/podstate.go
@@ -130,7 +130,8 @@ const (
 )
 
 var (
-	PodLauncherUnsupportError = fmt.Errorf("pod launcher only allow %v", []PodLauncher{Kubelet, NodeManager})
+	PodLanucherTypes          = []PodLauncher{Kubelet, NodeManager}
+	PodLauncherUnsupportError = fmt.Errorf("pod launcher only allow %v", PodLanucherTypes)
 	PodLauncherMissedError    = fmt.Errorf("missing pod launcher")
 )
 
diff --git a/test/e2e/scheduling/hard_constraints.go b/test/e2e/scheduling/hard_constraints.go
index fd16f910..0b265475 100644
--- a/test/e2e/scheduling/hard_constraints.go
+++ b/test/e2e/scheduling/hard_constraints.go
@@ -22,6 +22,7 @@ import (
 	"time"
 
 	schedulingv1a1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/scheduling/v1alpha1"
+	godelclient "github.com/kubewharf/godel-scheduler-api/pkg/client/clientset/versioned"
 	"github.com/onsi/ginkgo"
 	_ "github.com/stretchr/testify/assert"
 	v1 "k8s.io/api/core/v1"
@@ -34,8 +35,11 @@ import (
 	clientset "k8s.io/client-go/kubernetes"
 	k8utilnet "k8s.io/utils/net"
 
+	nodev1alpha1 "github.com/kubewharf/godel-scheduler-api/pkg/apis/node/v1alpha1"
 	"github.com/kubewharf/godel-scheduler/pkg/framework/config"
+	"github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/interpodaffinity"
 	"github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeaffinity"
+	"github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podtopologyspread"
 	"github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/tainttoleration"
 	podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod"
 	"github.com/kubewharf/godel-scheduler/test/e2e/framework"
@@ -76,6 +80,7 @@ type pausePodConfig struct {
 
 var _ = SIGDescribe("SchedulingHardConstraints [Serial]", func() {
 	var cs clientset.Interface
+	var fs godelclient.Interface
 	var nodeList *v1.NodeList
 	var RCName string
 	var ns string
@@ -92,6 +97,7 @@ var _ = SIGDescribe("SchedulingHardConstraints [Serial]", func() {
 
 	ginkgo.BeforeEach(func() {
 		cs = f.ClientSet
+		fs = f.Godelclient
 		ns = f.Namespace.Name
 		nodeList = &v1.NodeList{}
 		var err error
@@ -781,21 +787,450 @@ var _ = SIGDescribe("SchedulingHardConstraints [Serial]", func() {
 		verifyResult(cs, 0, 1, ns)
 	})
 
-	// TODO InterPodAffinity plugin is needed
+	// Test scenario:
+	// 1. Find 2 nodes to run pods, add the same extra resource to both nodes.
+	// 2. Create one basicPod with two labels, set its node name to the first node; it needs 20% of the extra resource.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create one affinityPod with inter-pod affinity to the basicPod by label[0]; it needs 20% of the extra resource.
+	// 5. Make sure the affinityPod is scheduled to the first node, the same node as the basicPod.
+	// 6. Create one antiAffinityPod with inter-pod anti-affinity to the basicPod by label[1]; it needs 20% of the extra resource.
+	// 7. Make sure the antiAffinityPod is scheduled to the second node.
+	// 8. Create one failedPod with label[1] that needs 70% of the extra resource.
+	// 9. Make sure the failedPod is not scheduled because it can only be scheduled to node1 due to the antiAffinityPod's anti-affinity,
+	// but node1 doesn't have enough of the resource left.
+ // 10. Create one successPod with label[0] that needs 70% of the extra resource. + // 11. Make sure the successPod is scheduled to the second node, because it isn't affected by any inter-pod affinity. /* Testname: Scheduling pods with inter-pod affinity matching - Description: TODO + Description: Scheduling MUST meet inter-pod affinity requirements and scheduling pods MUST fail if no resource meets the specified pod + */ + ginkgo.Context("validates pod scheduling fits inter-pod affinity[requiredDuringSchedulingIgnoredDuringExecution]", func() { + testLabelKeys := []string{"godel.bytedance.com/test-label1", "godel.bytedance.com/test-label2"} + testLabelValues := []string{"test", "foo"} + var beardsecond v1.ResourceName = "example.com/beardsecond" + var nodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + nodeNames = Get2NodesThatCanRunPod(f) + + ginkgo.By("Set fake resource-" + nodeNames[0] + "-" + nodeNames[1]) + // Get node object: + for _, testNodeName := range nodeNames { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + nodeCopy.ResourceVersion = "0" + + nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000") + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove fake resource") + for _, testNodeName := range nodeNames { + // remove fake resource: + if testNodeName != "" { + // Get node object: + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + // force it to update + nodeCopy.ResourceVersion = "0" + delete(nodeCopy.Status.Capacity, beardsecond) + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to update node %v", testNodeName) + } + } + }) + + ginkgo.It("verify inter-pod affinity matches pod labels", func() { + ginkgo.By("Trying to create basicPod, ns: " + ns) + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0], testLabelKeys[1]: testLabelValues[1]}, + NodeName: nodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, basicPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create affinityPod") + affinityPodConf := pausePodConfig{ + Name: "affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Affinity: getInterPodAffinity(map[string]string{testLabelKeys[0]: 
testLabelValues[0]}, nil), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, affinityPodConf), ns, affinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, affinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, affinityPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create antiAffinityPod") + antiAffinityPodConf := pausePodConfig{ + Name: "anti-affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{nodeaffinity.Name}), + Affinity: getInterPodAffinity(nil, map[string]string{testLabelKeys[1]: testLabelValues[1]}), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, antiAffinityPodConf), ns, antiAffinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, antiAffinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, antiAffinityPodConf.Name).Spec.NodeName, nodeNames[1]) + + ginkgo.By("Trying to create failedPod") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[1]: testLabelValues[1]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, 3, 1, ns) + + ginkgo.By("Trying to create successpod") + successPodConf := pausePodConfig{ + Name: "success-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), nodeNames[1]) + }) + }) + + // Test scenario: + // 1. Find 2 nodes to run pods, then create nmnodes as same as the 2 nodes and set same extra resource to the nmnodes. + // 2. Create one basicPod with two label and set its node name to the first nmnode that needs 20% of the extra resource. + // 3. Wait for the pods to be scheduled. + // 4. Create one affinityPod with inter-pod affinity to the basicPod by label[0] that needs 20% of the extra resource. + // 5. Make sure the affinityPod is scheduled to the first nmnode as same as basicPod. + // 6. Create one antiAffinityPod with inter-pod anti affinity to the basicPod by label[1] that needs 20% of the extra resource. + // 7. Make sure the antiAffinityPod is scheduled to the second nmnode. + // 8. Create one failedPod with label[1] that needs 70% of the extra resource. + // 9. 
Make sure the failedPod is not scheduled becaues it only can be scheduled to the first nmnode due to antiAffinityPod's anti affinity. + // But the first nmnode haven't enough resource. + // 10. Create one successPod with label[0] that needs 70% of the extra resource. + // 11. Make sure the successPod is scheduled to the second nmnode, because it isn't affected by any inter-pod affinity. + /* + Testname: Scheduling pods with inter-pod affinity matching for nmnodes + Description: Scheduling MUST meet inter-pod affinity requirements and scheduling pods MUST fail if no resource meets the specified pod + */ + ginkgo.Context("validates pod scheduling fits inter-pod affinity[requiredDuringSchedulingIgnoredDuringExecution] for nmnodes", func() { + testLabelKeys := []string{"godel.bytedance.com/test-label1", "godel.bytedance.com/test-label2"} + testLabelValues := []string{"test", "foo"} + var beardsecond v1.ResourceName = "example.com/beardsecond" + var nodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + nodeNames = Get2NodesThatCanRunPod(f) + + ginkgo.By("Create nmnodes and set fake resource for " + nodeNames[0] + " and " + nodeNames[1]) + // Get node object: + for _, testNodeName := range nodeNames { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nmnodeTemplate := GetNMNodeTemplateByNode(node) + (*nmnodeTemplate.Status.ResourceCapacity)[beardsecond] = resource.MustParse("1000") + (*nmnodeTemplate.Status.ResourceAllocatable)[beardsecond] = resource.MustParse("1000") + _, err = fs.NodeV1alpha1().NMNodes().Create(context.TODO(), nmnodeTemplate, metav1.CreateOptions{}) + framework.ExpectNoError(err, "unable to create NM node for node %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove nmnodes") + for _, testNodeName := range nodeNames { + // remove nmnodes: + if testNodeName != "" { + err := fs.NodeV1alpha1().NMNodes().Delete(context.TODO(), testNodeName, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "unable to remove nmnode %v", testNodeName) + } + } + }) + + ginkgo.It("verify inter-pod affinity matches pod labels for nmnodes", func() { + ginkgo.By("Trying to create basicPod, ns: " + ns) + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0], testLabelKeys[1]: testLabelValues[1]}, + NodeName: nodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, basicPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create affinityPod") + affinityPodConf := pausePodConfig{ + Name: "affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{interpodaffinity.Name}), + Affinity: getInterPodAffinity(map[string]string{testLabelKeys[0]: 
testLabelValues[0]}, nil), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, affinityPodConf), ns, affinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, affinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, affinityPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create antiAffinityPod") + antiAffinityPodConf := pausePodConfig{ + Name: "anti-affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{nodeaffinity.Name}), + Affinity: getInterPodAffinity(nil, map[string]string{testLabelKeys[1]: testLabelValues[1]}), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, antiAffinityPodConf), ns, antiAffinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, antiAffinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, antiAffinityPodConf.Name).Spec.NodeName, nodeNames[1]) + + ginkgo.By("Trying to create failedPod") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[1]: testLabelValues[1]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, 3, 1, ns) + + ginkgo.By("Trying to create successpod") + successPodConf := pausePodConfig{ + Name: "success-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), nodeNames[1]) + }) + }) + + // Test scenario: + // 1. Find 2 nodes to run pods, then create nmnodes as same as the 2 nodes and set same extra resource to the 2 nmnodes and 2 nodes. + // Note that nmnode and node on the same host share the same extra resource. + // The scenario looks like this: + // ┌────────────┐ ┌────────────┐ + // │ host 1 │ │ host 2 │ + // │┌──────────┐│ │┌──────────┐│ + // ││ node 1 ││ ││ node 2 ││ + // │└──────────┘│ │└──────────┘│ + // │┌──────────┐│ │┌──────────┐│ + // ││ nmnode 1 ││ ││ nmnode 2 ││ + // │└──────────┘│ │└──────────┘│ + // │ extra 100% │ │ extra 100% │ + // └────────────┘ └────────────┘ + // 2. Create one basicPod[kubelet] with two label and set its node name to the node 1 that needs 20% of the extra resource. + // 3. Wait for the pods to be scheduled. + // 4. 
Create one affinityPod[node-manager] with inter-pod affinity to the basicPod[kubelet] by label[0] that needs 20% of the extra resource. + // 5. Make sure the affinityPod is scheduled to the nmnode 1 as same as basicPod. + // 6. Create one antiAffinityPod[node-manager] with inter-pod anti affinity to the basicPod[kubelet] by label[1] that needs 20% of the extra resource. + // 7. Make sure the antiAffinityPod is scheduled to the nmnode 2. + // 8. Create one failedPod[kubelet] with label[1] that needs 70% of the extra resource. + // 9. Make sure the failedPod[kubelet] is not scheduled becaues it only can be scheduled to the node 1 due to antiAffinityPod[node-manager]'s anti affinity. + // But the node 1 haven't enough resource, the host 1 only has 60% of the extra resource. + // 10. Create one successPod[kubelet] with label[0] that needs 70% of the extra resource. + // 11. Make sure the successPod[kubelet] is scheduled to the node 2, because it isn't affected by any inter-pod affinity. + /* + Testname: Scheduling pods with inter-pod affinity matching for the mix of nodes and nmnodes + Description: Scheduling MUST meet inter-pod affinity requirements and scheduling pods MUST fail if no resource meets the specified pod */ - //framework.ConformanceIt("", func() { - //}) + ginkgo.Context("validates pod scheduling fits inter-pod affinity[requiredDuringSchedulingIgnoredDuringExecution] for the mix of nodes and nmnodes", func() { + testLabelKeys := []string{"godel.bytedance.com/test-label1", "godel.bytedance.com/test-label2"} + testLabelValues := []string{"test", "foo"} + var beardsecond v1.ResourceName = "example.com/beardsecond" + var nodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + nodeNames = Get2NodesThatCanRunPod(f) + + ginkgo.By("Create NMNodes and set fake resource for " + nodeNames[0] + " and " + nodeNames[1]) + // Get node object: + for _, testNodeName := range nodeNames { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nmnodeTemplate := GetNMNodeTemplateByNode(node) + (*nmnodeTemplate.Status.ResourceCapacity)[beardsecond] = resource.MustParse("1000") + (*nmnodeTemplate.Status.ResourceAllocatable)[beardsecond] = resource.MustParse("1000") + _, err = fs.NodeV1alpha1().NMNodes().Create(context.TODO(), nmnodeTemplate, metav1.CreateOptions{}) + framework.ExpectNoError(err, "unable to create NM node for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + nodeCopy.ResourceVersion = "0" + + nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000") + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove nmnodes and fake resource") + for _, testNodeName := range nodeNames { + // remove fake resource: + if testNodeName != "" { + // Get node object: + err := fs.NodeV1alpha1().NMNodes().Delete(context.TODO(), testNodeName, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "unable to remove nmnode %v", testNodeName) + + // Get node object: + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + // force it to update + nodeCopy.ResourceVersion = "0" + 
delete(nodeCopy.Status.Capacity, beardsecond) + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to update node %v", testNodeName) + } + } + }) + + ginkgo.It("verify inter-pod affinity matches pod labels for the mix of nodes and nmnodes", func() { + ginkgo.By("Trying to create basicPod, ns: " + ns) + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0], testLabelKeys[1]: testLabelValues[1]}, + NodeName: nodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, basicPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create affinityPod") + affinityPodConf := pausePodConfig{ + Name: "affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{interpodaffinity.Name}), + Affinity: getInterPodAffinity(map[string]string{testLabelKeys[0]: testLabelValues[0]}, nil), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, affinityPodConf), ns, affinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, affinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, affinityPodConf.Name).Spec.NodeName, nodeNames[0]) + + ginkgo.By("Trying to create antiAffinityPod") + antiAffinityPodConf := pausePodConfig{ + Name: "anti-affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{nodeaffinity.Name}), + Affinity: getInterPodAffinity(nil, map[string]string{testLabelKeys[1]: testLabelValues[1]}), + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("200")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("200")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, antiAffinityPodConf), ns, antiAffinityPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, antiAffinityPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, antiAffinityPodConf.Name).Spec.NodeName, nodeNames[1]) + + ginkgo.By("Trying to create failedPod") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[1]: testLabelValues[1]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + WaitForSchedulerAfterAction(f, 
createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, 3, 1, ns) + + ginkgo.By("Trying to create successpod") + successPodConf := pausePodConfig{ + Name: "success-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0]}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("700")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("700")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), nodeNames[1]) + }) + }) - // TODO InterPodAffinity plugin is needed /* Testname: Scheduling pods with inter-pod affinity not matching - Description: TODO + Description: Create a Pod with a inter-pod affinity set to a value that does not match a pod in the cluster. + Since there are no pods matching the criteria the Pod MUST NOT be scheduled. + Then create a pod with inter-pod anti affinity set to a value that does not match a pod in the cluster. + Since there are no pods matching the criteria the Pod MUST be scheduled. */ - //framework.ConformanceIt("", func() { - //}) + framework.ConformanceIt("validates inter-pod affinity if not matching ", func() { + testLabelKeys := []string{"godel.bytedance.com/test-label1", "godel.bytedance.com/test-label2"} + testLabelValues := []string{"test", "foo"} + WaitForStableCluster(cs, workerNodes) + + ginkgo.By(fmt.Sprintf("Trying to schedule affinityPod with unmatched inter-pod affinity {%s: %s}", testLabelKeys[0], testLabelValues[0])) + affinityPodConf := pausePodConfig{ + Name: "affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Affinity: getInterPodAffinity(map[string]string{testLabelKeys[0]: testLabelValues[0]}, nil), + } + + WaitForSchedulerAfterAction(f, createPausePodAction(f, affinityPodConf), ns, affinityPodConf.Name, false) + verifyResult(cs, 0, 1, ns) + + ginkgo.By(fmt.Sprintf("Trying to schedule antiAffinityPod with unmatched inter-pod anti affinity {%s: %s}", testLabelKeys[1], testLabelValues[1])) + antiAffinityPodConf := pausePodConfig{ + Name: "anti-affinity-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{interpodaffinity.Name}), + Affinity: getInterPodAffinity(nil, map[string]string{testLabelKeys[1]: testLabelValues[1]}), + } + + WaitForSchedulerAfterAction(f, createPausePodAction(f, antiAffinityPodConf), ns, antiAffinityPodConf.Name, true) + verifyResult(cs, 1, 1, ns) + }) // Test scenario: // 1. Run a pod to get an available node, then delete the pod @@ -984,13 +1419,480 @@ var _ = SIGDescribe("SchedulingHardConstraints [Serial]", func() { verifyResult(cs, 1, 0, ns) }) - // TODO PodTopologySpread plugin is needed + // Test scenario: + // 1. Add same extra resource to all worker nodes. + // 2. Create basicPods with same pod topology spread constraints(MaxSkew = 1) and related label. + // Note that the first pod requests 10% of the resources, and the remaining pods request 70% of the resources. + // 3. Wait for the pods to be scheduled and check whether pods are evenly scheduled to each node. + // 4. Create one normal pod with related label that needs 10% of the extra resource to the node-1 where the first pod is scheduled as mentioned above + // 5. 
Make sure the normal pod is scheduled to the node-1. + // 6. Create one failedPod with same pod topology spread constraints(MaxSkew = 1) and related label that needs 40% of the extra resource. + // 7. Make sure the failedPod is not scheduled becaues it only can be scheduled to the node-1 due the resource limit. + // But the node-1 doesn't satisfy the the pod topology spread constraints. + // Because when the failedPod is scheduled, the skew will be 2 which is greater than MaxSkew = 1. + // 8. Create one successPod with same pod topology spread constraints(MaxSkew = 2) and related label that needs 40% of the extra resource. + // 9. Make sure the successPod is scheduled to the node-1, because it satisfies the pod topology spread constraints and has enough resource. /* Testname: Scheduling pods with pod topology spread constraints - Description: TODO + Description: Scheduling MUST meet pod topology spread requirements. + */ + ginkgo.Context("validates pod scheduling fits pod topology spread constraints[DoNotSchedule]", func() { + testLabelKey := "godel.bytedance.com/test-label-pod-topology" + testLabelValue := "test" + var beardsecond v1.ResourceName = "example.com/beardsecond" + var scheduledNodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + + ginkgo.By("Set fake resource for every node") + // Get node object:) + for _, testNodeName := range workerNodes.List() { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + nodeCopy.ResourceVersion = "0" + + nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000") + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove fake resource") + for _, testNodeName := range workerNodes.List() { + // remove fake resource: + if testNodeName != "" { + // Get node object: + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + // force it to update + nodeCopy.ResourceVersion = "0" + delete(nodeCopy.Status.Capacity, beardsecond) + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to update node %v", testNodeName) + } + } + }) + + ginkgo.It("verify pod topology spread constraints[DoNotSchedule]", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + ginkgo.By("Trying to create pod with pod topology spread constraints(MaxSkew = 1) and related label in every node") + for i := 0; i < len(workerNodes); i++ { + requestedResource := "" + if i == 0 { + requestedResource = "100" + } else { + requestedResource = "700" + } + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: 
resource.MustParse(requestedResource)}, + Limits: v1.ResourceList{beardsecond: resource.MustParse(requestedResource)}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + scheduledNodeNames = append(scheduledNodeNames, GetPod(f, ns, basicPodConf.Name).Spec.NodeName) + } + framework.ExpectConsistOf(scheduledNodeNames, workerNodes.List()) + + ginkgo.By(fmt.Sprintf("Trying to create 1 normal pods with related label in %v", scheduledNodeNames[0])) + normalPodConf := pausePodConfig{ + Name: "normal-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + NodeName: scheduledNodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("100")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("100")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, normalPodConf), ns, normalPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, normalPodConf.Name, ns, framework.PollShortTimeout)) + verifyResult(cs, len(scheduledNodeNames)+1, 0, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 1) and related label in two nodes which will fail") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, len(scheduledNodeNames)+1, 1, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 2) and related label in two nodes which will success") + successPodConf := pausePodConfig{ + Name: "sucess-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(2, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), scheduledNodeNames[0]) + }) + }) + + // Test scenario: + // 1. Create nmnode for every worker node and add same extra resource to all created nmnodes. + // 2. Create basicPods[node-manager] with same pod topology spread constraints(MaxSkew = 1) and related label. + // Note that the first pod requests 10% of the resources, and the remaining pods request 70% of the resources. + // 3. 
Wait for the pods to be scheduled and check whether pods are evenly scheduled to each node. + // 4. Create one normal pod[node-manager] with related label that needs 10% of the extra resource to the node-1 where the first pod is scheduled as mentioned above + // 5. Make sure the normal pod is scheduled to the node-1. + // 6. Create one failedPod[node-manager] with same pod topology spread constraints(MaxSkew = 1) and related label that needs 40% of the extra resource. + // 7. Make sure the failedPod is not scheduled becaues it only can be scheduled to the node-1 due the resource limit. + // But the node-1 doesn't satisfy the the pod topology spread constraints. + // Because when the failedPod is scheduled, the skew will be 2 which is greater than MaxSkew = 1. + // 8. Create one successPod[node-manager] with same pod topology spread constraints(MaxSkew = 2) and related label that needs 40% of the extra resource. + // 9. Make sure the successPod is scheduled to the node-1, because it satisfies the pod topology spread constraints and has enough resource. + /* + Testname: Scheduling pods[node-manager] with pod topology spread constraints for nmnodes + Description: Scheduling MUST meet pod topology spread requirements. + */ + ginkgo.Context("validates pod scheduling fits pod topology spread constraints[DoNotSchedule] for nmnodes", func() { + testLabelKey := "godel.bytedance.com/test-label-pod-topology" + testLabelValue := "test" + var beardsecond v1.ResourceName = "example.com/beardsecond" + var scheduledNodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + + ginkgo.By("Create nmnode for every node") + // Get node object:) + for _, testNodeName := range workerNodes.List() { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nmnodeTemplate := GetNMNodeTemplateByNode(node) + (*nmnodeTemplate.Status.ResourceCapacity)[beardsecond] = resource.MustParse("1000") + (*nmnodeTemplate.Status.ResourceAllocatable)[beardsecond] = resource.MustParse("1000") + _, err = fs.NodeV1alpha1().NMNodes().Create(context.TODO(), nmnodeTemplate, metav1.CreateOptions{}) + framework.ExpectNoError(err, "unable to create NM node for node %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove nmnodes") + for _, testNodeName := range workerNodes.List() { + // remove nmnodes: + if testNodeName != "" { + err := fs.NodeV1alpha1().NMNodes().Delete(context.TODO(), testNodeName, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "unable to remove nmnode %v", testNodeName) + } + } + }) + + ginkgo.It("verify pod topology spread constraints[DoNotSchedule] for nmnodes", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + ginkgo.By("Trying to create pod with pod topology spread constraints(MaxSkew = 1) and related label in every node") + for i := 0; i < len(workerNodes); i++ { + requestedResource := "" + if i == 0 { + requestedResource = "100" + } else { + requestedResource = "700" + } + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, 
v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse(requestedResource)}, + Limits: v1.ResourceList{beardsecond: resource.MustParse(requestedResource)}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + scheduledNodeNames = append(scheduledNodeNames, GetPod(f, ns, basicPodConf.Name).Spec.NodeName) + } + framework.ExpectConsistOf(scheduledNodeNames, workerNodes.List()) + + ginkgo.By(fmt.Sprintf("Trying to create 1 normal pods with related label in %v", scheduledNodeNames[0])) + normalPodConf := pausePodConfig{ + Name: "normal-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + NodeName: scheduledNodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("100")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("100")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, normalPodConf), ns, normalPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, normalPodConf.Name, ns, framework.PollShortTimeout)) + verifyResult(cs, len(scheduledNodeNames)+1, 0, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 1) and related label in two nodes which will fail") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, len(scheduledNodeNames)+1, 1, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 2) and related label in two nodes which will success") + successPodConf := pausePodConfig{ + Name: "sucess-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(2, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), scheduledNodeNames[0]) + }) + }) + + // Test scenario: + // 1. Create nmnode for every worker node and add same extra resource to all nmnodes and nodes. + // Note that nmnode and node on the same host share the same extra resource. 
+ // The scenario looks like this: + // ┌────────────┐ ┌────────────┐ ┌────────────┐ + // │ host 1 │ │ host 2 │ │ host 3 │ + // │┌──────────┐│ │┌──────────┐│ │┌──────────┐│ + // ││ node 1 ││ ││ node 2 ││ ││ node 3 ││ + // │└──────────┘│ │└──────────┘│ │└──────────┘│ + // │┌──────────┐│ │┌──────────┐│ │┌──────────┐│ + // ││ nmnode 1 ││ ││ nmnode 2 ││ ││ nmnode 3 ││ + // │└──────────┘│ │└──────────┘│ │└──────────┘│ + // │ extra 100% │ │ extra 100% │ │ extra 100% │ + // └────────────┘ └────────────┘ └────────────┘ + // 2. Create basicPods with the same pod topology spread constraint (MaxSkew = 1) and the related label. + // Note that the first pod requests 10% of the resources, and the remaining pods request 70% of the resources. + // Set even-indexed pods (i%2 == 0) to the node-manager management type and odd-indexed pods to the kubelet management type. + // 3. Wait for the pods to be scheduled and check whether pods are evenly scheduled to each host. + // 4. Create one normal pod[kubelet] with the related label that needs 10% of the extra resource on node-1, where the first pod is scheduled as mentioned above. + // 5. Make sure the normal pod is scheduled to node-1. + // 6. Create one failedPod[node-manager] with the same pod topology spread constraint (MaxSkew = 1) and the related label that needs 40% of the extra resource. + // 7. Make sure the failedPod is not scheduled because it can only be scheduled to node-1 due to the resource limit. + // But node-1 doesn't satisfy the pod topology spread constraint, + // because when the failedPod is scheduled there, the skew would be 2, which is greater than MaxSkew = 1. + // 8. Create one successPod[node-manager] with the same pod topology spread constraint (MaxSkew = 2) and the related label that needs 40% of the extra resource. + // 9. Make sure the successPod is scheduled to node-1, because it satisfies the pod topology spread constraint and has enough resource. + /* + Testname: Scheduling pods with pod topology spread constraints for a mix of nmnodes and nodes + Description: Scheduling MUST meet pod topology spread requirements.
*/ - //framework.ConformanceIt("", func() { - //}) + ginkgo.Context("validates pod scheduling fits pod topology spread constraints[DoNotSchedule] for mix of nmnodes and nodes", func() { + testLabelKey := "godel.bytedance.com/test-label-pod-topology" + testLabelValue := "test" + var beardsecond v1.ResourceName = "example.com/beardsecond" + var scheduledNodeNames []string + + ginkgo.BeforeEach(func() { + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + + ginkgo.By("Create nmnode for every node") + // Get node object:) + for _, testNodeName := range workerNodes.List() { + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nmnodeTemplate := GetNMNodeTemplateByNode(node) + (*nmnodeTemplate.Status.ResourceCapacity)[beardsecond] = resource.MustParse("1000") + (*nmnodeTemplate.Status.ResourceAllocatable)[beardsecond] = resource.MustParse("1000") + _, err = fs.NodeV1alpha1().NMNodes().Create(context.TODO(), nmnodeTemplate, metav1.CreateOptions{}) + framework.ExpectNoError(err, "unable to create NM node for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + nodeCopy.ResourceVersion = "0" + + nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000") + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName) + } + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Remove nmnodes") + for _, testNodeName := range workerNodes.List() { + // remove nmnodes: + if testNodeName != "" { + err := fs.NodeV1alpha1().NMNodes().Delete(context.TODO(), testNodeName, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "unable to remove nmnode %v", testNodeName) + + // Get node object: + node, err := cs.CoreV1().Nodes().Get(context.TODO(), testNodeName, metav1.GetOptions{}) + framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName) + + nodeCopy := node.DeepCopy() + // force it to update + nodeCopy.ResourceVersion = "0" + delete(nodeCopy.Status.Capacity, beardsecond) + _, err = cs.CoreV1().Nodes().UpdateStatus(context.TODO(), nodeCopy, metav1.UpdateOptions{}) + framework.ExpectNoError(err, "unable to update node %v", testNodeName) + } + } + }) + + ginkgo.It("verify pod topology spread constraints[DoNotSchedule] for mix of nmnodes and nodes", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + ginkgo.By("Trying to create pod with pod topology spread constraints(MaxSkew = 1) and related label in every node") + for i := 0; i < len(workerNodes); i++ { + requestedResource := "" + if i == 0 { + requestedResource = "100" + } else { + requestedResource = "700" + } + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse(requestedResource)}, + Limits: v1.ResourceList{beardsecond: resource.MustParse(requestedResource)}, + }, + } + if i%2 == 0 { + basicPodConf.Annotations = WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}) + } else { + basicPodConf.Annotations = 
WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}) + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + scheduledNodeNames = append(scheduledNodeNames, GetPod(f, ns, basicPodConf.Name).Spec.NodeName) + } + framework.ExpectConsistOf(scheduledNodeNames, workerNodes.List()) + + ginkgo.By(fmt.Sprintf("Trying to create 1 normal pods with related label in %v", scheduledNodeNames[0])) + normalPodConf := pausePodConfig{ + Name: "normal-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + NodeName: scheduledNodeNames[0], + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("100")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("100")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, normalPodConf), ns, normalPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, normalPodConf.Name, ns, framework.PollShortTimeout)) + verifyResult(cs, len(scheduledNodeNames)+1, 0, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 1) and related label in two nodes which will fail") + failedPodConf := pausePodConfig{ + Name: "failed-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(1, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, len(scheduledNodeNames)+1, 1, ns) + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints(MaxSkew = 2) and related label in two nodes which will success") + successPodConf := pausePodConfig{ + Name: "sucess-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKey: testLabelValue}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{getPodTopologySpreadConstraint(2, map[string]string{testLabelKey: testLabelValue}, v1.LabelHostname)}, + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{beardsecond: resource.MustParse("400")}, + Limits: v1.ResourceList{beardsecond: resource.MustParse("400")}, + }, + } + framework.ExpectEqual(runPodAndGetNodeName(f, successPodConf), scheduledNodeNames[0]) + }) + }) + + // Test scenario: + // 1. Add extra label to all worker nodes. + // 2. Create a pod on each worker node and set the pod label to label[node index%2] + // 3. Wait for the pods to be scheduled and check whether pods are scheduled to right nodes. + // 4. Create one pod with all two labels and set it up to two topology spread constraints. 
+ // One is for the label[0] and the topologyKey is the host name, and the other is for the label[1] and the topologyKey is the extra label. The maxSkew of both is 1. + // 5. Make sure the pod is not scheduled. Because, for the label[0] topology spread constraint, it can only be scheduled to a node without a pod with the label[0], + // and for the label[1] topology spread constraint, it can only be scheduled to a node without a pod with the label[1]. + // But there is no such node: every node has a pod with either the label[0] or the label[1]. + /* + Testname: Scheduling pods with pod topology spread constraints not matching + Description: Scheduling MUST fail if the pod topology spread requirements cannot be met. + */ + framework.ConformanceIt("validates pod topology spread if not matching ", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + testLabelKeys := []string{"godel.bytedance.com/test-label-pod-topology1", "godel.bytedance.com/test-label-pod-topology2"} + testLabelValues := []string{"test", "foo"} + extraHostTopologyLabelKey := "kubernetes.io/topology-extra-host" + workerNodeNameList := workerNodes.List() + WaitForStableCluster(cs, workerNodes) + ginkgo.By("cluster is stable") + + ginkgo.By("Trying to add an extra topology label to every node") + for _, nodeName := range workerNodeNameList { + topologyValue := fmt.Sprintf("extra-host-%s", nodeName) + framework.AddOrUpdateLabelOnNode(cs, nodeName, extraHostTopologyLabelKey, topologyValue) + framework.ExpectNodeHasLabel(cs, nodeName, extraHostTopologyLabelKey, topologyValue) + defer framework.RemoveLabelOffNode(cs, nodeName, extraHostTopologyLabelKey) + } + + ginkgo.By("Trying to schedule pods with two different labels to every node") + for i := 0; i < len(workerNodeNameList); i++ { + basicPodConf := pausePodConfig{ + Name: "basic-pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKeys[i%2]: testLabelValues[i%2]}, + NodeName: workerNodeNameList[i], + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, basicPodConf), ns, basicPodConf.Name, true) + framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(f.ClientSet, basicPodConf.Name, ns, framework.PollShortTimeout)) + framework.ExpectEqual(GetPod(f, ns, basicPodConf.Name).Spec.NodeName, workerNodeNameList[i]) + } + + ginkgo.By("Trying to create 1 pod with pod topology spread constraints (MaxSkew = 1) and related labels not matching") + failedPodConf := pausePodConfig{ + Name: "pod-" + string(uuid.NewUUID()), + Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{podtopologyspread.Name}), + Labels: map[string]string{testLabelKeys[0]: testLabelValues[0], testLabelKeys[1]: testLabelValues[1]}, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + getPodTopologySpreadConstraint(1, map[string]string{testLabelKeys[0]: testLabelValues[0]}, v1.LabelHostname), + getPodTopologySpreadConstraint(1, map[string]string{testLabelKeys[1]: testLabelValues[1]}, extraHostTopologyLabelKey), + }, + } + WaitForSchedulerAfterAction(f, createPausePodAction(f, failedPodConf), ns, failedPodConf.Name, false) + verifyResult(cs, len(workerNodeNameList), 1, ns) + }) }) // printAllPodsOnNode outputs status of all kubelet pods into log.
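Editorial note: every DoNotSchedule assertion above reduces to the same skew arithmetic, so a small standalone sketch (not part of the patch; the helper name and the per-host counts are illustrative, taken from the scenario comments where host-1 ends up with two matching pods and the other hosts with one each) may help follow the failedPod/successPod checks.

package main

import "fmt"

// skewAfterPlacement returns the skew a DoNotSchedule topology spread
// constraint would see if one more matching pod were placed in `candidate`.
// counts maps a topology value (e.g. a hostname) to the number of pods that
// already match the constraint's label selector in that domain.
func skewAfterPlacement(counts map[string]int, candidate string) int {
	// Count in the candidate domain after the placement.
	after := counts[candidate] + 1

	// Global minimum of matching pods across the known domains.
	min := after
	for _, c := range counts {
		if c < min {
			min = c
		}
	}
	return after - min
}

func main() {
	// Numbers mirroring the tests above: host-1 already runs the small basic
	// pod plus the extra "normal" pod, the other hosts run one matching pod
	// each, and only host-1 has enough of the fake resource left.
	counts := map[string]int{"host-1": 2, "host-2": 1, "host-3": 1}

	skew := skewAfterPlacement(counts, "host-1")
	fmt.Println(skew <= 1) // false: the failedPod (MaxSkew = 1) is rejected
	fmt.Println(skew <= 2) // true:  the successPod (MaxSkew = 2) fits on host-1
}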
@@ -1208,6 +2110,46 @@ func Get2NodesThatCanRunPod(f *framework.Framework) []string { return []string{firstNode, secondNode} } +func getInterPodAffinity(affinityMap, antiAffinityMap map[string]string) *v1.Affinity { + affinity := v1.Affinity{} + if affinityMap != nil { + affinity.PodAffinity = &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: affinityMap, + }, + TopologyKey: v1.LabelHostname, + }, + }, + } + } + if antiAffinityMap != nil { + affinity.PodAntiAffinity = &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: antiAffinityMap, + }, + TopologyKey: v1.LabelHostname, + }, + }, + } + } + return &affinity +} + +func getPodTopologySpreadConstraint(maxSkew int32, matchLabels map[string]string, topologyKey string) v1.TopologySpreadConstraint { + return v1.TopologySpreadConstraint{ + MaxSkew: maxSkew, + TopologyKey: topologyKey, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: matchLabels, + }, + } +} + func getNodeThatCanRunPodWithoutToleration(f *framework.Framework) string { ginkgo.By("Trying to launch a pod without a toleration to get a node which can launch it.") return runPodAndGetNodeName(f, pausePodConfig{ @@ -1330,3 +2272,26 @@ func setNodeUnschedulableAction(cs clientset.Interface, nodeName string, unsched return setNodeUnschedulable(cs, nodeName, unschedulabel) } } + +func GetNMNodeTemplateByNode(node *v1.Node) *nodev1alpha1.NMNode { + // Note if capacity takes a copy of capacity, + // then when nodeinfo calculates the overall remaining resources, + // it multiplies the resources consumed by the node by 2, which is wrong. 
+ capacity := node.Status.Allocatable.DeepCopy() + allocatable := node.Status.Allocatable.DeepCopy() + return &nodev1alpha1.NMNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: node.Name, + Namespace: node.Namespace, + Labels: node.Labels, + Annotations: node.Annotations, + }, + Spec: nodev1alpha1.NMNodeSpec{ + Taints: node.Spec.Taints, + }, + Status: nodev1alpha1.NMNodeStatus{ + ResourceCapacity: &capacity, + ResourceAllocatable: &allocatable, + }, + } +} diff --git a/test/e2e/scheduling/soft_constraints.go b/test/e2e/scheduling/soft_constraints.go index 061050a4..d863c8ca 100644 --- a/test/e2e/scheduling/soft_constraints.go +++ b/test/e2e/scheduling/soft_constraints.go @@ -33,8 +33,11 @@ import ( "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" + godelclient "github.com/kubewharf/godel-scheduler-api/pkg/client/clientset/versioned" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/interpodaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodeaffinity" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/nodepreferavoidpods" + "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/podtopologyspread" "github.com/kubewharf/godel-scheduler/pkg/scheduler/framework/plugins/tainttoleration" schedutil "github.com/kubewharf/godel-scheduler/pkg/util" podutil "github.com/kubewharf/godel-scheduler/pkg/util/pod" @@ -43,6 +46,7 @@ import ( e2enode "github.com/kubewharf/godel-scheduler/test/e2e/framework/node" e2epod "github.com/kubewharf/godel-scheduler/test/e2e/framework/pod" e2erc "github.com/kubewharf/godel-scheduler/test/e2e/framework/rc" + e2eskipper "github.com/kubewharf/godel-scheduler/test/e2e/framework/skipper" testutils "github.com/kubewharf/godel-scheduler/test/utils" imageutils "github.com/kubewharf/godel-scheduler/test/utils/image" ) @@ -130,16 +134,22 @@ func removeAvoidPodsOffNode(c clientset.Interface, nodeName string) { // This test suite is used to verifies scheduler soft constraints functions var _ = SIGDescribe("SchedulingSoftConstraints [Serial]", func() { var cs clientset.Interface + var fs godelclient.Interface var nodeList *v1.NodeList var systemPodsNo int var ns string f := framework.NewDefaultFramework("sched-priority") ginkgo.AfterEach(func() { + for _, node := range nodeList.Items { + err := fs.NodeV1alpha1().NMNodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + } }) ginkgo.BeforeEach(func() { cs = f.ClientSet + fs = f.Godelclient ns = f.Namespace.Name nodeList = &v1.NodeList{} var err error @@ -151,97 +161,225 @@ var _ = SIGDescribe("SchedulingSoftConstraints [Serial]", func() { } framework.ExpectNoErrorWithOffset(0, err) + for _, node := range nodeList.Items { + nmnode := GetNMNodeTemplateByNode(&node) + _, err := fs.NodeV1alpha1().NMNodes().Create(context.TODO(), nmnode, metav1.CreateOptions{}) + framework.ExpectNoError(err) + } + err = framework.CheckTestingNSDeletedExcept(cs, ns) framework.ExpectNoError(err) err = e2epod.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, map[string]string{}) framework.ExpectNoError(err) }) - // TODO interpodaffinity plugin should be supported - // ginkgo.It("Pod should be scheduled to node that don't match the PodAntiAffinity terms", func() { - - // e2eskipper.SkipUnlessNodeCountIsAtLeast(2) - - // ginkgo.By("Trying to launch a pod with a label to get a node which can launch it.") - // pod := runPausePod(f, pausePodConfig{ - // 
Name: "pod-with-label-security-s1", - // Annotations: GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), - // Labels: map[string]string{"security": "S1"}, - // }) - // nodeName := pod.Spec.NodeName - - // k := v1.LabelHostname - // ginkgo.By("Verifying the node has a label " + k) - // node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) - // framework.ExpectNoError(err) - // if _, hasLabel := node.Labels[k]; !hasLabel { - // // If the label is not exists, label all nodes for testing. - - // ginkgo.By("Trying to apply a label on the found node.") - // k = "kubernetes.io/e2e-node-topologyKey" - // v := "topologyvalue1" - // framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v) - // framework.ExpectNodeHasLabel(cs, nodeName, k, v) - // defer framework.RemoveLabelOffNode(cs, nodeName, k) - - // ginkgo.By("Trying to apply a label on other nodes.") - // v = "topologyvalue2" - // for _, node := range nodeList.Items { - // if node.Name != nodeName { - // framework.AddOrUpdateLabelOnNode(cs, node.Name, k, v) - // framework.ExpectNodeHasLabel(cs, node.Name, k, v) - // defer framework.RemoveLabelOffNode(cs, node.Name, k) - // } - // } - // } - - // // make the nodes have balanced cpu,mem usage - // err = createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6) - // framework.ExpectNoError(err) - // ginkgo.By("Trying to launch the pod with podAntiAffinity.") - // labelPodName := "pod-with-pod-antiaffinity" - // pod = createPausePod(f, pausePodConfig{ - // Resources: podRequestedResource, - // Name: labelPodName, - // Annotations: WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), []string{nodeaffinity.Name}), - // Affinity: &v1.Affinity{ - // PodAntiAffinity: &v1.PodAntiAffinity{ - // PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ - // { - // PodAffinityTerm: v1.PodAffinityTerm{ - // LabelSelector: &metav1.LabelSelector{ - // MatchExpressions: []metav1.LabelSelectorRequirement{ - // { - // Key: "security", - // Operator: metav1.LabelSelectorOpIn, - // Values: []string{"S1", "value2"}, - // }, - // { - // Key: "security", - // Operator: metav1.LabelSelectorOpNotIn, - // Values: []string{"S2"}, - // }, { - // Key: "security", - // Operator: metav1.LabelSelectorOpExists, - // }, - // }, - // }, - // TopologyKey: k, - // Namespaces: []string{ns}, - // }, - // Weight: 10, - // }, - // }, - // }, - // }, - // }) - // ginkgo.By("Wait the pod becomes running") - // framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)) - // labelPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), labelPodName, metav1.GetOptions{}) - // framework.ExpectNoError(err) - // ginkgo.By("Verify the pod was scheduled to the expected node.") - // framework.ExpectNotEqual(labelPod.Spec.NodeName, nodeName) - // }) + // Create a pod with a label and then create a pod with preferred podAntiAffinity terms that match the label. + // Verify that the pod is not scheduled to the node that the first pod is scheduled to. 
+ ginkgo.It("Pod should be scheduled to node that don't match the PodAntiAffinity terms", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + ginkgo.By("Trying to launch a pod with a label to get a node which can launch it.") + pod := runPausePod(f, pausePodConfig{ + Name: "pod-with-label-security-s1", + Annotations: GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + Labels: map[string]string{"security": "S1"}, + }) + nodeName := pod.Spec.NodeName + + k := v1.LabelHostname + ginkgo.By("Verifying the node has a label " + k) + node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + if _, hasLabel := node.Labels[k]; !hasLabel { + // If the label is not exists, label all nodes for testing. + + ginkgo.By("Trying to apply a label on the found node.") + k = "kubernetes.io/e2e-node-topologyKey" + v := "topologyvalue1" + framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v) + framework.ExpectNodeHasLabel(cs, nodeName, k, v) + defer framework.RemoveLabelOffNode(cs, nodeName, k) + + ginkgo.By("Trying to apply a label on other nodes.") + v = "topologyvalue2" + for _, node := range nodeList.Items { + if node.Name != nodeName { + framework.AddOrUpdateLabelOnNode(cs, node.Name, k, v) + framework.ExpectNodeHasLabel(cs, node.Name, k, v) + defer framework.RemoveLabelOffNode(cs, node.Name, k) + } + } + } + + // make the nodes have balanced cpu,mem usage + ginkgo.By("Trying to create balanced pods for all nodes.") + err = createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6) + framework.ExpectNoError(err) + ginkgo.By("Trying to launch the pod with podAntiAffinity.") + labelPodName := "pod-with-pod-antiaffinity" + pod = createPausePod(f, pausePodConfig{ + Resources: podRequestedResource, + Name: labelPodName, + Annotations: WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + []string{interpodaffinity.Name}), []string{interpodaffinity.Name}), + Affinity: &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1", "value2"}, + }, + { + Key: "security", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"S2"}, + }, { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: k, + Namespaces: []string{ns}, + }, + Weight: 10, + }, + }, + }, + }, + }) + ginkgo.By("Wait the pod becomes running") + framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)) + labelPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), labelPodName, metav1.GetOptions{}) + framework.ExpectNoError(err) + ginkgo.By("Verify the pod was scheduled to the expected node.") + framework.ExpectNotEqual(labelPod.Spec.NodeName, nodeName) + }) + + // Create a pod[node-manager] with a label and then create a pod[kubelet] with preferred podAntiAffinity terms that match the label. + // Verify that the pod is not scheduled to the node that the first pod is scheduled to. 
+ ginkgo.It("Pod should be scheduled to node that don't match the PodAntiAffinity terms for mix of nmnodes and nodes", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + ginkgo.By("Trying to launch a pod with a label to get a node which can launch it.") + pod := runPausePod(f, pausePodConfig{ + Name: "pod-with-label-security-s1", + Annotations: GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), + Labels: map[string]string{"security": "S1"}, + }) + nodeName := pod.Spec.NodeName + + // make the nodes have balanced cpu,mem usage + ginkgo.By("Trying to create balanced pods for all nodes.") + err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6) + framework.ExpectNoError(err) + ginkgo.By("Trying to launch the pod with podAntiAffinity.") + labelPodName := "pod-with-pod-antiaffinity" + pod = createPausePod(f, pausePodConfig{ + Resources: podRequestedResource, + Name: labelPodName, + Annotations: WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + []string{interpodaffinity.Name}), []string{interpodaffinity.Name}), + Affinity: &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + PodAffinityTerm: v1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "security", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"S1", "value2"}, + }, + { + Key: "security", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{"S2"}, + }, { + Key: "security", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: v1.LabelHostname, + Namespaces: []string{ns}, + }, + Weight: 10, + }, + }, + }, + }, + }) + ginkgo.By("Wait the pod becomes running") + framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)) + labelPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), labelPodName, metav1.GetOptions{}) + framework.ExpectNoError(err) + ginkgo.By("Verify the pod was scheduled to the expected node.") + framework.ExpectNotEqual(labelPod.Spec.NodeName, nodeName) + }) + + // Create as many pods as the number of nodes with pod topology sprea constrain (MaxSkew = 1) and check whether they are evenly scheduled on each node. 
+ ginkgo.It("Pod should be evenly scheduled to node that match the preferred pod topology spread constraint", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + testLabelKey := "godel.bytedance.com/test-label-preferred-pod-topology" + testLabelValue := "test-value-pod-topology" + nodeListNames := make([]string, 0, len(nodeList.Items)) + for _, node := range nodeList.Items { + nodeListNames = append(nodeListNames, node.Name) + ginkgo.By(fmt.Sprintf("node %s has label %s=%s", node.Name, v1.LabelHostname, node.Labels[v1.LabelHostname])) + } + scheduledNodeNames := make([]string, 0, len(nodeListNames)) + + // make the nodes have balanced cpu,mem usage + ginkgo.By("Trying to create balanced pods for all nodes.") + err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6) + framework.ExpectNoError(err) + ginkgo.By(fmt.Sprintf("Trying to launch %d pod with preferred pod topology spread constraints evenly.", len(nodeList.Items))) + for i := 0; i < len(nodeListNames); i++ { + pod := createPausePod(f, pausePodConfig{ + // Because the github ci machine has limited resources, the kind-control-plane node does not have enough resources + // So we don't set the resource requests and limits for the pod + // Resources: podRequestedResource, + Name: "topology-spread-pod-" + string(uuid.NewUUID()), + Labels: map[string]string{testLabelKey: testLabelValue}, + Annotations: WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + []string{podtopologyspread.Name}), []string{podtopologyspread.Name}), + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.ScheduleAnyway, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{testLabelKey: testLabelValue}, + }, + }, + }, + }) + + ginkgo.By("Wait the pod becomes running") + framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)) + scheduledPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + scheduledNodeNames = append(scheduledNodeNames, scheduledPod.Spec.NodeName) + ginkgo.By(fmt.Sprintf("Pod %s was scheduled to node %s", pod.Name, scheduledPod.Spec.NodeName)) + } + + ginkgo.By("Verify the pod was evenly scheduled to the nodes.") + framework.ExpectConsistOf(scheduledNodeNames, nodeListNames) + }) ginkgo.It("Pod should avoid nodes that have avoidPod annotation", func() { nodeName := nodeList.Items[0].Name @@ -306,6 +444,69 @@ var _ = SIGDescribe("SchedulingSoftConstraints [Serial]", func() { } }) + // Create as many pods as the number of nodes with pod topology sprea constrain (MaxSkew = 1) and check whether they are evenly scheduled on each node. 
+ ginkgo.It("Pod that match the preferred pod topology spread constraint should be evenly scheduled to nmnodes and nodes", func() { + + e2eskipper.SkipUnlessNodeCountIsAtLeast(2) + + testLabelKey := "godel.bytedance.com/test-label-preferred-pod-topology" + testLabelValue := "test-value-pod-topology" + nodeListNames := make([]string, 0, len(nodeList.Items)) + for _, node := range nodeList.Items { + nodeListNames = append(nodeListNames, node.Name) + ginkgo.By(fmt.Sprintf("node %s has label %s=%s", node.Name, v1.LabelHostname, node.Labels[v1.LabelHostname])) + } + scheduledNodeNames := make([]string, 0, len(nodeListNames)) + + // make the nodes have balanced cpu,mem usage + ginkgo.By("Trying to create balanced pods for all nodes.") + err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6) + framework.ExpectNoError(err) + ginkgo.By(fmt.Sprintf("Trying to launch %d pod with preferred pod topology spread constraints evenly.", len(nodeList.Items))) + for i := 0; i < len(nodeListNames); i++ { + pod := createPausePod(f, pausePodConfig{ + // Because the github ci machine has limited resources, the kind-control-plane node does not have enough resources + // So we don't set the resource requests and limits for the pod + // Resources: podRequestedResource, + Name: "topology-spread-pod-" + string(uuid.NewUUID()), + Labels: map[string]string{testLabelKey: testLabelValue}, + Annotations: WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + []string{podtopologyspread.Name}), []string{podtopologyspread.Name}), + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelHostname, + WhenUnsatisfiable: v1.ScheduleAnyway, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{testLabelKey: testLabelValue}, + }, + }, + }, + }) + + if i%2 == 0 { + pod.Annotations = WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.NodeManager), + []string{podtopologyspread.Name}), []string{podtopologyspread.Name}) + } else { + pod.Annotations = WithSoftConstraints( + WithHardConstraints(GetPodAnnotations(podutil.GuaranteedPod, podutil.Kubelet), + []string{podtopologyspread.Name}), []string{podtopologyspread.Name}) + } + + ginkgo.By("Wait the pod becomes running") + framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)) + scheduledPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + scheduledNodeNames = append(scheduledNodeNames, scheduledPod.Spec.NodeName) + ginkgo.By(fmt.Sprintf("Pod %s was scheduled to node %s", pod.Name, scheduledPod.Spec.NodeName)) + } + + ginkgo.By("Verify the pod was evenly scheduled to the nodes.") + framework.ExpectConsistOf(scheduledNodeNames, nodeListNames) + }) + ginkgo.It("Pod should be preferably scheduled to nodes pod can tolerate[Tolerate]", func() { // make the nodes have balanced cpu,mem usage ratio err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.5)