Skip to content

Commit

Permalink
feat: Flag for enabling recording of failed evictions
Browse files Browse the repository at this point in the history
Signed-off-by: Grzegorz Głąb <[email protected]>
  • Loading branch information
grzesuav committed Dec 5, 2023
1 parent 89f453e commit eb11464
Show file tree
Hide file tree
Showing 15 changed files with 100 additions and 30 deletions.
1 change: 1 addition & 0 deletions cmd/descheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
fs.Float64Var(&rs.Tracing.SampleRate, "otel-sample-rate", 1.0, "Sample rate to collect the Traces")
fs.BoolVar(&rs.Tracing.FallbackToNoOpProviderOnError, "otel-fallback-no-op-on-error", false, "Fallback to NoOp Tracer in case of error")
fs.BoolVar(&rs.EnableHTTP2, "enable-http2", false, "If http/2 should be enabled for the metrics and health check")
fs.BoolVar(&rs.RecordEventsForEvictionErrors, "record-events-for-eviction-errors", false, "Set this flag to record events in case of eviction errors")

componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs)

Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/componentconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type DeschedulerConfiguration struct {
// ClientConnection specifies the kubeconfig file and client connection settings to use when communicating with the apiserver.
// Refer to [ClientConnection](https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/componentconfig#ClientConnectionConfiguration) for more information.
ClientConnection componentbaseconfig.ClientConnectionConfiguration

// RecordEventsForEvictionErrors sets event recording in case of eviction error
RecordEventsForEvictionErrors bool
}

type TracingConfiguration struct {
Expand Down
1 change: 1 addition & 0 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
nodes,
!d.rs.DisableMetrics,
d.eventRecorder,
d.rs.RecordEventsForEvictionErrors,
)

d.runProfiles(ctx, client, nodes, podEvictor)
Expand Down
66 changes: 39 additions & 27 deletions pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,17 @@ type (
)

type PodEvictor struct {
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
metricsEnabled bool
eventRecorder events.EventRecorder
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
metricsEnabled bool
eventRecorder events.EventRecorder
recordEventsForEvictionErrors bool
}

func NewPodEvictor(
Expand All @@ -63,6 +64,7 @@ func NewPodEvictor(
nodes []*v1.Node,
metricsEnabled bool,
eventRecorder events.EventRecorder,
recordEventsForEvictionErrors bool,
) *PodEvictor {
nodePodCount := make(nodePodEvictedCount)
namespacePodCount := make(namespacePodEvictCount)
Expand All @@ -72,16 +74,17 @@ func NewPodEvictor(
}

return &PodEvictor{
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
metricsEnabled: metricsEnabled,
eventRecorder: eventRecorder,
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
metricsEnabled: metricsEnabled,
eventRecorder: eventRecorder,
recordEventsForEvictionErrors: recordEventsForEvictionErrors,
}
}

Expand Down Expand Up @@ -153,6 +156,10 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
}
if pe.recordEventsForEvictionErrors {
reason := extractReason(opts, strategy)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod cannot be evicted from %v node by sigs.k8s.io/descheduler: %v", pod.Spec.NodeName, err)
}
return false
}

Expand All @@ -169,18 +176,23 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
} else {
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
reason := opts.Reason
if len(reason) == 0 {
reason = strategy
if len(reason) == 0 {
reason = "NotSet"
}
}
reason := extractReason(opts, strategy)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return true
}

func extractReason(opts EvictOptions, strategy string) string {
reason := opts.Reason
if len(reason) == 0 {
reason = strategy
if len(reason) == 0 {
reason = "NotSet"
}
}
return reason
}

func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ func TestHighNodeUtilization(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down Expand Up @@ -645,6 +646,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
item.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestLowNodeUtilization(t *testing.T) {
n1NodeName := "n1"
n2NodeName := "n2"
Expand Down Expand Up @@ -895,6 +899,7 @@ func TestLowNodeUtilization(t *testing.T) {
test.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down Expand Up @@ -1067,6 +1072,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
item.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
5 changes: 5 additions & 0 deletions pkg/framework/plugins/podlifetime/pod_lifetime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestPodLifeTime(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
Expand Down Expand Up @@ -354,6 +358,7 @@ func TestPodLifeTime(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func buildTestPodWithImage(podName, node, image string) *v1.Pod {
pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
Expand Down Expand Up @@ -322,6 +326,7 @@ func TestFindDuplicatePods(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

nodeFit := testCase.nodefit
Expand Down Expand Up @@ -771,6 +776,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
testCase.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultEvictorFilterArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
5 changes: 5 additions & 0 deletions pkg/framework/plugins/removefailedpods/failedpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

var OneHourInSeconds uint = 3600

func TestRemoveFailedPods(t *testing.T) {
Expand Down Expand Up @@ -298,6 +302,7 @@ func TestRemoveFailedPods(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func initPods(node *v1.Node) []*v1.Pod {
pods := make([]*v1.Pod, 0)

Expand Down Expand Up @@ -317,6 +321,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestPodAntiAffinity(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
Expand Down Expand Up @@ -242,6 +246,7 @@ func TestPodAntiAffinity(t *testing.T) {
test.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
Expand Down Expand Up @@ -368,6 +372,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func createNoScheduleTaint(key, value string, index int) v1.Taint {
return v1.Taint{
Key: "testTaint" + fmt.Sprintf("%v", index),
Expand Down Expand Up @@ -360,6 +364,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import (
"sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestTopologySpreadConstraint(t *testing.T) {
testCases := []struct {
name string
Expand Down Expand Up @@ -1465,6 +1469,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
tc.nodes,
false,
eventRecorder,
noRecordEventsForEvictionFailures,
)

defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
Expand Down
10 changes: 7 additions & 3 deletions pkg/framework/profile/profile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ import (
testutils "sigs.k8s.io/descheduler/test"
)

const (
noRecordEventsForEvictionFailures = false
)

func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -244,7 +248,7 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient)
defer eventBroadcaster.Shutdown()

podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder)
podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures)

prfl, err := NewProfile(
test.config,
Expand Down Expand Up @@ -393,7 +397,7 @@ func TestProfileExtensionPoints(t *testing.T) {
eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient)
defer eventBroadcaster.Shutdown()

podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder)
podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures)

prfl, err := NewProfile(
api.DeschedulerProfile{
Expand Down Expand Up @@ -605,7 +609,7 @@ func TestProfileExtensionPointOrdering(t *testing.T) {
eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient)
defer eventBroadcaster.Shutdown()

podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder)
podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, nil, nil, nodes, true, eventRecorder, noRecordEventsForEvictionFailures)

prfl, err := NewProfile(
api.DeschedulerProfile{
Expand Down

0 comments on commit eb11464

Please sign in to comment.