Skip to content

Commit 796d042

Browse files
authored
implement more configuration through env vars and cli override args (#24)
1 parent f2b6255 commit 796d042

File tree

2 files changed

+103
-21
lines changed

2 files changed

+103
-21
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ The termination handler consists of a [ServiceAccount](https://kubernetes.io/doc
1010

1111
You can create and run all of these at once on your own Kubernetes cluster by running the following command:
1212
```
13-
kubectl apply -k https://github.com/aws/aws-node-termination-handler/config/base?ref=master
13+
kubectl apply -k 'https://github.com/aws/aws-node-termination-handler/config/base?ref=master'
1414
```
1515

1616
By default, the aws-node-termination-handler will run on all of your nodes (on-demand and spot). If your spot instances are labeled, you can configure aws-node-termination-handler to only run on your labeled spot nodes. If you're using the tag `lifecycle=Ec2Spot`, you can run the following to apply our spot-node-selector overlay:
1717

1818
```
19-
kubectl apply -k https://github.com/aws/aws-node-termination-handler/config/overlays/spot-node-selector?ref=master
19+
kubectl apply -k 'https://github.com/aws/aws-node-termination-handler/config/overlays/spot-node-selector?ref=master'
2020
```
2121

2222
If you're using a different key/value tag to label your spot nodes, you can write your own overlay to set a spot-node-selector while still receiving updates of the base kubernetes resource files. See our [spot-node-selector](https://github.com/aws/aws-node-termination-handler/tree/master/config/overlays/spot-node-selector) overlay for an example.

handler.go

Lines changed: 101 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ package main
1515

1616
import (
1717
"encoding/json"
18+
"flag"
19+
"fmt"
1820
"log"
1921
"math/rand"
2022
"net/http"
2123
"os"
24+
"strconv"
2225
"time"
2326

2427
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,10 +33,27 @@ import (
3033
const (
	// Maximum time allotted to cordon and drain once an interruption notice arrives.
	nodeInterruptionDuration = 2 * time.Minute

	// Environment variable keys backing each CLI flag.
	// EC2 Instance Metadata is configurable mainly for testing purposes.
	instanceMetadataUrlConfigKey       = "INSTANCE_METADATA_URL"
	dryRunConfigKey                    = "DRY_RUN"
	nodeNameConfigKey                  = "NODE_NAME"
	kubernetesServiceHostConfigKey     = "KUBERNETES_SERVICE_HOST"
	kubernetesServicePortConfigKey     = "KUBERNETES_SERVICE_PORT"
	deleteLocalDataConfigKey           = "DELETE_LOCAL_DATA"
	ignoreDaemonSetsConfigKey          = "IGNORE_DAEMON_SETS"
	podTerminationGracePeriodConfigKey = "GRACE_PERIOD"

	// Default EC2 instance metadata service endpoint.
	defaultInstanceMetadataUrl = "http://169.254.169.254"
)
3646

47+
// Runtime configuration shared across the handler. Populated by
// parseCliArgs from CLI flags, environment variables, or defaults.
var (
	dryRun                    bool
	nodeName                  string
	metadataUrl               string
	ignoreDaemonSets          bool
	deleteLocalData           bool
	kubernetesServiceHost     string
	kubernetesServicePort     string
	podTerminationGracePeriod int
)
56+
3757
// InstanceActionDetail metadata structure for json parsing
3858
type InstanceActionDetail struct {
3959
InstanceId string `json:"instance-id"`
@@ -54,7 +74,6 @@ type InstanceAction struct {
5474
}
5575

5676
// requestMetadata queries the EC2 instance metadata service (at the
// configured metadataUrl) for a pending spot instance-action notice.
func requestMetadata() (*http.Response, error) {
	endpoint := metadataUrl + "/latest/meta-data/spot/instance-action"
	return http.Get(endpoint)
}
6079

@@ -123,28 +142,83 @@ func getDrainHelper(nodeName string) *drain.Helper {
123142
return &drain.Helper{
124143
Client: clientset,
125144
Force: true,
126-
GracePeriodSeconds: 30, //default k8s value
127-
IgnoreAllDaemonSets: true,
128-
Timeout: time.Second * 60,
145+
GracePeriodSeconds: podTerminationGracePeriod,
146+
IgnoreAllDaemonSets: ignoreDaemonSets,
147+
DeleteLocalData: deleteLocalData,
148+
Timeout: nodeInterruptionDuration,
129149
Out: os.Stdout,
130150
ErrOut: os.Stderr,
131151
}
132152
}
133153

134154
// getEnv returns the value of the environment variable named by key,
// or fallback if the variable is not set.
func getEnv(key string, fallback string) string {
	value, present := os.LookupEnv(key)
	if !present {
		return fallback
	}
	return value
}
141161

142-
func main() {
143-
nodeName := os.Getenv("NODE_NAME")
144-
if len(nodeName) == 0 {
145-
log.Fatalln("Failed to get NODE_NAME from environment. " +
146-
"Check that the kubernetes yaml file is configured correctly")
162+
// getIntEnv parses the environment variable named by key as an integer,
// returning fallback when the variable is unset or empty. Exits fatally
// when a value is present but is not a valid integer.
func getIntEnv(key string, fallback int) int {
	raw, present := os.LookupEnv(key)
	if !present || raw == "" {
		return fallback
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		log.Fatalln("Env Var " + key + " must be an integer")
	}
	return parsed
}
174+
175+
// getBoolEnv parses the environment variable named by key as a boolean,
// returning fallback when the variable is unset or empty. Exits fatally
// when a value is present but is not a valid boolean.
func getBoolEnv(key string, fallback bool) bool {
	raw, present := os.LookupEnv(key)
	if !present || raw == "" {
		return fallback
	}
	parsed, err := strconv.ParseBool(raw)
	if err != nil {
		log.Fatalln("Env Var " + key + " must be either true or false")
	}
	return parsed
}
187+
188+
func parseCliArgs() {
189+
flag.BoolVar(&dryRun, "dry-run", getBoolEnv(dryRunConfigKey, false), "If true, only log if a node would be drained")
190+
flag.StringVar(&nodeName, "node-name", getEnv(nodeNameConfigKey, ""), "The kubernetes node name")
191+
flag.StringVar(&metadataUrl, "metadata-url", getEnv(instanceMetadataUrlConfigKey, defaultInstanceMetadataUrl), "The URL of EC2 instance metadata. This shouldn't need to be changed unless you are testing.")
192+
flag.BoolVar(&ignoreDaemonSets, "ignore-daemon-sets", getBoolEnv(ignoreDaemonSetsConfigKey, true), "If true, drain daemon sets when a spot interrupt is received.")
193+
flag.BoolVar(&deleteLocalData, "delete-local-data", getBoolEnv(deleteLocalDataConfigKey, true), "If true, do not drain pods that are using local node storage in emptyDir")
194+
flag.StringVar(&kubernetesServiceHost, "kubernetes-service-host", getEnv(kubernetesServiceHostConfigKey, ""), "[ADVANCED] The k8s service host to send api calls to.")
195+
flag.StringVar(&kubernetesServicePort, "kubernetes-service-port", getEnv(kubernetesServicePortConfigKey, ""), "[ADVANCED] The k8s service port to send api calls to.")
196+
flag.IntVar(&podTerminationGracePeriod, "grace-period", getIntEnv(podTerminationGracePeriodConfigKey, -1), "Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used.")
197+
198+
flag.Parse()
199+
200+
if nodeName == "" {
201+
log.Fatalln("You must provide a node-name to the CLI or NODE_NAME environment variable.")
147202
}
203+
// client-go expects these to be set in env vars
204+
os.Setenv(kubernetesServiceHostConfigKey, kubernetesServiceHost)
205+
os.Setenv(kubernetesServicePortConfigKey, kubernetesServicePort)
206+
207+
fmt.Printf("aws-node-termination-handler arguments: \n"+
208+
"\tdry-run: %t,\n"+
209+
"\tnode-name: %s,\n"+
210+
"\tmetadata-url: %s,\n"+
211+
"\tkubernetes-service-host: %s,\n"+
212+
"\tkubernetes-service-port: %s,\n"+
213+
"\tdelete-local-data: %t,\n"+
214+
"\tignore-daemon-sets: %t\n"+
215+
"\tgrace-period: %d\n",
216+
dryRun, nodeName, metadataUrl, kubernetesServiceHost, kubernetesServicePort, deleteLocalData, ignoreDaemonSets, podTerminationGracePeriod)
217+
}
218+
219+
func main() {
220+
var dryRunMessageSuffix = "but dry-run flag was set"
221+
parseCliArgs()
148222
helper := getDrainHelper(nodeName)
149223

150224
node, err := helper.Client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
@@ -155,15 +229,23 @@ func main() {
155229
log.Println("Kubernetes Spot Node Termination Handler has started successfully!")
156230
waitForTermination()
157231

158-
err = drain.RunCordonOrUncordon(helper, node, true)
159-
if err != nil {
160-
log.Fatalf("Couldn't cordon node %q: %s\n", nodeName, err.Error())
232+
if dryRun {
233+
log.Printf("Node %s would have been cordoned, %s", nodeName, dryRunMessageSuffix)
234+
} else {
235+
err = drain.RunCordonOrUncordon(helper, node, true)
236+
if err != nil {
237+
log.Fatalf("Couldn't cordon node %q: %s\n", nodeName, err.Error())
238+
}
161239
}
162240

163-
// Delete all pods on the node
164-
err = drain.RunNodeDrain(helper, nodeName)
165-
if err != nil {
166-
log.Fatalln(err.Error())
241+
if dryRun {
242+
log.Printf("Node %s would have been drained, %s", nodeName, dryRunMessageSuffix)
243+
} else {
244+
// Delete all pods on the node
245+
err = drain.RunNodeDrain(helper, nodeName)
246+
if err != nil {
247+
log.Fatalln(err.Error())
248+
}
167249
}
168250

169251
log.Printf("Node %q successfully drained.\n", nodeName)

0 commit comments

Comments
 (0)