Skip to content

Commit 4a119be

Browse files
paulyeo21 and Kwangu Paul Yeo authored
waitUntilProxyReady feature (#598)
* Wait for envoy to initialize * Move envoy container to front of list of pod app containers * Update tests to reflect change in ordering * Validate behavior with integration test * Implement postStart hook * Validate server_info LIVE does not guarantee envoy mesh * Validate integration test by checking annotation * Version control integration test docker images * Provision role and rolebinding * Refactor provisioning functions to use kubernetes SDK * More refactoring of test * Use Job instead of Deployment * Add feature flag * Unit test feature flag * Use existing vars instead of passing waitUntilProxyStarts flag individually * Add config to test.yaml * Fix nit * Fix unit test * Set backofflimit to 1 * Add sidecar feature controller in CI integration tests * Fix typo * Add default for feature flag to false in Makefile * Pass as command line arg not environment var * Wait for controller upgrade * Support agent poll envoyReadiness command * Rename from wait for start to ready * Fix unit test * Update helm README * Add integration test validating PostStart with Envoy v1.22 * Run without setting sidecar image tag * Set sidecar image tag * Validate all containers are ready * Fix unit test * Parse and check envoy version * Add comment * Fix unit test * Add env vars to start agent server * Regex version to 1.22.2-appmesh.1 * Enable sidecar test suite * Properly pass config postStart values * Fix unit tests * Fix bash rematch condition * Assign sidecar image repo and tag configs separately * Clean up argument passing * Default sidecar image tag * Rename v1.22 sidecar test suite namespace * Properly cleanup sidecar test resources Co-authored-by: Kwangu Paul Yeo <[email protected]>
1 parent 40f2929 commit 4a119be

28 files changed

+2278
-51
lines changed

Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ VERSION ?= $(shell git describe --dirty --tags --always)
88
IMAGE ?= $(REPO):$(VERSION)
99
PREVIEW=false
1010
ENABLE_BACKEND_GROUPS?=false
11+
WAIT_PROXY_READY=false
12+
SIDECAR_IMAGE_TAG=v1.22.2.1-prod
1113

1214
# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
1315
CRD_OPTIONS ?= "crd:trivialVersions=true,crdVersions=v1"
@@ -56,9 +58,14 @@ deploy: check-env manifests
5658
helm-lint:
5759
${MAKEFILE_PATH}/test/helm/helm-lint.sh
5860

59-
6061
helm-deploy: check-env manifests
61-
helm upgrade -i appmesh-controller config/helm/appmesh-controller --namespace appmesh-system --set image.repository=$(REPO) --set image.tag=$(VERSION) --set preview=$(PREVIEW) --set enableBackendGroups=$(ENABLE_BACKEND_GROUPS)
62+
helm upgrade -i appmesh-controller config/helm/appmesh-controller --namespace appmesh-system \
63+
--set image.repository=$(REPO) \
64+
--set image.tag=$(VERSION) \
65+
--set preview=$(PREVIEW) \
66+
--set enableBackendGroups=$(ENABLE_BACKEND_GROUPS) \
67+
--set sidecar.waitUntilProxyReady=$(WAIT_PROXY_READY) \
68+
--set sidecar.image.tag=$(SIDECAR_IMAGE_TAG)
6269

6370
# Generate manifests e.g. CRD, RBAC etc.
6471
manifests: controller-gen

config/helm/appmesh-controller/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,11 @@ Parameter | Description | Default
390390
`sidecar.resources.requests` | Envoy container resource requests | `requests: cpu 10m memory 32Mi`
391391
`sidecar.resources.limits` | Envoy container resource limits | `limits: cpu "" memory ""`
392392
`sidecar.lifecycleHooks.preStopDelay` | Envoy container PreStop Hook Delay Value | `20s`
393+
`sidecar.lifecycleHooks.postStartInterval` | Envoy container PostStart Hook Interval Value | `5s`
394+
`sidecar.lifecycleHooks.postStartTimeout` | Envoy container PostStart Hook Timeout Value | `180s`
393395
`sidecar.probes.readinessProbeInitialDelay` | Envoy container Readiness Probe Initial Delay | `1s`
394396
`sidecar.probes.readinessProbePeriod` | Envoy container Readiness Probe Period | `10s`
397+
`sidecar.waitUntilProxyReady` | Enable pod postStart hook to delay application startup until proxy is ready to accept traffic | `false`
395398
`init.image.repository` | Route manager image repository | `840364872350.dkr.ecr.us-west-2.amazonaws.com/aws-appmesh-proxy-route-manager`
396399
`init.image.tag` | Route manager image tag | `<VERSION>`
397400
`stats.tagsEnabled` | If `true`, Envoy should include app-mesh tags | `false`

config/helm/appmesh-controller/templates/deployment.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,17 @@ spec:
5656
args:
5757
- --enable-leader-election=true
5858
- --log-level={{ .Values.log.level }}
59-
- --sidecar-image={{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}
59+
- --sidecar-image-repository={{ .Values.sidecar.image.repository }}
60+
- --sidecar-image-tag={{ .Values.sidecar.image.tag }}
6061
- --sidecar-cpu-requests={{ .Values.sidecar.resources.requests.cpu }}
6162
- --sidecar-memory-requests={{ .Values.sidecar.resources.requests.memory }}
6263
- --sidecar-cpu-limits={{ .Values.sidecar.resources.limits.cpu }}
6364
- --sidecar-memory-limits={{ .Values.sidecar.resources.limits.memory }}
6465
- --init-image={{ .Values.init.image.repository }}:{{ .Values.init.image.tag }}
6566
- --enable-stats-tags={{ .Values.stats.tagsEnabled }}
6667
- --prestop-delay={{ .Values.sidecar.lifecycleHooks.preStopDelay }}
68+
- --poststart-timeout={{ .Values.sidecar.lifecycleHooks.postStartTimeout }}
69+
- --poststart-interval={{ .Values.sidecar.lifecycleHooks.postStartInterval }}
6770
- --readiness-probe-initial-delay={{ .Values.sidecar.probes.readinessProbeInitialDelay }}
6871
- --readiness-probe-period={{ .Values.sidecar.probes.readinessProbePeriod }}
6972
- --envoy-admin-access-port={{ .Values.sidecar.envoyAdminAccessPort }}
@@ -114,6 +117,7 @@ spec:
114117
- --sidecar-log-level={{ .Values.sidecar.logLevel }}
115118
# this must be same as livenessProbe port which can be configured
116119
- --health-probe-port={{ .Values.livenessProbe.httpGet.port }}
120+
- --wait-until-proxy-ready={{ .Values.sidecar.waitUntilProxyReady }}
117121
{{- if .Values.env }}
118122
env:
119123
{{- range $key, $value := .Values.env }}

config/helm/appmesh-controller/test.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,13 @@ sidecar:
3535
lifecycleHooks:
3636
# sidecar.lifecycleHooks: Envoy PreStop Hook Delay
3737
preStopDelay: 20
38+
postStartTimeout: 180
39+
postStartInterval: 5
3840
probes:
3941
# sidecar.probes: Envoy Readiness Probe
4042
readinessProbeInitialDelay: 1
4143
readinessProbePeriod: 10
44+
waitUntilProxyReady: false
4245
init:
4346
image:
4447
repository: 840364872350.dkr.ecr.us-west-2.amazonaws.com/aws-appmesh-proxy-route-manager

config/helm/appmesh-controller/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,13 @@ sidecar:
3636
lifecycleHooks:
3737
# sidecar.lifecycleHooks: Envoy PreStop Hook Delay
3838
preStopDelay: 20
39+
postStartInterval: 5
40+
postStartTimeout: 180
3941
probes:
4042
# sidecar.probes: Envoy Readiness Probe
4143
readinessProbeInitialDelay: 1
4244
readinessProbePeriod: 10
45+
waitUntilProxyReady: false
4346
init:
4447
image:
4548
repository: 840364872350.dkr.ecr.us-west-2.amazonaws.com/aws-appmesh-proxy-route-manager

pkg/inject/config.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,24 @@ const (
1313
flagSdsUdsPath = "sds-uds-path"
1414
flagEnableBackendGroups = "enable-backend-groups"
1515

16-
flagSidecarImage = "sidecar-image"
16+
flagSidecarImageRepository = "sidecar-image-repository"
17+
flagSidecarImageTag = "sidecar-image-tag"
1718
flagSidecarCpuRequests = "sidecar-cpu-requests"
1819
flagSidecarMemoryRequests = "sidecar-memory-requests"
1920
flagSidecarCpuLimits = "sidecar-cpu-limits"
2021
flagSidecarMemoryLimits = "sidecar-memory-limits"
2122
flagPreview = "preview"
2223
flagLogLevel = "sidecar-log-level"
2324
flagPreStopDelay = "prestop-delay"
25+
flagPostStartTimeout = "poststart-timeout"
26+
flagPostStartInterval = "poststart-interval"
2427
flagReadinessProbeInitialDelay = "readiness-probe-initial-delay"
2528
flagReadinessProbePeriod = "readiness-probe-period"
2629
flagEnvoyAdminAccessPort = "envoy-admin-access-port"
2730
flagEnvoyAdminAccessLogFile = "envoy-admin-access-log-file"
2831
flagEnvoyAdminAccessEnableIpv6 = "envoy-admin-access-enable-ipv6"
2932
flagDualStackEndpoint = "dual-stack-endpoint"
33+
flagWaitUntilProxyReady = "wait-until-proxy-ready"
3034

3135
flagInitImage = "init-image"
3236
flagIgnoredIPs = "ignored-ips"
@@ -66,20 +70,24 @@ type Config struct {
6670
EnableBackendGroups bool
6771

6872
// Sidecar settings
69-
SidecarImage string
73+
SidecarImageRepository string
74+
SidecarImageTag string
7075
SidecarCpuRequests string
7176
SidecarMemoryRequests string
7277
SidecarCpuLimits string
7378
SidecarMemoryLimits string
7479
Preview bool
7580
LogLevel string
7681
PreStopDelay string
82+
PostStartTimeout int32
83+
PostStartInterval int32
7784
ReadinessProbeInitialDelay int32
7885
ReadinessProbePeriod int32
7986
EnvoyAdminAcessPort int32
8087
EnvoyAdminAccessLogFile string
8188
DualStackEndpoint bool
8289
EnvoyAdminAccessEnableIPv6 bool
90+
WaitUntilProxyReady bool
8391

8492
// Init container settings
8593
InitImage string
@@ -127,8 +135,9 @@ func (cfg *Config) BindFlags(fs *pflag.FlagSet) {
127135
fs.StringVar(&cfg.SdsUdsPath, flagSdsUdsPath, "/run/spire/sockets/agent.sock",
128136
"Unix Domain Socket path for SDS provider")
129137
fs.BoolVar(&cfg.EnableBackendGroups, flagEnableBackendGroups, false, "If enabled, experimental Backend Groups feature will be enabled.")
130-
fs.StringVar(&cfg.SidecarImage, flagSidecarImage, "public.ecr.aws/appmesh/aws-appmesh-envoy:v1.22.2.1-prod",
131-
"Envoy sidecar container image.")
138+
fs.StringVar(&cfg.SidecarImageRepository, flagSidecarImageRepository, "public.ecr.aws/appmesh/aws-appmesh-envoy",
139+
"Envoy sidecar container image repository.")
140+
fs.StringVar(&cfg.SidecarImageTag, flagSidecarImageTag, "v1.22.2.1-prod", "Envoy sidecar container image tag.")
132141
fs.StringVar(&cfg.SidecarCpuRequests, flagSidecarCpuRequests, "10m",
133142
"Sidecar CPU resources requests.")
134143
fs.StringVar(&cfg.SidecarMemoryRequests, flagSidecarMemoryRequests, "32Mi",
@@ -147,6 +156,10 @@ func (cfg *Config) BindFlags(fs *pflag.FlagSet) {
147156
"AWS App Mesh envoy access log path")
148157
fs.StringVar(&cfg.PreStopDelay, flagPreStopDelay, "20",
149158
"AWS App Mesh envoy preStop hook sleep duration")
159+
fs.Int32Var(&cfg.PostStartTimeout, flagPostStartTimeout, 180,
160+
"AWS App Mesh envoy postStart hook timeout duration")
161+
fs.Int32Var(&cfg.PostStartInterval, flagPostStartInterval, 5,
162+
"AWS App Mesh envoy postStart hook interval duration")
150163
fs.Int32Var(&cfg.ReadinessProbeInitialDelay, flagReadinessProbeInitialDelay, 1,
151164
"Number of seconds after Envoy has started before readiness probes are initiated")
152165
fs.Int32Var(&cfg.ReadinessProbePeriod, flagReadinessProbePeriod, 10,
@@ -192,6 +205,8 @@ func (cfg *Config) BindFlags(fs *pflag.FlagSet) {
192205
fs.BoolVar(&cfg.DualStackEndpoint, flagDualStackEndpoint, false, "Use DualStack Endpoint")
193206
fs.BoolVar(&cfg.DualStackEndpoint, flagEnvoyAdminAccessEnableIpv6, false, "Enable Admin access when using IPv6")
194207
fs.StringVar(&cfg.ClusterName, flagClusterName, "", "ClusterName in context")
208+
fs.BoolVar(&cfg.WaitUntilProxyReady, flagWaitUntilProxyReady, false,
209+
"Enable pod postStart hook to delay application startup until proxy is ready to accept traffic")
195210
}
196211

197212
func (cfg *Config) BindEnv() error {

pkg/inject/envoy.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,12 @@ type envoyMutatorConfig struct {
2323
adminAccessPort int32
2424
adminAccessLogFile string
2525
preStopDelay string
26+
postStartTimeout int32
27+
postStartInterval int32
2628
readinessProbeInitialDelay int32
2729
readinessProbePeriod int32
28-
sidecarImage string
30+
sidecarImageRepository string
31+
sidecarImageTag string
2932
sidecarCPURequests string
3033
sidecarMemoryRequests string
3134
sidecarCPULimits string
@@ -44,6 +47,7 @@ type envoyMutatorConfig struct {
4447
statsDPort int32
4548
statsDAddress string
4649
statsDSocketPath string
50+
waitUntilProxyReady bool
4751
controllerVersion string
4852
k8sVersion string
4953
useDualStackEndpoint bool
@@ -107,7 +111,14 @@ func (m *envoyMutator) mutate(pod *corev1.Pod) error {
107111
if m.mutatorConfig.enableSDS && !isSDSDisabled(pod) {
108112
mutateSDSMounts(pod, &container, m.mutatorConfig.sdsUdsPath)
109113
}
110-
pod.Spec.Containers = append(pod.Spec.Containers, container)
114+
115+
// waitUntilProxyReady requires starting sidecar container first
116+
if m.mutatorConfig.waitUntilProxyReady {
117+
pod.Spec.Containers = append([]corev1.Container{container}, pod.Spec.Containers...)
118+
} else {
119+
pod.Spec.Containers = append(pod.Spec.Containers, container)
120+
}
121+
111122
return nil
112123
}
113124

@@ -132,7 +143,10 @@ func (m *envoyMutator) buildTemplateVariables(pod *corev1.Pod) EnvoyTemplateVari
132143
AdminAccessPort: m.mutatorConfig.adminAccessPort,
133144
AdminAccessLogFile: m.mutatorConfig.adminAccessLogFile,
134145
PreStopDelay: m.mutatorConfig.preStopDelay,
135-
SidecarImage: m.mutatorConfig.sidecarImage,
146+
PostStartTimeout: m.mutatorConfig.postStartTimeout,
147+
PostStartInterval: m.mutatorConfig.postStartInterval,
148+
SidecarImageRepository: m.mutatorConfig.sidecarImageRepository,
149+
SidecarImageTag: m.mutatorConfig.sidecarImageTag,
136150
EnableXrayTracing: m.mutatorConfig.enableXrayTracing,
137151
XrayDaemonPort: m.mutatorConfig.xrayDaemonPort,
138152
XraySamplingRate: m.mutatorConfig.xraySamplingRate,
@@ -151,6 +165,7 @@ func (m *envoyMutator) buildTemplateVariables(pod *corev1.Pod) EnvoyTemplateVari
151165
K8sVersion: m.mutatorConfig.k8sVersion,
152166
UseDualStackEndpoint: useDualStackEndpoint,
153167
EnableAdminAccessForIpv6: m.mutatorConfig.enableAdminAccessIPv6,
168+
WaitUntilProxyReady: m.mutatorConfig.waitUntilProxyReady,
154169
}
155170
}
156171

0 commit comments

Comments
 (0)