Skip to content

Commit

Permalink
entrypoint: add liveness probe endpoint to the sidecar (#33)
Browse files Browse the repository at this point in the history
* entrypoint: add liveness probe endpoint to the sidecar

This change uses the sidecar's liveness probe to guarantee a regular
period of uninterrupted CPU, so the sidecar can complete its Prometheus
scrapes and flushes to GMP.

Change-Id: Ic6f0ed38ade237d6179dea12268cd2575d221146

* entrypoint: handle serving errors

Change-Id: Id31f5294721e6583af84aa4127b8bae21a243267
  • Loading branch information
ridwanmsharif authored May 28, 2024
1 parent 18777a5 commit 46199e0
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 41 deletions.
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ RUN make build

FROM alpine:latest
RUN apk add --no-cache ca-certificates
RUN apk add openssl=3.1.4-r6 && apk upgrade openssl --no-cache
COPY --from=builder /sidecar/bin/rungmpcol /rungmpcol
COPY --from=builder /sidecar/bin/run-gmp-entrypoint /run-gmp-entrypoint

Expand Down
24 changes: 24 additions & 0 deletions entrypoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"path/filepath"
Expand All @@ -34,6 +35,9 @@ var userConfigFile = "/etc/rungmp/config.yaml"
// Package-level settings for the entrypoint.
var (
	otelConfigFile        = "/run/rungmp/otel.yaml"
	configRefreshInterval = 20 * time.Second
	selfMetricsPort       = 0

	// livenessProbePort is the TCP port the entrypoint's HTTP server listens
	// on to answer liveness probes.
	livenessProbePort = 13133
	// livenessProbePath is the URL path on which the liveness handler is
	// registered.
	livenessProbePath = "/liveness"
	// delayLivenessProbe is how long the liveness handler waits before it
	// answers a probe.
	delayLivenessProbe = 5 * time.Second
)

func getRawUserConfig(userConfigFile string) (string, error) {
_, err := os.Stat(userConfigFile)
Expand Down Expand Up @@ -84,6 +88,16 @@ func generateOtelConfig(ctx context.Context, userConfigFile string) error {
return nil
}

// healthcheckHandler answers the sidecar's liveness probe.
//
// The container is allocated CPU for the duration of the healthcheck. Delaying
// the response to this probe allows the container to complete telemetry flushes
// that may have been throttled.
//
// The handler holds the request open for delayLivenessProbe and then replies
// with 200 OK. If the request's context is done first (for example because the
// prober gave up and closed the connection), the handler returns right away
// instead of keeping its goroutine parked for the remainder of the delay.
//
// TODO(b/342463831): Use a more reliable way of checking if telemetry is being
// flushed instead of using a static sleep.
func healthcheckHandler(w http.ResponseWriter, r *http.Request) {
	timer := time.NewTimer(delayLivenessProbe)
	defer timer.Stop()

	select {
	case <-timer.C:
		// Same status the server would send implicitly; written out so the
		// success path is explicit.
		w.WriteHeader(http.StatusOK)
	case <-r.Context().Done():
		// Nobody is waiting for a response any more, so there is nothing
		// useful to write.
	}
}

func main() {
// SIGINT handles Ctrl+C locally.
// SIGTERM handles Cloud Run termination signal.
Expand All @@ -101,6 +115,16 @@ func main() {
log.Fatal(err)
}

entrypointMux := http.NewServeMux()
entrypointMux.HandleFunc(livenessProbePath, healthcheckHandler)

go func() {
err := http.ListenAndServe(fmt.Sprintf(":%d", livenessProbePort), entrypointMux)
if err != nil && err != http.ErrServerClosed {
log.Fatal(err)
}
}()

// Spin up new-subprocess that runs the OTel collector and store the PID.
// This OTel collector should use the generated config.
var procAttr os.ProcAttr
Expand Down
31 changes: 16 additions & 15 deletions run-service-simple.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,22 @@ spec:
metadata:
annotations:
run.googleapis.com/execution-environment: gen2
run.googleapis.com/cpu-throttling: 'false'
run.googleapis.com/cpu-throttling: "false"
run.googleapis.com/container-dependencies: '{"collector":["app"]}'
spec:
containers:
- image: "%SAMPLE_APP_IMAGE%"
name: app
startupProbe:
httpGet:
path: /startup
port: 8000
livenessProbe:
httpGet:
path: /liveness
port: 8000
ports:
- containerPort: 8000
- image: us-docker.pkg.dev/cloud-ops-agents-artifacts/cloud-run-gmp-sidecar/cloud-run-gmp-sidecar:1.1.1
name: collector
- image: "%SAMPLE_APP_IMAGE%"
name: app
startupProbe:
httpGet:
path: /startup
port: 8000
livenessProbe:
httpGet:
path: /liveness
port: 8000
ports:
- containerPort: 8000
- image: us-docker.pkg.dev/cloud-ops-agents-artifacts/cloud-run-gmp-sidecar/cloud-run-gmp-sidecar:1.1.1
name: collector
# TODO(b/342463134): Post release 1.2.0, update the collector container to use the healthcheck endpoint
55 changes: 30 additions & 25 deletions run-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,37 @@ spec:
metadata:
annotations:
run.googleapis.com/execution-environment: gen2
run.googleapis.com/cpu-throttling: 'false'
run.googleapis.com/container-dependencies: '{"collector":["app"]}'
run.googleapis.com/secrets: '%SECRET%:projects/%PROJECT%/secrets/%SECRET%'
run.googleapis.com/secrets: "%SECRET%:projects/%PROJECT%/secrets/%SECRET%"
spec:
containers:
- image: "%SAMPLE_APP_IMAGE%"
name: app
startupProbe:
httpGet:
path: /startup
port: 8000
livenessProbe:
httpGet:
path: /liveness
port: 8000
ports:
- containerPort: 8000
- image: "%OTELCOL_IMAGE%"
name: collector
volumeMounts:
- mountPath: /etc/rungmp/
name: config
- image: "%SAMPLE_APP_IMAGE%"
name: app
startupProbe:
httpGet:
path: /startup
port: 8000
livenessProbe:
httpGet:
path: /liveness
port: 8000
ports:
- containerPort: 8000
- image: "%OTELCOL_IMAGE%"
name: collector
livenessProbe:
httpGet:
path: /liveness
port: 13133
timeoutSeconds: 30
periodSeconds: 30
volumeMounts:
- mountPath: /etc/rungmp/
name: config
volumes:
- name: config
secret:
items:
- key: latest
path: config.yaml
secretName: '%SECRET%'
- name: config
secret:
items:
- key: latest
path: config.yaml
secretName: "%SECRET%"

0 comments on commit 46199e0

Please sign in to comment.