[dev] Introduce script and CI step using trivy to scan and enforce 0 CRITICAL in images (#20712)

geropl · web-flow · commit 9dd5f747eb6d · 2025-03-28T03:44:56.000-04:00
* [scripts] Introduce trivy-scan-images.sh

Tool: gitpod/catfood.gitpod.cloud

* [trivy] Fitting trivyignore.yaml

Tool: gitpod/catfood.gitpod.cloud

* [trivy] Add scan and enforcement of "CRITICAL" vulns at build time

Tool: gitpod/catfood.gitpod.cloud

* Fix base repo ref

Tool: gitpod/catfood.gitpod.cloud

* Replace docker run by oci-tool fetch file

Tool: gitpod/catfood.gitpod.cloud
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -303,9 +303,33 @@ jobs:
           GITHUB_EMAIL: roboquat@gitpod.io
           VERSION: ${{ needs.configuration.outputs.version }}
 
+  # Scans the images of this build with Trivy and fails on CRITICAL findings.
+  # Jobs that must not run on a failed scan list `trivy-scan` in their `needs`.
+  trivy-scan:
+    name: "Scan Images for Vulnerabilities"
+    needs:
+      - configuration
+      - build-gitpod
+      - create-runner
+    runs-on: ${{ needs.create-runner.outputs.label }}
+    container:
+      image: eu.gcr.io/gitpod-core-dev/dev/dev-environment:main-gha.30393
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Environment
+        uses: ./.github/actions/setup-environment
+        with:
+          identity_provider: ${{ github.ref == 'refs/heads/main' && secrets.CORE_DEV_PROVIDER || secrets.DEV_PREVIEW_PROVIDER }}
+          service_account: ${{ github.ref == 'refs/heads/main' && secrets.CORE_DEV_SA || secrets.DEV_PREVIEW_SA }}
+          leeway_segment_key: ${{ secrets.LEEWAY_SEGMENT_KEY }}
+      - name: Scan Images for Vulnerabilities
+        shell: bash
+        # trivy-scan-images.sh reads INSTALLER_IMAGE_BASE_REPO from its environment. A plain
+        # `VAR=value` line inside `run:` is not exported to child processes, so the script would
+        # silently fall back to its built-in default. Passing values through `env:` also keeps
+        # workflow expressions out of the shell source.
+        env:
+          INSTALLER_IMAGE_BASE_REPO: ${{ needs.configuration.outputs.image_repo_base }}
+          SCAN_VERSION: ${{ needs.configuration.outputs.version }}
+        run: |
+          ./scripts/trivy/trivy-scan-images.sh "$SCAN_VERSION" CRITICAL
+
   install-app:
     runs-on: ${{ needs.create-runner.outputs.label }}
-    needs: [ configuration, build-gitpod, create-runner ]
+    needs: [ configuration, build-gitpod, trivy-scan, create-runner ]
     if: ${{ needs.configuration.outputs.is_main_branch == 'true' }}
     strategy:
       fail-fast: false
@@ -343,6 +367,7 @@ jobs:
       - configuration
       - build-previewctl
       - build-gitpod
+      - trivy-scan
       - infrastructure
       - create-runner
     runs-on: ${{ needs.create-runner.outputs.label }}
@@ -490,6 +515,7 @@ jobs:
       - build-previewctl
       - infrastructure
       - build-gitpod
+      - trivy-scan
       - install-app
       - install
       - monitoring
diff --git a/scripts/trivy/scan-installer-config.yaml b/scripts/trivy/scan-installer-config.yaml
@@ -0,0 +1,56 @@
+# Installer configuration used by trivy-scan-images.sh: the script passes this file to
+# `installer mirror list -c` to obtain the list of images to scan.
+# NOTE(review): hostnames, bucket and secret names below look like placeholders (example.com,
+# registry.example.com, ...) - confirm nothing here is expected to point at real infrastructure.
+apiVersion: v1
+domain: example.com
+authProviders: []
+blockNewUsers:
+  enabled: false
+  passlist: []
+certificate:
+  kind: secret
+  name: https-certificates
+containerRegistry:
+  enableAdditionalECRAuth: false
+  inCluster: false
+  privateBaseImageAllowList: []
+  subassemblyBucket: ""
+  external:
+    url: "registry.example.com"
+    certificate:
+      kind: secret
+      name: registry-certificate
+database:
+  inCluster: false
+  external:
+    certificate:
+      kind: secret
+      name: database-certificate
+disableDefinitelyGp: true
+kind: Full
+metadata:
+  region: local
+  shortname: default
+objectStorage:
+  inCluster: false
+  resources:
+    requests:
+      memory: 2Gi
+  s3:
+    endpoint: "s3.example.com"
+    bucket: "gitpod-storage"
+    credentials:
+      kind: secret
+      name: object-storage-credentials
+observability:
+  logLevel: info
+openVSX:
+  url: https://open-vsx.org
+repository: example.org
+workspace:
+  maxLifetime: 36h0m0s
+  resources:
+    requests:
+      cpu: "1"
+      memory: 2Gi
+  runtime:
+    containerdRuntimeDir: /var/lib/containerd/io.containerd.runtime.v2.task/k8s.io
+    containerdSocketDir: /run/containerd
+    fsShiftMethod: shiftfs
diff --git a/scripts/trivy/trivy-scan-images.sh b/scripts/trivy/trivy-scan-images.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# Copyright (c) 2025 Gitpod GmbH. All rights reserved.
+# Licensed under the GNU Affero General Public License (AGPL).
+# See License.AGPL.txt in the project root for license information.
+
+set -euo pipefail
+
+# Check if VERSION and FAIL_ON are provided
+if [[ $# -lt 2 ]]; then
+  echo "Usage: $0 VERSION FAIL_ON [TRIVY_ARGS...]"
+  echo "  VERSION: The version to scan (e.g., main-gha.32006)"
+  echo "  FAIL_ON: Severity threshold to fail on (empty, HIGH, or CRITICAL)"
+  echo "  TRIVY_ARGS: Additional arguments to pass to Trivy"
+  echo "Example: $0 main-gha.32006 HIGH"
+  exit 1
+fi
+
+INSTALLER_IMAGE_BASE_REPO="${INSTALLER_IMAGE_BASE_REPO:-eu.gcr.io/gitpod-dev-artifact}"
+
+# Extract VERSION and FAIL_ON from arguments and remove them from args list
+VERSION="$1"
+FAIL_ON="$2"
+shift 2
+
+# Validate FAIL_ON value
+if [[ -n "$FAIL_ON" ]] && [[ "$FAIL_ON" != "HIGH" ]] && [[ "$FAIL_ON" != "CRITICAL" ]]; then
+  echo "Error: FAIL_ON must be either empty, 'HIGH', or 'CRITICAL'"
+  exit 1
+fi
+
+
+if ! command -v jq &> /dev/null; then
+  echo "jq not found. Please install jq to continue."
+  exit 1
+fi
+
+# Set up working directory
+SCAN_DIR=$(mktemp -d -t trivy-scan-XXXXXX)
+echo "Working directory: $SCAN_DIR"
+
+# Directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALLER_CONFIG_PATH="$SCRIPT_DIR/scan-installer-config.yaml"
+TRIVYIGNORE_PATH="$SCRIPT_DIR/trivyignore.yaml"
+
+# Tools that are missing from PATH are downloaded into this private bin directory
+LOCAL_BIN_DIR="$SCAN_DIR/bin"
+
+# Ensure Trivy is installed
+TRIVY_CMD="trivy"
+if ! command -v "$TRIVY_CMD" &> /dev/null; then
+  echo "Trivy not found. Installing..."
+  mkdir -p "$LOCAL_BIN_DIR"
+  TRIVY_CMD="$LOCAL_BIN_DIR/trivy"
+  # NOTE(review): the install script is fetched from the main branch and no Trivy version is
+  # requested, so the scanner version (and with it the scan result) can change between runs.
+  curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b "$LOCAL_BIN_DIR"
+fi
+
+# Ensure oci-tool is installed
+OCI_TOOL_CMD="oci-tool"
+OCI_TOOL_VERSION="0.2.0"
+if ! command -v "$OCI_TOOL_CMD" &> /dev/null; then
+  mkdir -p "$LOCAL_BIN_DIR"
+  OCI_TOOL_CMD="$LOCAL_BIN_DIR/oci-tool"
+  # Download/extract and chmod are separate statements on purpose: a failure on the left-hand
+  # side of an `&&` list is exempt from `set -e`, which would let a broken download go unnoticed
+  # until the missing binary is invoked later on.
+  curl -fsSL "https://github.com/csweichel/oci-tool/releases/download/v${OCI_TOOL_VERSION}/oci-tool_${OCI_TOOL_VERSION}_linux_amd64.tar.gz" | tar xz -C "$LOCAL_BIN_DIR"
+  chmod +x "$OCI_TOOL_CMD"
+fi
+
+echo "=== Gathering list of all images for $VERSION"
+
+# Extract installer binary from installer image
+INSTALLER_IMAGE="$INSTALLER_IMAGE_BASE_REPO/build/installer:${VERSION}"
+INSTALLER="$SCAN_DIR/installer"
+"$OCI_TOOL_CMD" fetch file -o "$INSTALLER" --platform=linux-amd64 "${INSTALLER_IMAGE}" app/installer
+echo ""
+chmod +x "$INSTALLER"
+
+# Run the installer docker image to get the list of images
+"$INSTALLER" mirror list -c "$INSTALLER_CONFIG_PATH" > "$SCAN_DIR/mirror.json"
+
+# Extract original image references
+jq -r '.[].original' "$SCAN_DIR/mirror.json" > "$SCAN_DIR/images.txt"
+
+# Remove empty lines
+sed -i '/^\s*$/d' "$SCAN_DIR/images.txt"
+
+# Filter out specific image patterns
+echo "=== Filtered out images:"
+TOTAL_BEFORE=$(wc -l < "$SCAN_DIR/images.txt")
+
+# Apply all filters at once using extended regex
+grep -v -E "/build/ide/|/gitpod/workspace-|/library/mysql|/library/redis|/cloudsql-docker/gce-proxy" "$SCAN_DIR/images.txt" > "$SCAN_DIR/filtered_images.txt"
+
+TOTAL_AFTER=$(wc -l < "$SCAN_DIR/filtered_images.txt")
+FILTERED=$((TOTAL_BEFORE - TOTAL_AFTER))
+
+echo "  Total filtered: $FILTERED"
+
+# Use filtered list for scanning
+mv "$SCAN_DIR/filtered_images.txt" "$SCAN_DIR/images.txt"
+
+# Count total images
+TOTAL_IMAGES=$(wc -l < "$SCAN_DIR/images.txt")
+echo "=== Found $TOTAL_IMAGES images to scan"
+
+# File that collects one JSON document per scanned image
+RESULT_FILE="$SCAN_DIR/result.jsonl"
+
+# Scan all images with Trivy
+COUNTER=0
+FAILED=0
+while IFS= read -r IMAGE_REF; do
+  COUNTER=$((COUNTER + 1))
+
+  echo "= Scanning $IMAGE_REF [$COUNTER / $TOTAL_IMAGES]"
+
+  # Run Trivy on the image. A failing scan must not abort the whole run, so its status is
+  # captured with `||` instead of toggling `set -e`; with `pipefail` the status covers both
+  # trivy and jq. stdin is taken from /dev/null so that Trivy cannot consume the image list the
+  # surrounding loop is reading from.
+  scan_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+  scan_status=0
+  trivy_output=$("$TRIVY_CMD" image "$IMAGE_REF" --ignorefile "$TRIVYIGNORE_PATH" --scanners vuln --format json "$@" < /dev/null | jq -c .) || scan_status=$?
+
+  # Classify the outcome; an empty scan_error means the scan produced a usable report
+  scan_error=""
+  if [ "$scan_status" -ne 0 ]; then
+      scan_error="Trivy scan failed"
+  elif [ -z "$trivy_output" ]; then
+      # Exit status 0 without any output would otherwise leave no record of this image at all
+      echo "Warning: Trivy returned no output for $IMAGE_REF"
+      scan_error="Empty output from Trivy"
+  elif ! jq empty <<< "$trivy_output" > /dev/null 2>&1; then
+      echo "Warning: Trivy returned invalid JSON for $IMAGE_REF"
+      scan_error="Invalid JSON output from Trivy"
+  fi
+
+  # Append compact JSON (one line per document) so the file really is JSON Lines.
+  # These writes run under `set -e`: failing to record a result aborts the script rather than
+  # silently dropping an image from the report.
+  if [ -z "$scan_error" ]; then
+      jq -c --arg image "$IMAGE_REF" --arg scan_time "$scan_time" \
+          '. + {image: $image, scan_time: $scan_time}' <<< "$trivy_output" >> "$RESULT_FILE"
+  else
+      jq -n -c --arg image "$IMAGE_REF" \
+            --arg scan_time "$scan_time" \
+            --arg error "$scan_error" \
+            --arg details "$trivy_output" \
+            '{image: $image, scan_time: $scan_time, error: $error, error_details: $details}' >> "$RESULT_FILE"
+      FAILED=$((FAILED + 1))
+  fi
+
+  echo ""
+done < "$SCAN_DIR/images.txt"
+
+# Generate summary report
+echo "=== Scan Summary ==="
+echo "Scan directory: $SCAN_DIR"
+echo "Results file: $RESULT_FILE"
+echo "Total ignored images: $FILTERED"
+echo "Total scanned images: $TOTAL_IMAGES"
+echo "Failed scans: $FAILED"
+echo "Triviy binary: $TRIVY_CMD"
+echo "Triviy version: $($TRIVY_CMD version)"
+echo ""
+
+# Count vulnerabilities by severity
+echo "=== Vulnerability Summary ==="
+CRITICAL="$(jq -r 'if .Results != null then [.Results[].Vulnerabilities // [] | .[] | select(.Severity == "CRITICAL")] | length else 0 end' "$RESULT_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum}')"
+HIGH="$(jq -r 'if .Results != null then [.Results[].Vulnerabilities // [] | .[] | select(.Severity == "HIGH")] | length else 0 end' "$RESULT_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum}')"
+echo "CRITICAL: $CRITICAL"
+echo "HIGH: $HIGH"
+echo ""
+
+echo "=== Scan completed ==="
+if [[ $FAILED -gt 0 ]]; then
+  echo "ERROR: $FAILED scans failed"
+  exit 1
+fi
+
+# Check if we should fail based on vulnerability counts
+if [[ "$FAIL_ON" == "CRITICAL" ]] && [[ $CRITICAL -gt 0 ]]; then
+  echo "FAIL: Found $CRITICAL CRITICAL vulnerabilities, and FAIL_ON=CRITICAL was specified"
+  exit 1
+elif [[ "$FAIL_ON" == "HIGH" ]] && [[ $((CRITICAL + HIGH)) -gt 0 ]]; then
+  echo "FAIL: Found $CRITICAL CRITICAL and $HIGH HIGH vulnerabilities, and FAIL_ON=HIGH was specified"
+  exit 1
+fi
+
+echo "0 $FAIL_ON or higher vulnerabilities found."
+exit 0
diff --git a/scripts/trivy/trivyignore.yaml b/scripts/trivy/trivyignore.yaml
@@ -0,0 +1,85 @@
+# Trivy ignore rules; trivy-scan-images.sh passes this file to `trivy image` via --ignorefile.
+# Each entry names a CVE (optionally limited to specific paths) and carries a statement
+# explaining why the finding is considered non-exploitable.
+vulnerabilities:
+  # Typescript / typeorm
+  - id: CVE-2022-33171
+    paths:
+      - "app/node_modules/typeorm/package.json"
+    statement: |
+      This vulnerability in TypeORM's findOne / findOneOrFail functions can improperly interpret a crafted JSON object
+      and concatenate it into raw SQL, potentially allowing SQL injection attacks.
+
+      In Gitpod’s usage, TypeORM is not exposed to arbitrary user input. For example, DB migrations run preset queries;
+      the server/bridge code does not hand raw JSON from external sources to findOne. Therefore, there is no path for
+      injecting malicious JSON into a query, rendering the vulnerability non-exploitable.
+
+  # image-builder-bob
+  - id: CVE-2024-32002
+    statement: |
+      A vulnerability allowing remote code execution when cloning a malicious repository (often leveraging submodules
+      and symlinks on case-insensitive filesystems) that can place malicious scripts in .git/hooks/. On Windows/macOS
+      (case-insensitive FS), a malicious repo can use symlinks or tricky paths to cause Git to write hook scripts in
+      the main .git directory during clone, which Git then executes automatically. This results in code execution on
+      the system performing the clone.
+
+      The standard exploit path involving .Git vs. .git doesn’t work on Linux. Also, Gitpod’s builder typically
+      fetches known repositories or user-provided repos in a controlled environment. Even if a user cloned a hostile
+      repo in their own workspace, they’d compromise only themselves (in a sandboxed container). No broader exploit
+      of the underlying host is possible.
+
+  - id: CVE-2024-23652
+    paths:
+      - "usr/bin/buildctl"
+      - "usr/bin/buildkitd"
+    statement: |
+      A vulnerability where the RUN --mount cleanup logic could remove directories outside of the build context,
+      leading to potential arbitrary file deletion on the host if BuildKit is used with untrusted Dockerfiles.
+
+      Gitpod runs BuildKit inside containers in a Kubernetes environment. Even if a malicious Dockerfile tries to
+      exploit this, it can only affect the container’s filesystem (which the user already controls). There’s no path to
+      escalate beyond the container to the node’s host filesystem, so this has no material security impact in Gitpod’s
+      isolated build setup.
+
+  - id: CVE-2024-23653
+    paths:
+      - "usr/bin/buildctl"
+      - "usr/bin/buildkitd"
+    statement: |
+      A flaw allowing a build container to run with elevated privileges without the required security.insecure
+      entitlement. This can grant a malicious Dockerfile or BuildKit client more privileges than intended during the
+      build.
+
+      In Gitpod, BuildKit operates under tight Kubernetes constraints. Even if a user tries to request privileged mode,
+      it won’t escalate to the actual host or break out of the container. The user is effectively “attacking” their
+      own build container. They already have control there, so there’s no additional privilege escalation beyond their
+      existing user container in Gitpod’s architecture.
+
+
+  - id: CVE-2024-45337
+    paths:
+      - "usr/bin/buildctl"
+      - "usr/bin/buildkitd"
+      - "usr/bin/kube-rbac-proxy"
+    statement: |
+      An SSH authentication bypass issue if PublicKeyCallback is used incorrectly in Go’s SSH server library. Misuse
+      can lead to validating the wrong public key and granting access under the wrong identity.
+
+      Neither BuildKit nor kube-rbac-proxy runs an SSH server that uses PublicKeyCallback for authentication.
+      Kube-rbac-proxy does HTTP-based RBAC checks. BuildKit may include x/crypto/ssh but does not expose an SSH server
+      requiring PublicKeyCallback. Hence, there’s no exploit path for this bug in these components.
+
+  - id: CVE-2024-24790
+    statement: |
+      A logic flaw in Go’s net/netip packages causing misclassification of certain IPv4-mapped IPv6 addresses (e.g., failing to mark them as loopback/private). Could lead to security checks that rely on IP classification (like IsPrivate, IsLoopback) being bypassed.
+
+      In these images (Cloud SQL Proxy, bob-runc, BuildKit, kube-rbac-proxy), there’s no code path that enforces security decisions using IsLoopback/IsPrivate from Go’s netip. They either accept connections in a controlled environment (Cloud SQL Proxy -> GCP) or use different authentication mechanisms (kube-rbac-proxy). Hence, no external attack can exploit this misclassification.
+
+  - id: CVE-2024-45491
+    statement: |
+      Integer overflow in dtdCopy when dealing with a large number of default attributes in a DTD, causing potential out-of-bounds write or memory corruption in Expat on 32-bit systems.
+
+      The container runs on a 64-bit Alpine Linux environment, not a 32-bit build, so the overflow is not triggered the same way. Moreover, there is no component automatically parsing untrusted XML with Expat. Hence, no practical exploitation path in Gitpod’s usage.
+
+  - id: CVE-2024-45492
+    statement: |
+      Similar integer overflow in the nextScaffoldPart function of Expat, triggered by enormous or deeply nested content models in the DTD. Can lead to denial of service or potential corruption.
+
+      As with CVE-2024-45491, the environment is 64-bit Alpine, and no untrusted XML parsing occurs by default. Thus, the bug cannot be reached in a way that leads to exploitation.