lucebox-hub/.github/workflows/docker.yml at main · easel/lucebox-hub · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
name: Docker prebuilds

# Builds the cuda12 lucebox-hub Docker image defined in docker-bake.hcl and,
# for releases and opted-in manual runs, pushes it to GHCR. The bake file is
# the source of truth for arch matrices and CUDA pinning; this workflow only
# handles fetching submodules, freeing runner disk, signing in to the
# registry, and wiring the cache.

on:
  # A published GitHub Release triggers a build that is pushed to GHCR. The
  # release tag feeds docker/metadata-action's `type=ref,event=tag` and
  # `type=semver` rules further down, so it ends up among the image tags.
  release:
    types:
      - published
  # Pull requests that touch the docker surface get a build-only run.
  # Nothing is ever pushed from a PR — GITHUB_TOKEN on PRs from forks lacks
  # `packages:write` in any case. The goal is to surface Dockerfile,
  # bake-file, and arch-list regressions before they land on main.
  pull_request:
    paths:
      - Dockerfile
      - docker-bake.hcl
      - .dockerignore
      - .github/workflows/docker.yml
      - dflash/CMakeLists.txt
      - dflash/src/**
      - dflash/test/**
      - dflash/include/**
      - dflash/scripts/**
      - dflash/deps/**
      - dflash/pyproject.toml
      - pyproject.toml
      - uv.lock
      - lucebox.sh
      - lucebox/**
  # Manual runs cover one-off rebuilds and pre-release smoke tests. The
  # `push` input decides whether the built images are pushed to GHCR or the
  # run only populates the buildx cache.
  workflow_dispatch:
    inputs:
      push:
        description: 'Push images to GHCR after build'
        type: boolean
        default: false

# One concurrency group per ref. Only pull_request runs cancel an in-flight
# run in their group, so a superseded PR push doesn't keep a 30-min compile
# going. Release and manual runs never cancel: they wait for the running
# build instead, so a build that is pushing images to GHCR is not killed
# half-way by a later trigger on the same ref.
concurrency:
  group: docker-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

env:
  # Image path below the registry host: <repository owner>/lucebox-hub.
  IMAGE_NAME: ${{ github.repository_owner }}/lucebox-hub
  # Registry host, used by the login step and as the image-name prefix.
  REGISTRY: ghcr.io

jobs:
  build:
    name: ${{ matrix.variant }}
    # Hosted ubuntu-latest runners offer 4 CPU / 16 GB RAM / 14 GB of free
    # disk. The disk-free step at the top of this job recovers roughly
    # 30 GB, which is enough room for a 14 GB image plus its build cache.
    # CPU is the tighter limit: compiling the fat-binary arch list can take
    # hours on hosted runners. Ways out if that becomes a problem:
    #   • Larger GitHub-hosted runners (`ubuntu-latest-8-cores`, paid)
    #     halve wall time.
    #   • A self-hosted runner that uses the host's nvcc skips pulling the
    #     containerised CUDA toolkit altogether.
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        variant:
          - cuda12
    permissions:
      contents: read
      packages: write
    steps:
      - name: Free runner disk space
        # Stock ubuntu-latest images ship ~25 GB of preinstalled tooling
        # (Android SDK, .NET, Haskell, ghc, etc.) that this build never uses.
        # The action is pinned to a commit; check upstream releases before
        # bumping it.
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          android: true
          dotnet: true
          haskell: true
          tool-cache: true
          swap-storage: true
          # Left off: this covers preinstalled apt packages we don't need,
          # but removing them is slow.
          large-packages: false

      # Check out with submodules: the cmake build needs their contents
      # (llama.cpp ggml subtree, mit-han-lab Block-Sparse-Attention), and the
      # Dockerfile asserts they are present before it runs cmake.
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Set up Docker Buildx ahead of the bake-based build step.
      - uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        uses: docker/login-action@v3
        # Not run for pull requests: PR builds never push, and the token of
        # a fork PR can't `packages:write` anyway.
        if: ${{ github.event_name != 'pull_request' }}
        with:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
          registry: ${{ env.REGISTRY }}

      - name: Derive image metadata
        # Computes the image tags for this run; the bake step consumes them
        # through `steps.meta.outputs.bake-file`.
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # Suffix every tag with the variant so future CUDA stacks can
          # coexist under the same image name. Examples:
          #   ghcr.io/<owner>/lucebox-hub:cuda12
          #   ghcr.io/<owner>/lucebox-hub:v0.2.0-cuda12
          #   ghcr.io/<owner>/lucebox-hub:main-cuda12
          #   ghcr.io/<owner>/lucebox-hub:sha-abc1234-cuda12
          # `latest=false` means no automatic `latest` tag is generated.
          # The first `tags` rule emits the bare variant name: it clears the
          # suffix with `suffix=` and is enabled only for release events.
          # The remaining rules cover branch, tag, and PR refs, the commit
          # SHA, and the semver forms of a release tag.
          flavor: |
            latest=false
            suffix=-${{ matrix.variant }},onlatest=true
          tags: |
            type=raw,value=${{ matrix.variant }},suffix=,priority=1000,enable=${{ github.event_name == 'release' }}
            type=ref,event=branch
            type=ref,event=tag
            type=ref,event=pr
            type=sha,prefix=sha-
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}

      - name: Build and push
        uses: docker/bake-action@v5
        with:
          targets: ${{ matrix.variant }}
          # The repo's bake file plus the one generated by the metadata
          # step, which carries the computed tags.
          files: |
            docker-bake.hcl
            ${{ steps.meta.outputs.bake-file }}
          # Push for releases, and for manual runs that set the `push`
          # input; every other run only builds.
          push: ${{ github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.push) }}
          # The gha cache keeps layer blobs in this workflow's Actions
          # cache. The scope is the variant name, so future CUDA stacks
          # don't evict each other. mode=max additionally caches the
          # intermediate layers of multi-stage builds — notably the builder
          # stage with the 30-min nvcc compile, which is the whole point of
          # caching here.
          set: |
            ${{ matrix.variant }}.cache-from=type=gha,scope=${{ matrix.variant }}
            ${{ matrix.variant }}.cache-to=type=gha,scope=${{ matrix.variant }},mode=max