Skip to content

Commit 89ff5b7

Browse files
erikamov and ohrite authored
Compile dbt documentation in Github actions (#3814)
* Extract environment variables from terraform workflow yml
* Split dbt related github workflow items
* Upload dbt artifacts and documentation to GCS
* Allow staging github actions service account to run bigquery jobs
* Correctly interpolate runner operating system in cache key
* Compile staging against staging
* Use google credentials output for keyfile
* Do not use service account to build dbt
* Add permission to service account to create BigQuery job on cal-itp-data-infra-staging to fix error when compiling dbt
* Apply custom roles to service account
* Add permissions for staging to read production resources
* Allow staging service account to act like a production analyst
* Allow github service account to read filtered data and bigquery metadata
* Store CI artifacts on github
* Remove old Composer environment references

Signed-off-by: Erika Pacheco <[email protected]>
Co-authored-by: Doc Ritezel <[email protected]>
1 parent e2afb8f commit 89ff5b7

File tree

10 files changed

+523
-140
lines changed

10 files changed

+523
-140
lines changed

.github/workflows/build-warehouse-image.yml

Lines changed: 42 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
name: Test, visualize, and build dbt project
1+
name: Build dbt image
22

33
on:
44
push:
55
branches:
6-
- 'main'
6+
- main
77
paths:
88
- '.github/workflows/build-warehouse-image.yml'
99
- 'warehouse/**'
@@ -16,77 +16,57 @@ concurrency:
1616
group: ${{ github.workflow }}-${{ github.ref }}
1717
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
1818

19+
env:
20+
PYTHON_VERSION: '3.11'
21+
POETRY_VERSION: '2.0.1'
22+
1923
jobs:
20-
check:
21-
name: check python
24+
lint:
2225
runs-on: ubuntu-latest
2326
steps:
24-
- run: sudo apt-get install -y libgraphviz-dev graphviz-dev
25-
- uses: actions/checkout@v3
26-
- uses: actions/setup-python@v4
27-
with:
28-
python-version: '3.11'
29-
- run: curl -sSL https://install.python-poetry.org | POETRY_VERSION=2.0.1 python -
30-
- run: cd warehouse && poetry install && poetry run mypy scripts
27+
- name: Checkout
28+
uses: actions/checkout@v4
3129

32-
compile:
33-
name: check dbt
34-
runs-on: ubuntu-latest
35-
steps:
36-
- run: sudo apt-get install -y libgraphviz-dev graphviz graphviz-dev
37-
- uses: actions/checkout@v3
38-
- uses: actions/setup-python@v4
39-
with:
40-
python-version: '3.11'
41-
- run: curl -sSL https://install.python-poetry.org | POETRY_VERSION=2.0.1 python -
42-
- uses: 'google-github-actions/auth@v2'
43-
with:
44-
credentials_json: '${{ secrets.GCP_SA_KEY }}'
45-
- uses: google-github-actions/setup-gcloud@v2
46-
- name: Compile dbt project
47-
working-directory: warehouse
48-
run: |
49-
poetry install
50-
poetry run dbt deps
51-
poetry run dbt compile --target prod --full-refresh
52-
poetry run dbt docs generate --target prod --no-compile
53-
- uses: 'google-github-actions/upload-cloud-storage@v1'
30+
- name: Setup Graphviz
31+
uses: ts-graphviz/setup-graphviz@v2
32+
33+
- name: Setup Python
34+
uses: actions/setup-python@v5
5435
with:
55-
path: './warehouse/logs/'
56-
destination: 'calitp-ci-artifacts/${{github.workflow}}/run_id=${{github.run_id}}/job=${{github.job}}/logs/'
57-
- uses: 'google-github-actions/upload-cloud-storage@v1'
36+
python-version: ${{ env.PYTHON_VERSION }}
37+
38+
- name: Cache poetry
39+
uses: actions/cache@v3
5840
with:
59-
path: './warehouse/target/'
60-
glob: '*.json'
61-
destination: 'calitp-ci-artifacts/${{github.workflow}}/run_id=${{github.run_id}}/job=${{github.job}}/target/'
62-
# Only do visualization if we actually changed models and we are merging against main
63-
- uses: tj-actions/changed-files@v41
64-
if: ${{ github.event_name == 'pull_request' }}
65-
id: changed-files-warehouse
41+
path: ~/.cache/pypoetry
42+
key: poetry-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-poetry-${{ env.POETRY_VERSION }}
43+
44+
- name: Setup Poetry
45+
uses: abatilo/actions-poetry@v3
6646
with:
67-
files: 'warehouse/models/**/*.sql'
68-
# install a specific version of node before cml https://github.com/iterative/cml/issues/1377
69-
- uses: actions/setup-node@v1
47+
poetry-version: ${{ env.POETRY_VERSION }}
48+
49+
- name: Cache python packages
50+
uses: actions/cache@v3
7051
with:
71-
node-version: '16'
72-
- uses: iterative/setup-cml@v1
73-
if: steps.changed-files-warehouse.outputs.any_changed == 'true'
74-
- name: Create GitHub comment
75-
if: steps.changed-files-warehouse.outputs.any_changed == 'true'
76-
env:
77-
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
78-
run: |
79-
cd warehouse
80-
gsutil cp -r gs://calitp-dbt-artifacts/latest/ .
81-
poetry run python scripts/visualize.py ci-report
82-
cml comment update target/report.md
52+
path: ~/.local
53+
key: python-cache-${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-lock-${{ hashFiles('poetry.lock') }}-${{ hashFiles('.github/workflows/*.yml') }}
54+
55+
- name: Install dependencies
56+
working-directory: warehouse
57+
run: poetry install
58+
59+
- name: Run mypy
60+
working-directory: warehouse
61+
run: poetry run mypy scripts
8362

84-
build_push:
85-
name: package warehouse image
63+
docker:
8664
runs-on: ubuntu-latest
87-
needs: [check]
65+
needs: [lint]
8866
steps:
89-
- uses: actions/checkout@v3
67+
- name: Checkout
68+
uses: actions/checkout@v4
69+
9070
- uses: tj-actions/changed-files@v41
9171
with:
9272
files: |
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: Deploy Airflow Requirements
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
paths:
8+
- airflow/requirements.txt
9+
10+
env:
11+
SERVICE_ACCOUNT: github-actions-services-accoun@cal-itp-data-infra.iam.gserviceaccount.com
12+
WORKLOAD_IDENTITY_PROVIDER: projects/1005246706141/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
13+
PROJECT_ID: cal-itp-data-infra
14+
COMPOSER_ENVIRONMENT: calitp-airflow2-prod-composer2-20250402
15+
COMPOSER_REGION: us-west2
16+
17+
jobs:
18+
sync:
19+
runs-on: ubuntu-latest
20+
21+
permissions:
22+
contents: read
23+
id-token: write
24+
25+
steps:
26+
- name: Checkout
27+
uses: actions/checkout@v4
28+
29+
- name: Authenticate Google Service Account
30+
uses: google-github-actions/auth@v2
31+
with:
32+
project_id: ${{ env.PROJECT_ID }}
33+
workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
34+
service_account: ${{ env.SERVICE_ACCOUNT }}
35+
36+
- name: Setup GCloud utilities
37+
uses: google-github-actions/setup-gcloud@v2
38+
39+
- name: Update Composer Dependencies
40+
run: |
41+
gcloud composer environments update ${{ env.COMPOSER_ENVIRONMENT }} \
42+
--update-pypi-packages-from-file airflow/requirements.txt \
43+
--location ${{ env.COMPOSER_REGION }} \
44+
--project ${{ env.PROJECT_ID }}

.github/workflows/deploy-airflow.yml

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,35 @@ on:
88
- .github/workflows/deploy-airflow.yml
99
- 'airflow/**'
1010

11+
env:
12+
SERVICE_ACCOUNT: github-actions-services-accoun@cal-itp-data-infra.iam.gserviceaccount.com
13+
WORKLOAD_IDENTITY_PROVIDER: projects/1005246706141/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
14+
PROJECT_ID: cal-itp-data-infra
15+
AIRFLOW_BUCKET: us-west2-calitp-airflow2-pr-f6bb9855-bucket
16+
1117
jobs:
12-
deploy:
18+
build:
1319
runs-on: ubuntu-latest
14-
steps:
15-
- name: Check out repo
16-
uses: actions/checkout@v2
17-
with:
18-
fetch-depth: 0
1920

20-
- uses: 'google-github-actions/auth@v2'
21-
with:
22-
credentials_json: '${{ secrets.GCP_SA_KEY }}'
21+
permissions:
22+
contents: read
23+
id-token: write
2324

24-
- uses: google-github-actions/setup-gcloud@v2
25+
steps:
26+
- name: Checkout
27+
uses: actions/checkout@v4
2528

26-
# Only update requirements if they have changed; Composer throws an error if there are no changes to apply
27-
- uses: tj-actions/changed-files@v41
28-
if: ${{ github.ref == 'refs/heads/main' }}
29-
id: changed-requirements
29+
- name: Authenticate Google Service Account
30+
uses: google-github-actions/auth@v2
3031
with:
31-
files: 'airflow/requirements.txt'
32+
project_id: ${{ env.PROJECT_ID }}
33+
workload_identity_provider: ${{ env.WORKLOAD_IDENTITY_PROVIDER }}
34+
service_account: ${{ env.SERVICE_ACCOUNT }}
3235

33-
- name: Deploy Airflow dependencies to Composer
34-
if: steps.changed-requirements.outputs.any_changed == 'true'
35-
run: gcloud composer environments update calitp-airflow2-prod-composer2-20250402 --update-pypi-packages-from-file airflow/requirements.txt --location us-west2 --project cal-itp-data-infra
36+
- name: Setup GCloud utilities
37+
uses: google-github-actions/setup-gcloud@v2
3638

3739
- name: Push Airflow code to Composer
3840
run: |
39-
gsutil -m rsync -d -c -r airflow/dags gs://$AIRFLOW_BUCKET/dags
40-
gsutil -m rsync -d -c -r airflow/plugins gs://$AIRFLOW_BUCKET/plugins
41-
env:
42-
AIRFLOW_BUCKET: "us-west2-calitp-airflow2-pr-f6bb9855-bucket"
41+
gsutil -m rsync -d -c -r airflow/dags gs://${{ env.AIRFLOW_BUCKET }}/dags
42+
gsutil -m rsync -d -c -r airflow/plugins gs://${{ env.AIRFLOW_BUCKET }}/plugins

0 commit comments

Comments
 (0)