From 79be47e53e5e217c348a94012eb3abd5a46fe16e Mon Sep 17 00:00:00 2001 From: Gudge Date: Fri, 19 Jun 2026 10:18:13 -0700 Subject: [PATCH] ci: seed public cargo feed from Cargo.lock on lockfile changes Fork PRs lose System.AccessToken and only run the GitHub Actions gates, which build against real crates.io. They never exercise the network-isolated ADO build that redirects crates.io to the anonymous-read MxcDependencies feed, so a fork-PR lockfile bump can introduce a brand-new transitive crate that was never cached in the public feed. The next in-repo PR or main push then fails cargo fetch with a 401 (as seen for futures-task/slab after #534). Add Seed.Cargo.Feed.yml + scripts/ci/seed-cargo-feed.ps1 to authenticated- download every locked crate's .crate file into the feed (which persists each version) whenever Cargo.lock changes, plus a daily safety-net run. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .azure-pipelines/README.md | 31 +++++- .azure-pipelines/Seed.Cargo.Feed.yml | 81 +++++++++++++++ scripts/ci/seed-cargo-feed.ps1 | 145 +++++++++++++++++++++++++++ 3 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 .azure-pipelines/Seed.Cargo.Feed.yml create mode 100644 scripts/ci/seed-cargo-feed.ps1 diff --git a/.azure-pipelines/README.md b/.azure-pipelines/README.md index d239b4626..2306f94e6 100644 --- a/.azure-pipelines/README.md +++ b/.azure-pipelines/README.md @@ -22,4 +22,33 @@ from crates.io and npmjs, helping ensure secure and vetted consumption of third developer iteration. - The ADO pipeline can also be triggered on PRs via `/azp run` (see [docs/pull-requests.md](../docs/pull-requests.md)) when reviewers want - to run the official build against a change before merge. \ No newline at end of file + to run the official build against a change before merge. 
+
+### Public crates.io mirror feed (fork/PR builds)
+
+Fork PRs lose `System.AccessToken`, so the network-isolated ADO build cannot
+authenticate to the internal feed. Those builds instead redirect crates.io to
+the **public, anonymous-read** `MxcDependencies` feed
+([`.cargo/config.public.toml`](.cargo/config.public.toml)). Anonymous clients
+can only read crate versions that have already been **saved** to that feed;
+pulling a not-yet-cached version from the crates.io upstream requires
+authentication and otherwise returns HTTP 401.
+
+Because fork PRs only run the GitHub Actions gates (which build against real
+crates.io), a fork-PR lockfile bump can introduce a brand-new transitive crate
+that was never cached in the public feed — and the next in-repo PR or `main`
+push then fails `cargo fetch` with a 401.
+
+[`Seed.Cargo.Feed.yml`](Seed.Cargo.Feed.yml) closes that gap. It runs on `main`
+whenever `src/Cargo.lock` changes (and on a daily schedule), and
+authenticated-downloads every locked crate's `.crate` file via
+[`scripts/ci/seed-cargo-feed.ps1`](../scripts/ci/seed-cargo-feed.ps1), which
+permanently saves each version into the feed. It requires the variable group
+`MXC-Public-Feed-Seeding` with a secret `publicFeedPat` (a PAT with Packaging
+Read scope on the org backing the feed). The script can also be run locally to
+seed the feed on demand:
+
+```pwsh
+$env:CARGO_FEED_PAT = '<your-pat>'
+pwsh ./scripts/ci/seed-cargo-feed.ps1
+```
\ No newline at end of file
diff --git a/.azure-pipelines/Seed.Cargo.Feed.yml b/.azure-pipelines/Seed.Cargo.Feed.yml
new file mode 100644
index 000000000..1f42db9c5
--- /dev/null
+++ b/.azure-pipelines/Seed.Cargo.Feed.yml
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+#
+# Seeds the public MxcDependencies cargo feed from src/Cargo.lock.
+#
+# Why: fork PRs lose System.AccessToken and so only run the GitHub Actions
+# gates, which build against real crates.io — they never exercise the
+# network-isolated MXC-PR-Build ADO pipeline that redirects crates.io to the
+# anonymous-read MxcDependencies feed (.azure-pipelines/.cargo/config.public.toml).
+# A fork-PR lockfile bump can therefore merge a brand-new transitive crate that
+# was never cached in the public feed, and the next in-repo PR / `main` push
+# then 401s in `cargo fetch`. This pipeline closes that gap by pulling every
+# locked crate into the feed via an authenticated fetch whenever Cargo.lock
+# changes (plus a daily safety-net run).
+#
+# Setup: requires a variable group `MXC-Public-Feed-Seeding` defining the secret
+# `publicFeedPat` — a PAT with Packaging (Read) scope on the dev.azure.com/shine-oss
+# org that backs the public feed. Manage it in Dart alongside MXC-ESRP-Signing.
+
+trigger:
+  branches:
+    include:
+      - main
+  paths:
+    include:
+      - src/Cargo.lock
+      - scripts/ci/seed-cargo-feed.ps1
+      - .azure-pipelines/Seed.Cargo.Feed.yml
+      - .azure-pipelines/.cargo/config.public.toml
+
+pr: none  # no PR-triggered runs; this pipeline only runs from main and the schedule
+
+schedules:
+  - cron: "0 6 * * *"  # 06:00 UTC every day (Azure Pipelines cron schedules are UTC)
+    displayName: Daily feed seed (safety net)
+    branches:
+      include:
+        - main
+    always: true  # run even when main has no new commits since the last scheduled run
+
+name: $(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r)
+
+resources:
+  repositories:
+    - repository: 1ESPipelineTemplates
+      type: git
+      name: 1ESPipelineTemplates/1ESPipelineTemplates
+      ref: refs/tags/release
+
+variables:
+  - group: MXC-Public-Feed-Seeding
+
+extends:
+  template: v1/1ES.Unofficial.PipelineTemplate.yml@1ESPipelineTemplates
+  parameters:
+    pool:
+      name: Azure-Pipelines-1ESPT-ExDShared
+      image: windows-2022
+      os: windows
+
+    customBuildTags:
+      - ES365AIMigrationTooling
+
+    stages:
+      - stage: Seed
+        displayName: 'Seed public cargo feed'
+        jobs:
+          - job: seed
+            displayName: 'Seed MxcDependencies from Cargo.lock'
+            steps:
+              - checkout: self
+
+              - task: PowerShell@2
+                displayName: 'Seed cargo feed from Cargo.lock'
+                inputs:
+                  pwsh: true  # PowerShell 7+: the script uses ForEach-Object -Parallel
+                  filePath: scripts/ci/seed-cargo-feed.ps1
+                env:
+                  # Secret variables are not auto-exported to the environment;
+                  # map it explicitly so the PAT never appears on a command line.
+                  CARGO_FEED_PAT: $(publicFeedPat)
diff --git a/scripts/ci/seed-cargo-feed.ps1 b/scripts/ci/seed-cargo-feed.ps1
new file mode 100644
index 000000000..f0a9dbd27
--- /dev/null
+++ b/scripts/ci/seed-cargo-feed.ps1
@@ -0,0 +1,145 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+<#
+.SYNOPSIS
+    Seed the public MxcDependencies cargo feed with every crates.io package
+    pinned in src/Cargo.lock.
+
+.DESCRIPTION
+    The fork/PR ("Unofficial") ADO build redirects crates.io to the public,
+    anonymous-read MxcDependencies feed (see .azure-pipelines/.cargo/config.public.toml).
+    That feed is an upstream-caching proxy: anonymous clients can only READ
+    crate versions that have already been saved to the feed — pulling a new
+    version from the crates.io upstream requires authentication, otherwise the
+    feed returns HTTP 401.
+
+    A crate version is persisted into the feed only when its `.crate` FILE is
+    downloaded by an authenticated client (reading the sparse index alone is
+    not enough). This script walks src/Cargo.lock and authenticated-downloads
+    every crates.io `.crate` file, which permanently saves each version so the
+    anonymous CI lane never 401s on a not-yet-cached crate.
+
+    Why this is needed: fork PRs lose System.AccessToken and therefore only run
+    the GitHub Actions gates, which build against real crates.io. They never
+    exercise the network-isolated feed, so a fork-PR lockfile bump can merge a
+    brand-new transitive crate that was never cached in the public feed — and
+    the next in-repo PR or `main` push then fails `cargo fetch` with a 401.
+
+    Authenticated downloads of already-persisted crates simply return the cached
+    file, so this script is idempotent and safe to re-run.
+
+.PARAMETER LockFile
+    Path to Cargo.lock.
Defaults to src/Cargo.lock relative to this script.
+
+.PARAMETER ConfigToml
+    Path to the cargo config that declares the public feed's sparse index.
+    The index URL is read from here so there is a single source of truth.
+    Defaults to .azure-pipelines/.cargo/config.public.toml.
+
+.PARAMETER IndexUrl
+    Override for the sparse-index URL. When omitted it is parsed from ConfigToml.
+
+.PARAMETER Pat
+    A Personal Access Token with Packaging (Read) scope on the Azure DevOps
+    org that backs the public feed. Defaults to the CARGO_FEED_PAT environment
+    variable so the token never has to appear on the command line.
+
+.PARAMETER ThrottleLimit
+    Maximum number of concurrent downloads. Defaults to 12.
+
+.EXAMPLE
+    $env:CARGO_FEED_PAT = '<your-pat>'
+    pwsh ./scripts/ci/seed-cargo-feed.ps1
+#>
+[CmdletBinding()]
+param(
+    [string]$LockFile = (Join-Path $PSScriptRoot '..\..\src\Cargo.lock'),
+    [string]$ConfigToml = (Join-Path $PSScriptRoot '..\..\.azure-pipelines\.cargo\config.public.toml'),
+    [string]$IndexUrl,
+    [string]$Pat = $env:CARGO_FEED_PAT,
+    [int]$ThrottleLimit = 12
+)
+
+$ErrorActionPreference = 'Stop'  # turn cmdlet errors into terminating errors
+
+if ([string]::IsNullOrWhiteSpace($Pat)) {
+    Write-Host '##[error]No PAT supplied. Set the CARGO_FEED_PAT environment variable or pass -Pat.'
+    exit 1
+}
+
+# Azure DevOps accepts a PAT as the password of HTTP Basic auth (any username).
+$auth = 'Basic ' + [Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes("pat:$Pat"))
+
+# Resolve the sparse-index URL from config.public.toml unless one was passed.
+if ([string]::IsNullOrWhiteSpace($IndexUrl)) {
+    $tomlText = Get-Content -Raw -LiteralPath $ConfigToml
+    if ($tomlText -match 'index\s*=\s*"sparse\+([^"]+)"') {
+        $IndexUrl = $Matches[1]
+    } else {
+        Write-Host "##[error]Could not find a sparse index URL in $ConfigToml"
+        exit 1
+    }
+}
+$IndexUrl = $IndexUrl.TrimEnd('/')  # "/config.json" is appended to this value below
+Write-Host "Public feed index: $IndexUrl"
+
+# The crate-download URL template lives in the feed's sparse-index config.json
+# (the `dl` field), e.g.
https://.../cargo/api/v1/crates/{crate}/{version}/download.
+# Reading it (rather than hardcoding org GUIDs) keeps the script robust to feed
+# re-provisioning.
+$config = Invoke-RestMethod -Uri "$IndexUrl/config.json" -Headers @{ Authorization = $auth }
+$dlTemplate = $config.dl
+if ([string]::IsNullOrWhiteSpace($dlTemplate)) {
+    Write-Host "##[error]Feed config.json did not return a 'dl' download template."
+    exit 1
+}
+
+# Parse Cargo.lock for crates.io packages (skip workspace/git/path members).
+# Only [[package]] tables are read; any other table header ends the entry.
+$entries = [System.Collections.Generic.List[hashtable]]::new()
+$entry = $null
+foreach ($line in Get-Content -LiteralPath $LockFile) {
+    if ($line -match '^\[') {
+        $entry = $null
+        if ($line -eq '[[package]]') { $entry = @{}; $entries.Add($entry) }
+    } elseif ($null -ne $entry -and $line -match '^(name|version|source) = "(.+)"$') {
+        $entry[$Matches[1]] = $Matches[2]
+    }
+}
+$pkgs = [System.Collections.Generic.List[object]]::new()
+foreach ($entry in $entries) {
+    if ($entry['name'] -and $entry['source'] -like 'registry+*crates.io-index') {
+        $pkgs.Add([pscustomobject]@{ name = $entry['name']; version = $entry['version'] })
+    }
+}
+
+Write-Host "##[section]Seeding $($pkgs.Count) crates.io packages from $LockFile"
+
+# Download each `.crate` file authenticated. The download is what persists the
+# version into the feed; already-persisted crates just return the cache.
+$failures = @($pkgs | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel {
+    $ProgressPreference = 'SilentlyContinue'
+    $pkg = $_  # keep the package: inside catch, $_ is the error record instead
+    $url = $using:dlTemplate
+    $hasMarker = $url -cmatch '\{(crate|version|(lower)?prefix|sha256-checksum)\}'
+    if (-not $hasMarker) { $url += '/{crate}/{version}/download' }  # Cargo spec: bare base URL
+    $url = $url.Replace('{crate}', $pkg.name).Replace('{version}', $pkg.version)
+    $tmp = [System.IO.Path]::GetTempFileName()
+    try {
+        Invoke-WebRequest -Uri $url -Headers @{ Authorization = $using:auth } `
+            -UseBasicParsing -OutFile $tmp -ErrorAction Stop | Out-Null
+    } catch {
+        "$($pkg.name) $($pkg.version) :: $($_.Exception.Message)"
+    } finally {
+        Remove-Item $tmp -Force -ErrorAction SilentlyContinue
+    }
+})
+
+if ($failures.Count -gt 0) {
+    Write-Host "##[error]Failed to seed $($failures.Count) crate(s):"
+    $failures | ForEach-Object { Write-Host " $_" }
+    exit 1
+}
+
+Write-Host "Successfully seeded all $($pkgs.Count) crates into the public feed."