diff --git a/docs/front-matter.md b/docs/front-matter.md index cdcf9eb0..aa81206a 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -16,20 +16,17 @@ engine: copilot # Engine identifier. Defaults to copilot. Currently only 'copilo # id: copilot # model: claude-opus-4.7 # timeout-minutes: 30 -workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `checkout:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. +workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `repos:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. pool: AZS-1ES-L-MMS-ubuntu-22.04 # Agent pool name (string format). Defaults to AZS-1ES-L-MMS-ubuntu-22.04. # pool: # Alternative object format (required for 1ES if specifying os) # name: AZS-1ES-L-MMS-ubuntu-22.04 # os: linux # Operating system: "linux" or "windows". Defaults to "linux". -repositories: # a list of repository resources available to the pipeline (for pre/post jobs, templates, etc.) 
- - repository: reponame - type: git - name: my-org/my-repo - - repository: another-repo - type: git - name: my-org/another-repo -checkout: # optional list of repository aliases for the agent to checkout and work with (must be subset of repositories) - - reponame # only checkout reponame, not another-repo +repos: # compact repository declarations (replaces repositories: + checkout:) + - my-org/my-repo # shorthand: alias="my-repo", type=git, ref=refs/heads/main, checkout=true + - reponame=my-org/another-repo # shorthand with explicit alias + - name: my-org/templates # object form for full control + ref: refs/heads/release/2.x + checkout: false # declared as resource only, not checked out by the agent tools: # optional tool configuration bash: ["cat", "ls", "grep"] # bash command allow-list (defaults to safe built-in list) edit: true # enable file editing tool (default: true) @@ -152,19 +149,83 @@ Build the project and run all tests... ## Workspace Defaults The `workspace:` field controls which directory the agent runs in. When it is -not set explicitly, the compiler chooses a default based on the `checkout:` -list: +not set explicitly, the compiler chooses a default based on which repositories +are checked out (entries in `repos:` with `checkout: true`, which is the +default): -- If `checkout:` is empty (i.e. only the pipeline's own repository is checked - out via the implicit `self`), `workspace:` defaults to **`root`** — the - agent runs in the pipeline's working directory root. -- If `checkout:` lists one or more additional repository aliases, - `workspace:` defaults to **`repo`** — the agent runs inside the first - checked-out repository's directory. +- If no additional repositories are checked out (i.e. only the pipeline's own + repository is checked out via the implicit `self`), `workspace:` defaults to + **`root`** — the agent runs in the pipeline's working directory root. 
+- If one or more additional repositories are checked out, `workspace:` defaults + to **`repo`** — the agent runs inside the trigger repository's directory. Set `workspace:` explicitly to `root`, `repo` (alias `self`), or a specific checked-out repository alias to override this behavior. +## Repositories (`repos:`) + +The `repos:` field provides a compact way to declare additional repository +resources and control which ones the agent checks out. It replaces the legacy +`repositories:` + `checkout:` pair. + +Each entry can be: + +| Form | Syntax | Description | +|------|--------|-------------| +| **Shorthand** | `- org/repo` | Alias derived from last segment, type=git, ref=refs/heads/main, checkout=true | +| **Shorthand with alias** | `- alias=org/repo` | Explicit alias before `=` | +| **Object** | `- name: org/repo` | Full control over all fields | + +Object fields: + +| Field | Default | Description | +|------------|------------------------|-------------| +| `name` | *(required)* | Full `org/repo` name (maps to ADO `name:`) | +| `alias` | last segment of `name` | Repository alias (maps to ADO `repository:`) | +| `type` | `git` | ADO repository resource type | +| `ref` | `refs/heads/main` | Branch or tag reference | +| `checkout` | `true` | Whether the agent job clones this repo | + +### Examples + +Three repos, all checked out (most common case): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - my-org/docs +``` + +Mixed: two checked out, one resource-only (used by templates): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - name: my-org/pipeline-templates + checkout: false +``` + +Custom ref and explicit alias: + +```yaml +repos: + - name: my-org/docs + alias: docs-v2 + ref: refs/heads/release/2.x +``` + +### Legacy syntax (auto-rewritten) + +The legacy `repositories:` + `checkout:` fields are auto-converted to +`repos:` by the [`repos_unified` codemod](codemods.md). 
On the next +`ado-aw compile`, any source that still uses the legacy fields is +rewritten in place to the new shape — each `repositories:` entry +becomes a `repos:` entry, with `checkout: false` added for entries +that weren't listed under `checkout:`. Mixing the legacy fields with +an existing `repos:` block is rejected; pick one shape. + ## Filter Validation The compiler validates filter configurations at compile time and will emit diff --git a/src/compile/codemods/0001_repos_unified.rs b/src/compile/codemods/0001_repos_unified.rs new file mode 100644 index 00000000..88511c32 --- /dev/null +++ b/src/compile/codemods/0001_repos_unified.rs @@ -0,0 +1,435 @@ +//! `repositories:` + `checkout:` → unified `repos:` +//! +//! Before this codemod, additional repository resources had to be +//! declared twice: once under `repositories:` (mirroring ADO's +//! `resources.repositories` schema) and again under `checkout:` (the +//! alias list deciding which to actually clone). This codemod folds +//! both into a single `repos:` block where each entry carries its own +//! `checkout: true|false` flag (defaulting to `true`). +//! +//! Conversion rules: +//! - Each `repositories:` entry becomes a `repos:` entry preserving +//! `repository` (→ `alias`), `name`, `type`, and `ref`. +//! - Entries listed in `checkout:` keep `checkout: true` (the default, +//! so the field is omitted on output). +//! - Entries NOT listed in `checkout:` get an explicit `checkout: false`. +//! - `checkout:` aliases that don't match any `repositories:` entry are +//! rejected (the legacy compiler also rejected these via +//! `validate_checkout_list`). +//! - Mixing the legacy fields with an already-present `repos:` is +//! rejected — the user must pick one shape. +//! - Sources with neither legacy field are no-ops (idempotent). 
+ +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{take_key, Codemod, CodemodContext}; + +pub static CODEMOD: Codemod = Codemod { + id: "repos_unified", + summary: "repositories: + checkout: -> unified repos:", + introduced_in: "0.28.0", + apply: apply_codemod, +}; + +fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result<bool> { + let has_repos = fm.contains_key(Value::String("repos".to_string())); + let has_repositories = fm.contains_key(Value::String("repositories".to_string())); + let has_checkout = fm.contains_key(Value::String("checkout".to_string())); + + // No legacy fields → already in the new shape (or doesn't use any + // additional repos at all). No-op (idempotent). + if !has_repositories && !has_checkout { + return Ok(false); + } + + // Mixing the legacy fields with the new `repos:` is ambiguous — + // refuse rather than guess which one wins. + if has_repos { + bail!( + "front matter has both the new `repos:` field and the legacy \ `repositories:`/`checkout:` fields. Pick one: remove the \ legacy fields to use `repos:`, or remove `repos:` to let \ this codemod convert the legacy fields." + ); + } + + // `checkout:` without any `repositories:` is incoherent — the + // aliases would have nothing to refer to. The legacy compiler + // would have failed `validate_checkout_list` for the same reason. + if has_checkout && !has_repositories { + bail!( + "front matter has `checkout:` but no `repositories:`. \ Either remove `checkout:` or add the corresponding \ `repositories:` entries (then re-run compile to migrate \ to `repos:`)." + ); + } + + let repositories = take_key(fm, "repositories").expect("checked above"); + let checkout = take_key(fm, "checkout"); + + let repositories_seq = match repositories { + Value::Sequence(s) => s, + Value::Null => Vec::new(), + other => bail!( + "front matter `repositories:` must be a sequence, got {}", + describe(&other) + ), + }; + + // Collect the checkout alias allow-list.
Order doesn't matter for + // membership; we preserve `repositories:` order in the output. + let checkout_aliases: Vec<String> = match checkout { + None | Some(Value::Null) => Vec::new(), + Some(Value::Sequence(s)) => { + let mut out = Vec::with_capacity(s.len()); + for v in s { + match v { + Value::String(name) => out.push(name), + other => bail!( + "front matter `checkout:` entries must be strings, got {}", + describe(&other) + ), + } + } + out + } + Some(other) => bail!( + "front matter `checkout:` must be a sequence of strings, got {}", + describe(&other) + ), + }; + + // Trivially-empty sources: an empty `repositories:` (and either no + // `checkout:` or an empty one) carries no semantic content. We + // already removed the vacuous keys from the mapping above, but + // report this as a no-op so the caller doesn't surface a + // "deprecated shapes" warning or rewrite the file just to drop + // empty stubs. + if repositories_seq.is_empty() && checkout_aliases.is_empty() { + return Ok(false); + } + + // Track which checkout aliases we've matched so we can flag + // dangling references (alias listed in checkout but absent from + // repositories). + let mut matched: std::collections::BTreeSet<String> = + std::collections::BTreeSet::new(); + + let mut repos_seq: Vec<Value> = Vec::with_capacity(repositories_seq.len()); + for repo in repositories_seq { + let mut repo_map = match repo { + Value::Mapping(m) => m, + other => bail!( + "front matter `repositories:` entries must be mappings, got {}", + describe(&other) + ), + }; + + // The legacy `repository:` key becomes the new `alias:` key. + let alias_value = repo_map.remove(Value::String("repository".to_string())); + let alias_str = match alias_value.as_ref() { + Some(Value::String(s)) => Some(s.clone()), + Some(other) => bail!( + "front matter `repositories:` entry has non-string `repository:` field ({}); \ manual migration required", + describe(other) + ), + None => None, + }; + + // Build the new entry.
Preserve insertion order: + // alias, type, name, ref, checkout. + let mut new_entry = Mapping::new(); + if let Some(alias) = alias_str.as_deref() { + new_entry.insert( + Value::String("alias".to_string()), + Value::String(alias.to_string()), + ); + } + if let Some(v) = repo_map.remove(Value::String("type".to_string())) { + new_entry.insert(Value::String("type".to_string()), v); + } + if let Some(v) = repo_map.remove(Value::String("name".to_string())) { + new_entry.insert(Value::String("name".to_string()), v); + } else { + bail!( + "front matter `repositories:` entry is missing the required `name:` field; \ + manual migration required" + ); + } + if let Some(v) = repo_map.remove(Value::String("ref".to_string())) { + new_entry.insert(Value::String("ref".to_string()), v); + } + // Carry over any unknown keys verbatim so the codemod is + // forward-compatible with future ADO `resources.repositories` + // fields we don't yet model. + for (k, v) in repo_map { + new_entry.insert(k, v); + } + + // Determine checkout flag. If `checkout:` was absent entirely, + // legacy semantics treat all repositories as resource-only + // (the agent job didn't clone any). If `checkout:` was + // present, only listed aliases get cloned. + let do_checkout = if !has_checkout { + // Legacy: no `checkout:` block at all means nothing was + // cloned by the agent. + false + } else if let Some(alias) = alias_str.as_deref() { + let listed = checkout_aliases.iter().any(|a| a == alias); + if listed { + matched.insert(alias.to_string()); + } + listed + } else { + // Anonymous entry (no `repository:` alias) cannot be + // referenced from `checkout:` — treat as resource-only. + false + }; + + // Only emit the `checkout` field when it deviates from the + // default of `true`. Keeps rewritten output compact. 
+ if !do_checkout { + new_entry.insert( + Value::String("checkout".to_string()), + Value::Bool(false), + ); + } + + repos_seq.push(Value::Mapping(new_entry)); + } + + // Surface dangling checkout aliases (listed but no matching repo). + for alias in &checkout_aliases { + if !matched.contains(alias) { + bail!( + "front matter `checkout:` references alias `{}` that does not appear \ + in `repositories:`; manual migration required", + alias + ); + } + } + + // Insert `repos:` only when we actually have entries; an empty + // `repositories:` should not produce an empty `repos:` block. + if !repos_seq.is_empty() { + fm.insert( + Value::String("repos".to_string()), + Value::Sequence(repos_seq), + ); + } + + Ok(true) +} + +fn describe(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Sequence(_) => "sequence", + Value::Mapping(_) => "mapping", + Value::Tagged(_) => "tagged", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + let changed = apply_codemod(&mut m, &CodemodContext {}).expect("apply"); + assert!(changed, "expected codemod to fire on input:\n{}", input); + m + } + + fn run_noop(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + let changed = apply_codemod(&mut m, &CodemodContext {}).expect("apply"); + assert!(!changed, "expected codemod to be a no-op on input:\n{}", input); + m + } + + fn run_err(input: &str) -> String { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + format!( + "{}", + apply_codemod(&mut m, &CodemodContext {}).unwrap_err() + ) + } + + fn repos(m: &Mapping) -> &Vec { + m.get(Value::String("repos".into())) + .expect("repos key") + .as_sequence() + .expect("repos sequence") + } + + #[test] + fn converts_full_legacy_block_with_checkout_subset() { + let after = run( + "name: x\n\ + 
repositories:\n\ + - repository: tools\n type: git\n name: my-org/tools\n\ + - repository: schemas\n type: git\n name: my-org/schemas\n\ + - repository: docs\n type: git\n name: my-org/docs\n\ + checkout:\n- tools\n- schemas\n", + ); + // Legacy keys removed + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + + let r = repos(&after); + assert_eq!(r.len(), 3); + + let r0 = r[0].as_mapping().unwrap(); + assert_eq!(r0.get(Value::String("alias".into())).unwrap().as_str(), Some("tools")); + assert_eq!(r0.get(Value::String("name".into())).unwrap().as_str(), Some("my-org/tools")); + assert_eq!(r0.get(Value::String("type".into())).unwrap().as_str(), Some("git")); + // checked out -> default, no explicit `checkout:` key. + assert!(r0.get(Value::String("checkout".into())).is_none()); + + let r2 = r[2].as_mapping().unwrap(); + // `docs` is NOT in checkout list -> explicit `checkout: false`. + assert_eq!( + r2.get(Value::String("checkout".into())), + Some(&Value::Bool(false)) + ); + } + + #[test] + fn converts_repositories_only_to_resource_only_entries() { + // `repositories:` without `checkout:` means no entry was + // cloned by the agent in the legacy semantics. 
+ let after = run( + "name: x\n\ + repositories:\n\ + - repository: tpl\n type: git\n name: org/tpl\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!( + r[0].as_mapping().unwrap().get(Value::String("checkout".into())), + Some(&Value::Bool(false)), + "without an explicit checkout list, repos default to resource-only" + ); + } + + #[test] + fn preserves_ref_field() { + let after = run( + "name: x\n\ + repositories:\n\ + - repository: docs\n type: git\n name: org/docs\n ref: refs/heads/release/2.x\n\ + checkout: [docs]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("ref".into())).unwrap().as_str(), + Some("refs/heads/release/2.x") + ); + } + + #[test] + fn rejects_mixing_repos_and_legacy_fields() { + let err = run_err( + "name: x\n\ + repos:\n- org/foo\n\ + repositories:\n- repository: bar\n type: git\n name: org/bar\n", + ); + assert!(err.contains("Pick one"), "got: {}", err); + } + + #[test] + fn rejects_checkout_without_repositories() { + let err = run_err("name: x\ncheckout: [foo]\n"); + assert!(err.contains("`checkout:` but no `repositories:`"), "got: {}", err); + } + + #[test] + fn rejects_dangling_checkout_alias() { + let err = run_err( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n\ + checkout: [b]\n", + ); + assert!(err.contains("does not appear in `repositories:`"), "got: {}", err); + } + + #[test] + fn no_legacy_fields_is_noop() { + let after = run_noop("name: x\ndescription: y\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + } + + #[test] + fn already_using_repos_alone_is_noop() { + // Idempotency: a file with only `repos:` (no legacy fields) is + // a no-op. 
+ let after = run_noop( + "name: x\nrepos:\n- my-org/tools\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!(r[0].as_str(), Some("my-org/tools")); + } + + #[test] + fn empty_repositories_sequence_does_not_emit_repos_key() { + // A trivially-empty `repositories: []` carries no semantic + // content — the codemod cleans up the stub key but reports + // changed=false so the caller doesn't surface a rewrite + // warning. + let after = run_noop("name: x\nrepositories: []\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + } + + #[test] + fn rejects_non_mapping_repository_entry() { + let err = run_err( + "name: x\nrepositories:\n- a-string-not-a-mapping\n", + ); + assert!(err.contains("must be mappings"), "got: {}", err); + } + + #[test] + fn carries_over_unknown_repository_keys() { + // Forward-compat: don't drop fields we don't yet model. + let after = run( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n trigger: none\n\ + checkout: [a]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("trigger".into())).unwrap().as_str(), + Some("none") + ); + } + + #[test] + fn idempotent_when_run_twice() { + // Critical codemod invariant: running twice produces the same + // final state as running once. 
+ let mut m: Mapping = serde_yaml::from_str( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n\ + checkout: [a]\n", + ) + .unwrap(); + let first = apply_codemod(&mut m, &CodemodContext {}).expect("first"); + assert!(first, "first run should fire"); + let snapshot = m.clone(); + let second = apply_codemod(&mut m, &CodemodContext {}).expect("second"); + assert!(!second, "second run should be a no-op"); + assert_eq!(m, snapshot, "second run must not mutate"); + } +} diff --git a/src/compile/codemods/helpers.rs b/src/compile/codemods/helpers.rs index 59331fa2..7485f27a 100644 --- a/src/compile/codemods/helpers.rs +++ b/src/compile/codemods/helpers.rs @@ -1,6 +1,6 @@ -//! Helpers for writing migrations against `serde_yaml::Mapping`. +//! Helpers for writing codemods against `serde_yaml::Mapping`. //! -//! Migrations should prefer these over raw `Mapping` manipulation so that +//! Codemods should prefer these over raw `Mapping` manipulation so that //! conflicts (e.g. both old and new keys present) are surfaced rather than //! silently overwritten. @@ -9,7 +9,6 @@ use serde_yaml::{Mapping, Value}; /// Conflict policy used by [`rename_key`] when the destination key is /// already present. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] #[derive(Debug, Clone, Copy)] pub enum ConflictPolicy { @@ -22,17 +21,14 @@ pub enum ConflictPolicy { } /// Remove and return the value at `key`, or `None` if absent. -// TODO(codemods): remove when the first real codemod is registered. -#[allow(dead_code)] pub fn take_key(m: &mut Mapping, key: &str) -> Option { m.remove(Value::String(key.to_string())) } /// Insert `value` at `key`, returning `Err` if the key already exists. /// -/// Prefer this over `Mapping::insert` in migrations: silent overwrite is +/// Prefer this over `Mapping::insert` in codemods: silent overwrite is /// almost always a bug when transforming user data. 
-// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn insert_no_overwrite( m: &mut Mapping, @@ -65,10 +61,9 @@ pub fn insert_no_overwrite( /// Returns `Ok(false)` when `old` was absent (no-op). /// /// The mapping is left **unchanged** on the error path. Callers can -/// rely on this invariant when chaining migrations: a failed rename +/// rely on this invariant when chaining codemods: a failed rename /// won't leave the mapping in a half-mutated state for the next call /// to inspect. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn rename_key( m: &mut Mapping, diff --git a/src/compile/codemods/mod.rs b/src/compile/codemods/mod.rs index 6e308eca..b952ac28 100644 --- a/src/compile/codemods/mod.rs +++ b/src/compile/codemods/mod.rs @@ -33,15 +33,10 @@ use anyhow::{Context, Result}; use serde_yaml::Mapping; mod helpers; -// `#[allow(unused_imports)]` here mirrors the per-item -// `#[allow(dead_code)]` annotations in `helpers.rs`. While the -// CODEMODS registry is empty, no in-crate caller exercises these -// re-exports — the lint warns. Once the first real codemod lands -// and uses one of these helpers, both the re-export attribute and -// the per-item `dead_code` allows on `helpers.rs` should be -// removed. -// TODO(codemods): remove when the first real codemod is registered. -#[allow(unused_imports)] +#[path = "0001_repos_unified.rs"] +mod m0001_repos_unified; + +#[allow(unused_imports)] // Re-exported for future codemods; only `take_key` is in-tree use. pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; /// Forward-compatible context passed to every codemod. Currently @@ -74,7 +69,6 @@ pub struct Codemod { pub summary: &'static str, /// Compiler version that introduced this codemod (e.g. "0.27.0"). /// Provenance only; not consumed by the runner. - // TODO(codemods): remove when the first real codemod is registered. 
#[allow(dead_code)] pub introduced_in: &'static str, /// The transformation. Returns `Ok(true)` when the codemod @@ -89,7 +83,9 @@ pub struct Codemod { /// having run first (e.g. A renames `foo` → `bar`, B operates on /// `bar`); idempotency means any codemod can re-run on any source /// without harm. -pub static CODEMODS: &[&'static Codemod] = &[]; +pub static CODEMODS: &[&'static Codemod] = &[ + &m0001_repos_unified::CODEMOD, +]; /// Result of running the codemod registry on a single front-matter /// mapping. @@ -109,7 +105,6 @@ pub struct AppliedCodemod { impl CodemodReport { /// An empty report (no codemod fired). - // TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn empty() -> Self { Self { @@ -123,8 +118,6 @@ impl CodemodReport { } /// IDs of codemods that ran, in order. Helpful for tests. - // TODO(codemods): remove when the first real codemod is registered. - #[allow(dead_code)] pub fn applied_ids(&self) -> Vec<&'static str> { self.applied.iter().map(|a| a.id).collect() } @@ -134,7 +127,6 @@ impl CodemodReport { /// /// Equivalent to [`apply_codemods_with`] called with the global /// [`CODEMODS`] registry. -// TODO(codemods): remove when the first real codemod is registered. 
#[allow(dead_code)] pub fn apply_codemods(fm: &mut Mapping) -> Result<CodemodReport> { apply_codemods_with(fm, CODEMODS) } diff --git a/src/compile/common.rs b/src/compile/common.rs index a79b8589..9aed8350 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -4,7 +4,7 @@ use anyhow::{Context, Result}; use std::collections::{HashMap, HashSet}; use std::path::Path; -use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository}; +use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository, ReposItem}; use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; @@ -574,6 +574,124 @@ pub fn generate_checkout_self() -> String { "- checkout: self".to_string() } + +// ────────────────────────────────────────────────────────────────────────────── +// Compact `repos:` lowering +// ────────────────────────────────────────────────────────────────────────────── + +/// Lower a `repos:` list into the internal `(Vec<Repository>, Vec<String>)` pair +/// consumed by the rest of the compiler. Also validates aliases for collisions. +pub fn lower_repos(items: &[ReposItem]) -> Result<(Vec<Repository>, Vec<String>)> { + let mut repositories: Vec<Repository> = Vec::new(); + let mut checkout: Vec<String> = Vec::new(); + let mut seen_aliases: HashSet<String> = HashSet::new(); + + for item in items { + let (name, alias, repo_type, repo_ref, do_checkout) = match item { + ReposItem::Shorthand(s) => { + let (alias, name) = parse_shorthand(s)?; + (name, alias, "git".to_string(), "refs/heads/main".to_string(), true) + } + ReposItem::Full(entry) => { + let alias = match &entry.alias { + Some(a) => a.clone(), + None => derive_alias(&entry.name)?, + }; + ( + entry.name.clone(), + alias, + entry.repo_type.clone(), + entry.repo_ref.clone(), + entry.checkout, + ) + } + }; + + // Reject duplicate aliases + if !seen_aliases.insert(alias.clone()) { + anyhow::bail!( + "Duplicate repository alias '{}' in repos.
\ + Use the `alias` field (or `alias=org/repo` shorthand) to disambiguate.", + alias + ); + } + + // Reject reserved names + if RESERVED_WORKSPACE_NAMES.contains(&alias.as_str()) { + anyhow::bail!( + "Repository alias '{}' is reserved by the 'workspace:' resolver ({:?}). \ Rename the alias to avoid ambiguity.", + alias, + RESERVED_WORKSPACE_NAMES + ); + } + + repositories.push(Repository { + repository: alias.clone(), + repo_type, + name, + repo_ref, + }); + + if do_checkout { + checkout.push(alias); + } + } + + Ok((repositories, checkout)) +} + +/// Parse a shorthand string: `"org/repo"` → (derived alias, name), or +/// `"alias=org/repo"` → (alias, name). +fn parse_shorthand(s: &str) -> Result<(String, String)> { + if let Some((alias, name)) = s.split_once('=') { + let alias = alias.trim().to_string(); + let name = name.trim().to_string(); + if alias.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty alias before '='", s); + } + if name.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty name after '='", s); + } + Ok((alias, name)) + } else { + let alias = derive_alias(s)?; + Ok((alias, s.to_string())) + } +} + +/// Derive the alias from a full `org/repo` name (last path segment). +fn derive_alias(name: &str) -> Result<String> { + // Trim trailing slashes to handle "org/repo/" gracefully + let trimmed = name.trim_end_matches('/'); + let alias = trimmed + .rsplit('/') + .next() + .unwrap_or(trimmed) + .to_string(); + if alias.is_empty() { + anyhow::bail!( + "Cannot derive a repository alias from '{}'. \ Provide an explicit `alias` field.", + name + ); + } + Ok(alias) +} + +/// Resolve the `repos:` field in a `FrontMatter` into the canonical +/// `(Vec<Repository>, Vec<String>)` pair consumed by the rest of the compiler.
+/// +/// The legacy `repositories:` + `checkout:` fields are converted to `repos:` +/// by the `repos_unified` codemod (`src/compile/codemods/0001_repos_unified.rs`) +/// before typed deserialization, so by the time this function runs the only +/// shape it sees is `repos:`. +pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec<Repository>, Vec<String>)> { + if front_matter.repos.is_empty() { + return Ok((Vec::new(), Vec::new())); + } + lower_repos(&front_matter.repos) +} + /// Names that are reserved by the `workspace:` resolver and therefore cannot /// be used as repository aliases / `checkout:` entries. If a user defines a /// repository named `repo` and writes `workspace: repo`, the special-cased @@ -5942,4 +6060,230 @@ mod tests { assert!(out.contains("name: org/repo-a"), "out: {out}"); assert!(out.contains("ref: refs/heads/develop"), "out: {out}"); } + + // ────────────────────────────────────────────────────────────────────── + // Tests for compact `repos:` lowering + // ────────────────────────────────────────────────────────────────────── + + use super::{lower_repos, resolve_repos, parse_shorthand, derive_alias}; + use crate::compile::types::{ReposItem, RepoEntry}; + + #[test] + fn test_repos_shorthand_simple() { + let items = vec![ReposItem::Shorthand("my-org/my-repo".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "my-repo"); + assert_eq!(repos[0].name, "my-org/my-repo"); + assert_eq!(repos[0].repo_type, "git"); + assert_eq!(repos[0].repo_ref, "refs/heads/main"); + assert_eq!(checkout, vec!["my-repo"]); + } + + #[test] + fn test_repos_shorthand_with_alias() { + let items = vec![ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "schemas"); + assert_eq!(repos[0].name, "my-org/internal-schemas"); + assert_eq!(checkout, vec!["schemas"]); + } +
+ #[test] + fn test_repos_object_form_defaults() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs"); + assert_eq!(repos[0].name, "my-org/docs"); + assert_eq!(checkout, vec!["docs"]); + } + + #[test] + fn test_repos_object_form_no_checkout() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/big-monorepo".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "big-monorepo"); + assert!(checkout.is_empty()); + } + + #[test] + fn test_repos_object_form_custom_ref() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: Some("docs-v2".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/release/2.x".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs-v2"); + assert_eq!(repos[0].repo_ref, "refs/heads/release/2.x"); + assert_eq!(checkout, vec!["docs-v2"]); + } + + #[test] + fn test_repos_rejects_duplicate_aliases() { + let items = vec![ + ReposItem::Shorthand("org/tools".to_string()), + ReposItem::Shorthand("other-org/tools".to_string()), + ]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("Duplicate repository alias 'tools'"), "{err}"); + } + + #[test] + fn test_repos_rejects_reserved_alias() { + let items = vec![ReposItem::Shorthand("org/self".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + let items = vec![ReposItem::Shorthand("org/repo".to_string())]; + let err = 
lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in shorthand form + let items = vec![ReposItem::Shorthand("self=org/some-repo".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in object form + let items = vec![ReposItem::Full(RepoEntry { + name: "org/fine-repo".to_string(), + alias: Some("root".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + } + + #[test] + fn test_repos_multiple_mixed() { + let items = vec![ + ReposItem::Shorthand("my-org/tools".to_string()), + ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string()), + ReposItem::Full(RepoEntry { + name: "my-org/templates".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + }), + ]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(checkout, vec!["tools", "schemas"]); + assert_eq!(repos[2].repository, "templates"); + } + + #[test] + fn test_parse_shorthand_simple() { + let (alias, name) = parse_shorthand("org/my-repo").unwrap(); + assert_eq!(alias, "my-repo"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_with_equals() { + let (alias, name) = parse_shorthand("custom=org/my-repo").unwrap(); + assert_eq!(alias, "custom"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_empty_alias_rejected() { + let err = parse_shorthand("=org/repo").unwrap_err(); + assert!(err.to_string().contains("empty alias"), "{err}"); + } + + #[test] + fn test_parse_shorthand_empty_name_rejected() { + let err = parse_shorthand("alias=").unwrap_err(); + assert!(err.to_string().contains("empty name"), 
"{err}"); + } + + #[test] + fn test_derive_alias_basic() { + assert_eq!(derive_alias("org/my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("a/b/c").unwrap(), "c"); + } + + #[test] + fn test_derive_alias_trailing_slash() { + // Trailing slash should be trimmed gracefully + assert_eq!(derive_alias("org/repo/").unwrap(), "repo"); + } + + #[test] + fn test_resolve_repos_empty() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert!(repos.is_empty()); + assert!(checkout.is_empty()); + } + + #[test] + fn test_resolve_repos_compact_syntax() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repos: + - my-org/tools + - schemas=my-org/internal-schemas + - name: my-org/docs + ref: refs/heads/v2 + checkout: false +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(repos[0].name, "my-org/tools"); + assert_eq!(repos[1].repository, "schemas"); + assert_eq!(repos[1].name, "my-org/internal-schemas"); + assert_eq!(repos[2].repository, "docs"); + assert_eq!(repos[2].repo_ref, "refs/heads/v2"); + assert_eq!(checkout, vec!["tools", "schemas"]); + } + + #[test] + fn test_resolve_repos_legacy_via_codemod() { + // Legacy `repositories:` + `checkout:` now flow through the + // `repos_unified` codemod and arrive at typed deserialization + // already in the unified `repos:` shape. 
+ use crate::compile::parse_markdown; + let source = "---\n\ + name: test\n\ + description: test\n\ + repositories:\n - repository: tools\n type: git\n name: my-org/tools\n\ + checkout:\n - tools\n\ + ---\nbody\n"; + let (fm, _) = parse_markdown(source).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(checkout, vec!["tools"]); + } } diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 63bfa15a..58cbfbf7 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -7,6 +7,7 @@ //! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; +pub(crate) use common::resolve_repos; pub mod extensions; pub(crate) mod filter_ir; mod gitattributes; @@ -150,9 +151,14 @@ async fn compile_pipeline_inner( debug!("Target: {:?}", front_matter.target); debug!("Engine: {} (model: {})", front_matter.engine.engine_id(), front_matter.engine.model().unwrap_or("default")); debug!("Schedule: {:?}", front_matter.schedule()); - debug!("Repositories: {}", front_matter.repositories.len()); debug!("MCP servers configured: {}", front_matter.mcp_servers.len()); + // Resolve repos: new compact syntax or legacy repositories: + checkout: + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + debug!("Repositories: {}", front_matter.repositories.len()); + // Validate checkout list against repositories common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; @@ -529,6 +535,11 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); + // Resolve repos (compact or legacy) + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; 
+    common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?;
 
     let compiler: Box = match front_matter.target {
diff --git a/src/compile/types.rs b/src/compile/types.rs
index 46ddb8c0..0cac5d3a 100644
--- a/src/compile/types.rs
+++ b/src/compile/types.rs
@@ -602,11 +602,20 @@ pub struct FrontMatter {
     /// Runtime configuration for language environments (e.g., Lean 4)
     #[serde(default)]
     pub runtimes: Option,
-    /// Additional repository resources
-    #[serde(default)]
+    /// Compact repository declarations.
+    /// Each entry declares a repository resource and optionally whether to check it out.
+    #[serde(default)]
+    pub repos: Vec<ReposItem>,
+    /// Lowered `Vec` form, populated by `lower_repos()` in
+    /// `compile/common.rs` after the codemod registry has converted any
+    /// legacy `repositories:` + `checkout:` shape into the unified
+    /// `repos:` shape. Not deserialized from YAML directly.
+    #[serde(skip)]
     pub repositories: Vec,
-    /// Repositories to checkout (subset of repositories)
-    #[serde(default)]
+    /// Lowered checkout-alias list, populated by `lower_repos()` from
+    /// `repos:` entries with `checkout: true`. Not deserialized from
+    /// YAML directly.
+    #[serde(skip)]
     pub checkout: Vec<String>,
     /// MCP server configurations
     #[serde(default, rename = "mcp-servers")]
@@ -697,6 +706,9 @@ impl SanitizeConfigTrait for FrontMatter {
         if let Some(ref mut r) = self.runtimes {
             r.sanitize_config_fields();
         }
+        for item in &mut self.repos {
+            item.sanitize();
+        }
         for repo in &mut self.repositories {
             repo.sanitize_config_fields();
         }
@@ -795,6 +807,99 @@ fn default_ref() -> String {
     "refs/heads/main".to_string()
 }
 
+// ──────────────────────────────────────────────────────────────────────────────
+// Compact `repos:` syntax — a single block that replaces both `repositories:`
+// and `checkout:` with sensible defaults.
+// ──────────────────────────────────────────────────────────────────────────────
+
+/// Object form for a `repos:` entry.
+#[derive(Debug, Deserialize, Clone)]
+pub struct RepoEntry {
+    /// Full repo name in the form `org/repo` (maps to ADO `name:`).
+    pub name: String,
+    /// Optional alias (maps to ADO `repository:`). Defaults to the last segment of `name`.
+    #[serde(default)]
+    pub alias: Option<String>,
+    /// ADO repository resource type. Defaults to `"git"`.
+    #[serde(default = "default_repo_type", rename = "type")]
+    pub repo_type: String,
+    /// Branch/tag ref. Defaults to `"refs/heads/main"`.
+    #[serde(default = "default_ref", rename = "ref")]
+    pub repo_ref: String,
+    /// Whether the agent job checks out this repository. Defaults to `true`.
+    #[serde(default = "default_checkout")]
+    pub checkout: bool,
+}
+
+fn default_repo_type() -> String {
+    "git".to_string()
+}
+
+fn default_checkout() -> bool {
+    true
+}
+
+/// A single item in the `repos:` list — either a string shorthand or an object.
+#[derive(Debug, Clone)]
+pub enum ReposItem {
+    /// String shorthand: `"org/repo"` or `"alias=org/repo"`.
+    Shorthand(String),
+    /// Full object form with explicit fields.
+    Full(RepoEntry),
+}
+
+impl<'de> Deserialize<'de> for ReposItem {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        use serde::de;
+
+        struct ReposItemVisitor;
+
+        impl<'de> de::Visitor<'de> for ReposItemVisitor {
+            type Value = ReposItem;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a string shorthand (\"org/repo\" or \"alias=org/repo\") or an object with at least a `name` field")
+            }
+
+            fn visit_str<E: de::Error>(self, value: &str) -> std::result::Result<ReposItem, E> {
+                Ok(ReposItem::Shorthand(value.to_string()))
+            }
+
+            fn visit_map<M>(self, map: M) -> std::result::Result<ReposItem, M::Error>
+            where
+                M: de::MapAccess<'de>,
+            {
+                let entry = RepoEntry::deserialize(de::value::MapAccessDeserializer::new(map))?;
+                Ok(ReposItem::Full(entry))
+            }
+        }
+
+        deserializer.deserialize_any(ReposItemVisitor)
+    }
+}
+
+impl ReposItem {
+    /// Sanitize all user-provided fields through `sanitize_config`.
+    pub fn sanitize(&mut self) {
+        match self {
+            ReposItem::Shorthand(s) => {
+                *s = crate::sanitize::sanitize_config(s);
+            }
+            ReposItem::Full(entry) => {
+                entry.name = crate::sanitize::sanitize_config(&entry.name);
+                if let Some(ref mut a) = entry.alias {
+                    *a = crate::sanitize::sanitize_config(a);
+                }
+                entry.repo_type = crate::sanitize::sanitize_config(&entry.repo_type);
+                entry.repo_ref = crate::sanitize::sanitize_config(&entry.repo_ref);
+            }
+        }
+    }
+}
+
 /// MCP configuration - can be `true` for simple enablement or an object with options
 #[derive(Debug, Deserialize, Clone)]
 #[serde(untagged)]
diff --git a/src/main.rs b/src/main.rs
index fa6253c8..6dcb07c6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -244,7 +244,19 @@ async fn run_execute(
         );
     }
 
-    let front_matter = parsed.front_matter;
+    let mut front_matter = parsed.front_matter;
+
+    // Sanitize before lowering repos, mirroring compile_pipeline_inner
+    // and check_pipeline so unsanitized fields never flow into the
+    // execution context.
+ use crate::sanitize::SanitizeConfig; + front_matter.sanitize_config_fields(); + + // Resolve compact repos: syntax into the legacy fields for execution + let (resolved_repos, resolved_checkout) = compile::resolve_repos(&front_matter) + .with_context(|| "Failed to resolve repository configuration")?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; println!("Loaded tool configs from: {}", source.display()); diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 5f62a873..7d3a55c4 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -324,8 +324,8 @@ fn test_fixture_complete_agent() { assert!(content.contains("description:"), "Should have description"); assert!(content.contains("schedule:"), "Should have schedule"); assert!( - content.contains("repositories:"), - "Should have repositories" + content.contains("repos:"), + "Should have repos" ); assert!( content.contains("mcp-servers:"), diff --git a/tests/fixtures/complete-agent.md b/tests/fixtures/complete-agent.md index 44b497f9..8e38d797 100644 --- a/tests/fixtures/complete-agent.md +++ b/tests/fixtures/complete-agent.md @@ -1,39 +1,35 @@ --- -name: "Complete Test Agent" -description: "A complete test agent with all features enabled" +name: Complete Test Agent +description: A complete test agent with all features enabled on: schedule: daily around 14:00 -repositories: - - repository: test-repo-1 - type: git - name: test-org/test-repo-1 - ref: refs/heads/main - - repository: test-repo-2 - type: git - name: test-org/test-repo-2 - ref: refs/heads/develop -checkout: - - test-repo-1 +teardown: +- bash: echo "Teardown step" + displayName: Teardown +setup: +- bash: echo "Setup step" + displayName: Setup mcp-servers: ado: true es-chat: true bluebird: true icm: allowed: - - create_incident - - get_incident + - create_incident + - get_incident kusto: allowed: - - query + - query custom-tool: - container: "node:20-slim" - entrypoint: "node" - 
entrypoint-args: ["server.js"] + container: node:20-slim + entrypoint: node + entrypoint-args: + - server.js allowed: - - custom_function_1 - - custom_function_2 + - custom_function_1 + - custom_function_2 env: - NODE_ENV: "test" + NODE_ENV: test permissions: read: my-read-arm-connection write: my-write-arm-connection @@ -43,17 +39,21 @@ safe-outputs: create-work-item: work-item-type: Task steps: - - bash: echo "Preparing context" - displayName: "Prepare context" +- bash: echo "Preparing context" + displayName: Prepare context post-steps: - - bash: echo "Finalizing" - displayName: "Finalize" -setup: - - bash: echo "Setup step" - displayName: "Setup" -teardown: - - bash: echo "Teardown step" - displayName: "Teardown" +- bash: echo "Finalizing" + displayName: Finalize +repos: +- alias: test-repo-1 + type: git + name: test-org/test-repo-1 + ref: refs/heads/main +- alias: test-repo-2 + type: git + name: test-org/test-repo-2 + ref: refs/heads/develop + checkout: false --- ## Complete Test Agent