diff --git a/.github/workflows/ado-script.yml b/.github/workflows/ado-script.yml index ac271372..781e95aa 100644 --- a/.github/workflows/ado-script.yml +++ b/.github/workflows/ado-script.yml @@ -39,9 +39,9 @@ jobs: - name: Verify generated TypeScript is up to date run: | - if ! git diff --exit-code -- scripts/ado-script/src/shared/types.gen.ts; then + if ! git diff --exit-code -- scripts/ado-script/src/shared/types.gen.ts scripts/ado-script/src/shared/types-prompt.gen.ts; then echo "" - echo "::error::types.gen.ts is out of date with the Rust IR." + echo "::error::Generated TS types are out of date with the Rust IR." echo "Run 'cd scripts/ado-script && npm run codegen' and commit the result." exit 1 fi diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index deeb371a..3075277d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -69,8 +69,8 @@ jobs: npm ci npm run build # `npm run build` runs codegen + ncc + copies dist/gate/index.js - # to ../gate.js (i.e. scripts/gate.js), which is then included in - # scripts.zip by the next step. + # to ../gate.js and dist/prompt/index.js to ../prompt.js, both of + # which are then included in scripts.zip by the next step. 
- name: Package scripts bundle run: | diff --git a/.gitignore b/.gitignore index 78d855eb..77bbec01 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target examples/sample-agent.yml scripts/gate.js +scripts/prompt.js *.pyc __pycache__/ diff --git a/AGENTS.md b/AGENTS.md index 8b8743e9..516a6b58 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -55,6 +55,7 @@ Every compiled pipeline runs as three sequential jobs: │ │ ├── gitattributes.rs # .gitattributes management for compiled pipelines │ │ ├── filter_ir.rs # Filter expression IR: Fact/Predicate types, lowering, validation, codegen │ │ ├── pr_filters.rs # PR trigger filter generation (native ADO + gate steps) +│ │ ├── prompt_ir.rs # PromptSpec IR: schemars-typed runtime contract for prompt.js │ │ ├── extensions/ # CompilerExtension trait and infrastructure extensions │ │ │ ├── mod.rs # Trait, Extension enum, collect_extensions(), re-exports │ │ │ ├── github.rs # Always-on GitHub MCP extension @@ -120,8 +121,9 @@ Every compiled pipeline runs as three sequential jobs: ├── ado-aw-derive/ # Proc-macro crate: #[derive(SanitizeConfig)], #[derive(SanitizeContent)] ├── examples/ # Example agent definitions ├── scripts/ # Supporting scripts shipped as release artifacts -│ ├── ado-script/ # TypeScript workspace for bundled gate.js (and future bundles) -│ └── gate.js # Bundled gate evaluator (built from scripts/ado-script/, see docs/ado-script.md) +│ ├── ado-script/ # TypeScript workspace for bundled gate.js + prompt.js +│ ├── gate.js # Bundled gate evaluator (Setup job; see docs/ado-script.md) +│ └── prompt.js # Bundled prompt renderer (Agent job; see docs/ado-script.md) ├── tests/ # Integration tests and fixtures ├── docs/ # Per-concept reference documentation (see index below) ├── Cargo.toml # Rust dependencies @@ -133,7 +135,7 @@ Every compiled pipeline runs as three sequential jobs: - **Language**: Rust (2024 edition) - Note: Rust 2024 edition exists and is the edition used by this project - **CLI Framework**: clap v4 
with derive macros - **Error Handling**: anyhow for ergonomic error propagation -- **Bundled scripts**: TypeScript + ncc (`scripts/ado-script/`) — compiled gate evaluator and future internal helpers; see [`docs/ado-script.md`](docs/ado-script.md). +- **Bundled scripts**: TypeScript + ncc (`scripts/ado-script/`) — compiled gate evaluator (`gate.js`), prompt renderer (`prompt.js`), and future internal helpers; see [`docs/ado-script.md`](docs/ado-script.md). - **Async Runtime**: tokio with full features - **YAML Parsing**: serde_yaml - **MCP Server**: rmcp with server and transport-io features @@ -185,8 +187,9 @@ index to jump to the right page. specification: `Fact`/`Predicate` types, three-pass compilation (lower → validate → codegen), gate step generation, adding new filter types. - [`docs/ado-script.md`](docs/ado-script.md) — `ado-script` workspace - (`scripts/ado-script/`): the bundled TypeScript runtime helpers (today: - `gate.js`), schemars-driven type codegen, and the A2 design decision. + (`scripts/ado-script/`): the bundled TypeScript runtime helpers + (`gate.js` for trigger gates, `prompt.js` for runtime prompt + rendering), schemars-driven type codegen, and the A2 design decision. - [`docs/local-development.md`](docs/local-development.md) — local development setup notes. diff --git a/docs/ado-script.md b/docs/ado-script.md index 90317a41..0f60ee4d 100644 --- a/docs/ado-script.md +++ b/docs/ado-script.md @@ -2,8 +2,13 @@ `ado-script` is the umbrella name for **internal**, compiler-targeted TypeScript bundles that ado-aw emits into compiled pipelines as runtime -helpers. The first (and currently only) bundle is **`gate.js`**, the -trigger-filter gate evaluator. +helpers. There are currently two bundles: + +- **`gate.js`** — the trigger-filter gate evaluator (Setup job). 
+- **`prompt.js`** — the runtime prompt renderer that reads the agent + `.md` from the workspace, strips front matter, applies variable + substitution, and assembles the prompt with extension supplements + (Agent job; default behaviour as of v0.22). > Internal-only: `ado-script` is not a user-facing front-matter feature. > Authors do **not** write `ado-script:` blocks in their agent markdown. @@ -46,26 +51,34 @@ scripts/ado-script/ # TS workspace ├── tsconfig.json # NodeNext, ESNext target ├── src/ │ ├── shared/ # Reusable across all bundles -│ │ ├── types.gen.ts # AUTO-GENERATED from Rust IR +│ │ ├── types.gen.ts # AUTO-GENERATED from Rust IR (gate) +│ │ ├── types-prompt.gen.ts # AUTO-GENERATED from Rust IR (prompt) │ │ ├── auth.ts # ADO token / collection URI plumbing │ │ ├── ado-client.ts # azure-devops-node-api wrapper + retries │ │ ├── env-facts.ts # Pipeline-variable readers │ │ ├── policy.ts # Failure-policy state machine │ │ └── vso-logger.ts # ##vso[…] command emitters -│ └── gate/ # gate.js entry point +│ ├── gate/ # gate.js entry point +│ │ ├── index.ts # main() +│ │ ├── bypass.ts # build-reason auto-pass +│ │ ├── facts.ts # fact acquisition (env + REST) +│ │ ├── predicates.ts # 11 predicate evaluators +│ │ └── selfcancel.ts # best-effort build cancellation +│ └── prompt/ # prompt.js entry point │ ├── index.ts # main() -│ ├── bypass.ts # build-reason auto-pass -│ ├── facts.ts # fact acquisition (env + REST) -│ ├── predicates.ts # 11 predicate evaluators -│ └── selfcancel.ts # best-effort build cancellation -├── test/ # End-to-end smoke tests -└── dist/gate/index.js # ncc-bundled output (gitignored) +│ ├── frontmatter.ts # YAML front-matter stripper +│ └── substitute.ts # ${{ parameters.* }} / $(VAR) substitution +├── test/ # End-to-end smoke tests (gate + prompt) +└── dist/ # ncc-bundled output (gitignored) + ├── gate/index.js + └── prompt/index.js ``` The release workflow (`.github/workflows/release.yml`) runs `npm ci && -npm run build` and copies 
`dist/gate/index.js` to `scripts/gate.js`, -which is then included in the `scripts.zip` release asset that pipelines -download at runtime. +npm run build` and copies `dist/gate/index.js` to `scripts/gate.js` and +`dist/prompt/index.js` to `scripts/prompt.js`, both of which are then +included in the `scripts.zip` release asset that pipelines download at +runtime. ## Schema codegen — preventing drift @@ -91,16 +104,20 @@ The pipeline: └──────────────────────────────┘ ``` -`npm run codegen` runs both stages. The `ado-script` CI workflow -(`.github/workflows/ado-script.yml`) regenerates the file and runs +`npm run codegen` runs the full pipeline: it exports both the +`gate-spec.schema.json` and `prompt-spec.schema.json` from Rust, then +runs `json2ts` to regenerate `src/shared/types.gen.ts` (gate) and +`src/shared/types-prompt.gen.ts` (prompt). The `ado-script` CI workflow +(`.github/workflows/ado-script.yml`) regenerates both files and runs `git diff --exit-code` to fail on drift. If you change the IR shape in Rust, you must run `cd scripts/ado-script && npm run codegen` and -commit the regenerated `types.gen.ts`. +commit the regenerated `*.gen.ts` files. -The Rust subcommand that emits the schema is intentionally hidden: +The Rust subcommands that emit the schemas are intentionally hidden: ```sh -cargo run -- export-gate-schema --output schema/gate-spec.schema.json +cargo run -- export-gate-schema --output schema/gate-spec.schema.json +cargo run -- export-prompt-schema --output schema/prompt-spec.schema.json ``` ## How the gate bundle is wired into emitted pipelines @@ -120,6 +137,30 @@ steps when any `filters:` block is active: The IR-to-bash codegen lives in `compile_gate_step_external` (`src/compile/filter_ir.rs:~1100`). +## How the prompt bundle is wired into emitted pipelines + +`generate_prepare_agent_prompt` in `src/compile/common.rs` injects a +parallel three-step bundle into the **Agent job** when +`inlined-imports: false` (the default): + +1. 
**`NodeTool@0`** — same Node 20.x install as for `gate.js`. + Idempotent across multiple invocations in the same job. +2. **`curl` download** — fetches `scripts.zip` and extracts + `prompt.js` to `/tmp/ado-aw-scripts/prompt.js`. Each pool agent + downloads its own copy; the Setup and Agent jobs run on independent + agents so the download isn't shared. +3. **`bash: node '/tmp/ado-aw-scripts/prompt.js'`** — runs the renderer + with `ADO_AW_PROMPT_SPEC` (base64 JSON of the `PromptSpec`) plus one + `ADO_AW_PARAM_<NAME>: ${{ parameters.<name> }}` env per declared + parameter. + +When `inlined-imports: true`, the same helper instead emits the legacy +heredoc step that embeds the prompt body and supplements directly into +the YAML; `prompt.js` is not invoked. + +Both download steps share the helper `scripts_download_step()` in +`src/compile/extensions/mod.rs` so URL/version stay in lockstep. + ## Adding a new internal use site Suppose we want a `poll.js` bundle (e.g. for polling external systems): @@ -132,17 +173,28 @@ Suppose we want a `poll.js` bundle (e.g. for polling external systems): ``` and extend `build` to also run it and copy `dist/poll/index.js` to `../poll.js`. -3. Add tests under `src/poll/__tests__/`. +3. Add tests under `src/poll/__tests__/` (unit) and `test/` (smoke, + gated on `dist/poll/index.js` existing). 4. Wire from a new `CompilerExtension` (or extend an existing one) that - downloads and invokes `poll.js` as a runtime step. + downloads and invokes `poll.js` as a runtime step. Use the shared + `scripts_download_step()` and `node_tool_step()` helpers. +5. If the contract is non-trivial, follow the `gate.js` / + `prompt.js` pattern: define a `Spec` struct in Rust with + `#[derive(Serialize, JsonSchema)]`, add a hidden + `export-poll-schema` CLI, and extend `npm run codegen` to regenerate + `types-poll.gen.ts`. For trivial contracts (a couple of env vars), + hand-written types are fine. +6. 
Update `.github/workflows/release.yml` if the zip exclusion list needs to include the new `dist/poll` directory. ## Bundle-size budget -Each bundled artifact must stay **under 5 MB**. The current `gate.js` is -~1.1 MB, dominated by `azure-devops-node-api`. If a future bundle blows -the budget: +Each bundled artifact must stay **under 5 MB**. Current sizes: + +- `gate.js` is ~1.1 MB, dominated by `azure-devops-node-api`. +- `prompt.js` is ~5 KB (no SDK dependency). + +If a future bundle blows the budget: - First, check ncc's `--minify` and `--target` flags. - If still too large, weigh dropping the SDK in favor of hand-rolled diff --git a/docs/extending.md b/docs/extending.md index f4a235f2..ef2c72ed 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -53,6 +53,18 @@ Execution job (before the agent runs). `setup_steps()` injects into the Setup job (before the Execution job starts). Use `setup_steps()` for pre-activation gates or checks that must complete before the agent is launched. +**`prompt_supplement()` and runtime substitution**: the markdown returned +from this method is appended to the agent prompt at runtime by the +`prompt.js` ado-script bundle. It is rendered through the same +substitution pipeline as the user's prompt body, so supplements may +contain `${{ parameters.NAME }}` (for declared parameters) and +`$(VAR)` / `$(VAR.SUB)` references that will be resolved at pipeline +runtime. See `docs/template-markers.md` for the full substitution +contract. When `inlined-imports: true`, the supplement is instead +embedded into the YAML at compile time via `wrap_prompt_append` and ADO +substitution rules apply (no parameters, no runtime variable +substitution). + **Phase ordering**: Extensions are sorted by phase — runtimes (`ExtensionPhase::Runtime`) execute before tools (`ExtensionPhase::Tool`). 
This guarantees runtime install steps run before tool steps that may depend diff --git a/docs/front-matter.md b/docs/front-matter.md index 0a1fe5dc..0b3adeac 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -135,6 +135,7 @@ parameters: # optional ADO runtime parameters (surfaced in UI displayName: "Clear agent memory" type: boolean default: false +inlined-imports: false # set to true to opt out of dynamic prompt injection (default: false) --- @@ -143,6 +144,33 @@ parameters: # optional ADO runtime parameters (surfaced in UI Build the project and run all tests... ``` +## Dynamic Prompt Injection (`inlined-imports`) + +By default, the agent's prompt body is **not** embedded in the compiled +pipeline YAML. Instead, the pipeline reads the source `.md` from the +checked-out workspace at runtime, strips its front matter, applies +variable substitution, and assembles the final prompt in a step backed +by the `prompt.js` ado-script bundle. This means body-only edits to the +agent's `.md` no longer require recompiling the pipeline. + +Set `inlined-imports: true` to opt out and embed the prompt body and +extension supplements directly into the YAML at compile time (legacy +behaviour). Use the inlined form when: + +- The Agent pool can't reach `github.com` to download `scripts.zip`. +- You need a fully self-contained pipeline file (offline archival). +- You want to inspect the exact rendered prompt by reading the YAML. + +Substitution patterns recognised at runtime by `prompt.js` (default +mode only): + +| Pattern | Resolved via | Notes | +|-------------------------------|------------------------------------------------------|---------------------------------------------------------------------------------------------| +| `${{ parameters.NAME }}` | env `ADO_AW_PARAM_<NAME>` (NAME upper-cased, `-` → `_`) | Only declared parameters substitute; others left verbatim with a warning. | +| `$(VAR)` / `$(VAR.SUB)` | env `<VAR_SUB>` (upper-cased, `.` → `_`; ADO native) | Unset variables left verbatim with a warning. 
Secrets are not auto-exposed and stay verbatim. | +| `$[ ... ]` | not substituted | Left verbatim with one warning per render. | +| `\$(...)` | escape | Backslash stripped; `$(...)` left literal. | + ## Workspace Defaults The `workspace:` field controls which directory the agent runs in. When it is diff --git a/docs/template-markers.md b/docs/template-markers.md index e18f7ac3..70c071b9 100644 --- a/docs/template-markers.md +++ b/docs/template-markers.md @@ -305,9 +305,29 @@ resources: - release/* ``` -## {{ agent_content }} +## {{ prepare_agent_prompt }} -Should be replaced with the markdown body (agent instructions) extracted from the source markdown file, excluding the YAML front matter. This content provides the agent with its task description and guidelines. +Replaced with the YAML step(s) that prepare `/tmp/awf-tools/agent-prompt.md` for the agent. + +The expansion depends on the `inlined-imports` front-matter field: + +- **`inlined-imports: false`** (default) — emits a three-step bundle: + 1. `NodeTool@0` to install Node 20.x. + 2. A `curl` download of `scripts.zip` from the matching `githubnext/ado-aw` release, extracting `prompt.js` to `/tmp/ado-aw-scripts/prompt.js`. + 3. A bash step that runs `node /tmp/ado-aw-scripts/prompt.js` with `ADO_AW_PROMPT_SPEC` (a base64-encoded `PromptSpec` JSON) and one `ADO_AW_PARAM_<NAME>: ${{ parameters.<name> }}` env entry per declared parameter. The renderer reads the source `.md` from the workspace, strips its front matter, applies variable substitution, appends extension supplements, and writes the result to `/tmp/awf-tools/agent-prompt.md`. 
+ Substitution patterns recognised at runtime by `prompt.js`: + + | Pattern | Resolved via | Notes | + |-------------------------------|------------------------------------------------------|---------------------------------------------------------------------------------------------| + | `${{ parameters.NAME }}` | env `ADO_AW_PARAM_<NAME>` (NAME upper-cased, `-` → `_`) | Only declared parameters substitute; others left verbatim with a warning. | + | `$(VAR)` / `$(VAR.SUB)` | env `<VAR_SUB>` (upper-cased, `.` → `_`; ADO native) | Unset variables left verbatim with a warning. Secrets aren't auto-exposed and stay verbatim. | + | `$[ ... ]` | not substituted | Left verbatim with one warning per render. | + | `\$(...)` | escape | Backslash stripped; `$(...)` left literal. | + +- **`inlined-imports: true`** — emits a single legacy heredoc step that writes the markdown body verbatim into `/tmp/awf-tools/agent-prompt.md`. Extension prompt supplements are emitted as separate `cat >>` heredoc steps via `wrap_prompt_append`. No variable substitution beyond what ADO macros already do natively. + +This marker replaces the older `{{ agent_content }}` placeholder, which is no longer emitted by the compiler. 
## {{ mcpg_config }} diff --git a/scripts/ado-script/package.json b/scripts/ado-script/package.json index 58837669..b55a0675 100644 --- a/scripts/ado-script/package.json +++ b/scripts/ado-script/package.json @@ -7,10 +7,11 @@ "node": ">=20.0.0" }, "scripts": { - "build": "npm run codegen && npm run build:gate && cp dist/gate/index.js ../gate.js", + "build": "npm run codegen && npm run build:gate && npm run build:prompt && cp dist/gate/index.js ../gate.js && cp dist/prompt/index.js ../prompt.js", "build:gate": "ncc build src/gate/index.ts -o dist/gate -m -t", - "build:check": "ls -lh ../gate.js && wc -c ../gate.js", - "codegen": "mkdir -p schema && cargo run --quiet --manifest-path ../../Cargo.toml -- export-gate-schema --output schema/gate-spec.schema.json && npx json2ts schema/gate-spec.schema.json -o src/shared/types.gen.ts --bannerComment \"// AUTO-GENERATED from Rust IR via cargo run -- export-gate-schema. Do not edit; run npm run codegen.\"", + "build:prompt": "ncc build src/prompt/index.ts -o dist/prompt -m -t", + "build:check": "ls -lh ../gate.js ../prompt.js && wc -c ../gate.js ../prompt.js", + "codegen": "mkdir -p schema && cargo run --quiet --manifest-path ../../Cargo.toml -- export-gate-schema --output schema/gate-spec.schema.json && cargo run --quiet --manifest-path ../../Cargo.toml -- export-prompt-schema --output schema/prompt-spec.schema.json && npx json2ts schema/gate-spec.schema.json -o src/shared/types.gen.ts --bannerComment \"// AUTO-GENERATED from Rust IR via cargo run -- export-gate-schema. Do not edit; run npm run codegen.\" && npx json2ts schema/prompt-spec.schema.json -o src/shared/types-prompt.gen.ts --bannerComment \"// AUTO-GENERATED from Rust IR via cargo run -- export-prompt-schema. 
Do not edit; run npm run codegen.\"", "test": "vitest run", "test:smoke": "npm run build && vitest run -c vitest.config.smoke.ts", "lint": "echo TODO", diff --git a/scripts/ado-script/src/prompt/__tests__/frontmatter.test.ts b/scripts/ado-script/src/prompt/__tests__/frontmatter.test.ts new file mode 100644 index 00000000..42573a03 --- /dev/null +++ b/scripts/ado-script/src/prompt/__tests__/frontmatter.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from "vitest"; +import { stripFrontMatter } from "../frontmatter.js"; + +describe("stripFrontMatter", () => { + it("strips a basic front matter block", () => { + const src = "---\nname: foo\n---\nbody line\n"; + expect(stripFrontMatter(src)).toBe("body line\n"); + }); + + it("returns content unchanged when no front matter", () => { + expect(stripFrontMatter("hello\nworld\n")).toBe("hello\nworld\n"); + }); + + it("handles CRLF line endings", () => { + const src = "---\r\nname: foo\r\n---\r\nbody\r\n"; + expect(stripFrontMatter(src)).toBe("body\r\n"); + }); + + it("strips a UTF-8 BOM and the front matter", () => { + const src = "\uFEFF---\nname: foo\n---\nbody\n"; + expect(stripFrontMatter(src)).toBe("body\n"); + }); + + it("throws on unterminated front matter", () => { + expect(() => stripFrontMatter("---\nname: foo\n")).toThrow( + /Unterminated/, + ); + }); + + it("returns empty string for front-matter-only input", () => { + expect(stripFrontMatter("---\nname: foo\n---\n")).toBe(""); + }); + + it("preserves multiple body paragraphs", () => { + const src = "---\nname: x\n---\nfirst\n\nsecond paragraph\n"; + expect(stripFrontMatter(src)).toBe("first\n\nsecond paragraph\n"); + }); + + it("does not strip --- lines that appear later in the body", () => { + const src = "no front matter here\n---\nstill body\n"; + // No opening fence at byte 0 → return unchanged. 
+ expect(stripFrontMatter(src)).toBe(src); + }); +}); diff --git a/scripts/ado-script/src/prompt/__tests__/substitute.test.ts b/scripts/ado-script/src/prompt/__tests__/substitute.test.ts new file mode 100644 index 00000000..118b9f23 --- /dev/null +++ b/scripts/ado-script/src/prompt/__tests__/substitute.test.ts @@ -0,0 +1,113 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { substitute } from "../substitute.js"; + +describe("substitute", () => { + const originalEnv = process.env; + + beforeEach(() => { + process.env = { ...originalEnv }; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + it("substitutes a declared parameter", () => { + process.env.ADO_AW_PARAM_TARGET = "main"; + const out = substitute( + "branch: ${{ parameters.target }}", + ["target"], + () => {}, + ); + expect(out).toBe("branch: main"); + }); + + it("converts hyphen in param name to underscore in env name", () => { + process.env.ADO_AW_PARAM_DRY_RUN = "yes"; + const out = substitute( + "x: ${{ parameters.dry-run }}", + ["dry-run"], + () => {}, + ); + expect(out).toBe("x: yes"); + }); + + it("leaves an undeclared parameter verbatim with a warning", () => { + const warnings: string[] = []; + const out = substitute( + "x: ${{ parameters.unknown }}", + [], + (m) => warnings.push(m), + ); + expect(out).toBe("x: ${{ parameters.unknown }}"); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0]).toContain("unknown"); + }); + + it("leaves a declared but unmapped parameter verbatim with a warning", () => { + delete process.env.ADO_AW_PARAM_TARGET; + const warnings: string[] = []; + const out = substitute( + "x: ${{ parameters.target }}", + ["target"], + (m) => warnings.push(m), + ); + expect(out).toBe("x: ${{ parameters.target }}"); + expect(warnings.length).toBe(1); + }); + + it("substitutes pipeline variables via dot-to-underscore convention", () => { + process.env.BUILD_BUILDID = "12345"; + const out = substitute("build: $(Build.BuildId)", 
[], () => {}); + expect(out).toBe("build: 12345"); + }); + + it("leaves unset pipeline variables verbatim with a warning", () => { + delete process.env.MISSING; + const warnings: string[] = []; + const out = substitute("v: $(Missing)", [], (m) => warnings.push(m)); + expect(out).toBe("v: $(Missing)"); + expect(warnings.length).toBe(1); + }); + + it("respects backslash-escaped $(...) literals", () => { + process.env.FOO = "should not be used"; + const out = substitute("literal: \\$(Foo)", [], () => {}); + expect(out).toBe("literal: $(Foo)"); + }); + + it("warns on $[ ... ] expressions and leaves them verbatim", () => { + const warnings: string[] = []; + const out = substitute( + "exp: $[ counter('foo', 0) ]", + [], + (m) => warnings.push(m), + ); + expect(out).toBe("exp: $[ counter('foo', 0) ]"); + expect(warnings.some((w) => w.includes("$[..."))).toBe(true); + }); + + it("substitutes parameters appearing multiple times", () => { + process.env.ADO_AW_PARAM_X = "Y"; + const src = "${{ parameters.x }} appears twice: ${{ parameters.x }}"; + expect(substitute(src, ["x"], () => {})).toBe("Y appears twice: Y"); + }); + + it("does not match $( without an identifier", () => { + const out = substitute("foo $() bar", [], () => {}); + expect(out).toBe("foo $() bar"); + }); + + it("does not match malformed parameter expressions", () => { + const warnings: string[] = []; + const out = substitute( + "x: ${{ parameters.123bad }}", + ["123bad"], + (m) => warnings.push(m), + ); + // Identifier must start with letter or underscore — regex won't match, + // so the literal is left intact and no warning emitted. 
+ expect(out).toBe("x: ${{ parameters.123bad }}"); + expect(warnings.length).toBe(0); + }); +}); diff --git a/scripts/ado-script/src/prompt/frontmatter.ts b/scripts/ado-script/src/prompt/frontmatter.ts new file mode 100644 index 00000000..a026c5b0 --- /dev/null +++ b/scripts/ado-script/src/prompt/frontmatter.ts @@ -0,0 +1,37 @@ +/** + * Strip the leading YAML front-matter block from a markdown source. + * + * Front matter is defined as the region between an opening `---` line at + * the very start of the file (optionally preceded by a UTF-8 BOM) and the + * next standalone `---` line. Both LF and CRLF line endings are accepted. + * + * Returns the body unchanged when no front matter is present. Throws if a + * front matter opens but never closes. + */ +export function stripFrontMatter(source: string): string { + // Strip leading BOM if present so our line-prefix checks line up. + const text = source.replace(/^\uFEFF/, ""); + + // Front matter must start at byte 0 with `---` followed by EOL. + const opener = /^---(?:\r\n|\n)/; + const m = opener.exec(text); + if (!m) { + return source; + } + + const afterOpen = m[0].length; + + // Find the closing fence: `---` on its own line. + const closer = /(?:\r\n|\n)---(?=\r\n|\n|$)/; + const c = closer.exec(text.slice(afterOpen)); + if (!c) { + throw new Error("Unterminated YAML front matter (missing closing '---')"); + } + const closeIdx = afterOpen + c.index + c[0].length; + + // Skip the EOL immediately after the closing fence, if any. + let body = text.slice(closeIdx); + body = body.replace(/^(?:\r\n|\n)/, ""); + + return body; +} diff --git a/scripts/ado-script/src/prompt/index.ts b/scripts/ado-script/src/prompt/index.ts new file mode 100644 index 00000000..0bddfadc --- /dev/null +++ b/scripts/ado-script/src/prompt/index.ts @@ -0,0 +1,102 @@ +/** + * Prompt renderer entry point. 
+ * + * Reads a base64-encoded `PromptSpec` from `ADO_AW_PROMPT_SPEC` env, + * loads the source markdown from the workspace, strips its YAML front + * matter, applies variable substitution, appends extension supplements, + * and writes the result to the configured output path. On any + * unrecoverable error, logs via VSO and exits non-zero. + */ +import * as fs from "node:fs"; +import * as path from "node:path"; +import type { PromptSpec } from "../shared/types-prompt.gen.js"; +import { stripFrontMatter } from "./frontmatter.js"; +import { substitute } from "./substitute.js"; +import { complete, logError, logWarning } from "../shared/vso-logger.js"; + +/** + * The `version` value this build of `prompt.js` understands. Must be + * kept in lockstep with `PROMPT_SPEC_VERSION` in + * `src/compile/prompt_ir.rs`. + */ +const SUPPORTED_VERSION = 1; + +function decodeSpec(raw: string): PromptSpec { + const json = Buffer.from(raw, "base64").toString("utf8"); + return JSON.parse(json) as PromptSpec; +} + +export async function main(): Promise { + const raw = process.env.ADO_AW_PROMPT_SPEC; + if (!raw) { + logError("ADO_AW_PROMPT_SPEC env var missing"); + complete("Failed"); + process.exit(1); + } + + let spec: PromptSpec; + try { + spec = decodeSpec(raw); + } catch (e) { + logError( + `Failed to decode ADO_AW_PROMPT_SPEC: ${(e as Error).message}`, + ); + complete("Failed"); + process.exit(1); + } + + if (spec.version !== SUPPORTED_VERSION) { + logError( + `Unsupported PromptSpec version ${spec.version}; this prompt.js supports version ${SUPPORTED_VERSION}. 
` + + `Recompile your pipeline with a matching ado-aw release.`, + ); + complete("Failed"); + process.exit(1); + } + + if (!fs.existsSync(spec.source_path)) { + logError(`Source markdown not found: ${spec.source_path}`); + complete("Failed"); + process.exit(1); + } + + const source = fs.readFileSync(spec.source_path, "utf8"); + let body: string; + try { + body = stripFrontMatter(source); + } catch (e) { + logError( + `Failed to strip front matter from ${spec.source_path}: ${(e as Error).message}`, + ); + complete("Failed"); + process.exit(1); + } + + const parts: string[] = [body.trim()]; + for (const supp of spec.supplements) { + parts.push(supp.content.trim()); + } + let rendered = parts.filter((s) => s.length > 0).join("\n\n"); + + rendered = substitute(rendered, spec.parameters, (msg) => logWarning(msg)); + + if (rendered.trim().length === 0) { + logError( + `Rendered prompt is empty (source: ${spec.source_path}). ` + + `Front-matter-only files are not valid agents.`, + ); + complete("Failed"); + process.exit(1); + } + + fs.mkdirSync(path.dirname(spec.output_path), { recursive: true }); + fs.writeFileSync(spec.output_path, rendered); + + complete("Succeeded", `prompt rendered: ${spec.output_path}`); +} + +main().catch((e) => { + logError(`prompt renderer crashed: ${(e as Error).message}`); + complete("Failed"); + process.exit(1); +}); diff --git a/scripts/ado-script/src/prompt/substitute.ts b/scripts/ado-script/src/prompt/substitute.ts new file mode 100644 index 00000000..a4d5a886 --- /dev/null +++ b/scripts/ado-script/src/prompt/substitute.ts @@ -0,0 +1,87 @@ +/** + * Apply runtime variable substitution to rendered prompt content. + * + * Patterns recognised: + * + * - `${{ parameters.NAME }}` where NAME matches `[A-Za-z_][A-Za-z0-9_-]*` + * and NAME is in the `parameters` allow-list: + * resolved from env `ADO_AW_PARAM_`. + * Names not in the allow-list, or in the allow-list but with no env + * value, are left verbatim with a per-occurrence warning. 
+ * + * - `$(VAR)` and `$(VAR.SUB)` where the captured name matches + * `[A-Za-z_][A-Za-z0-9_.]*`: resolved from env + * `` (ADO native convention). Unset + * variables are left verbatim with a warning. + * + * - `$[ ... ]`: left verbatim with one warning per render. + * + * - `\$(...)`: backslash-escaped form. The backslash is stripped and + * the literal `$(...)` is left untouched (no env lookup). + * + * Warnings are reported via the `warn` callback so the caller can route + * them to the VSO logger. + */ +export function substitute( + source: string, + parameters: string[], + warn: (msg: string) => void, +): string { + const allowed = new Set(parameters); + const env = process.env; + + // 1. $[ ... ] — leave verbatim, warn once if any present. + if (/\$\[[^\]]*\]/.test(source)) { + warn( + "Found $[...] runtime expressions in prompt; these are not substituted by prompt.js and will reach the agent verbatim.", + ); + } + + // 2. ${{ parameters.NAME }} + source = source.replace( + /\$\{\{\s*parameters\.([A-Za-z_][A-Za-z0-9_-]*)\s*\}\}/g, + (match, name: string) => { + if (!allowed.has(name)) { + warn( + `Unknown parameter '${name}' referenced in prompt; left verbatim.`, + ); + return match; + } + const envName = + "ADO_AW_PARAM_" + name.toUpperCase().replace(/-/g, "_"); + const v = env[envName]; + if (v === undefined) { + warn( + `Parameter '${name}' has no env mapping (${envName}); left verbatim.`, + ); + return match; + } + return v; + }, + ); + + // 3. $(VAR) — but skip backslash-escaped occurrences. + // Use a unique placeholder for escaped form; restore at the end. 
+ const ESCAPE_TOKEN = "\u0000ADOAW_ESC_DOLLAR_PAREN\u0000"; + source = source.replace(/\\\$\(/g, ESCAPE_TOKEN); + + source = source.replace( + /\$\(([A-Za-z_][A-Za-z0-9_.]*)\)/g, + (match, name: string) => { + const envName = name.toUpperCase().replace(/\./g, "_"); + const v = env[envName]; + if (v === undefined) { + warn( + `Variable '${name}' has no env value (${envName}); left verbatim.`, + ); + return match; + } + return v; + }, + ); + + // Restore backslash-escaped `$(` to the literal form. + source = source.split(ESCAPE_TOKEN).join("$("); + + return source; +} diff --git a/scripts/ado-script/src/shared/types-prompt.gen.ts b/scripts/ado-script/src/shared/types-prompt.gen.ts new file mode 100644 index 00000000..66c5ac96 --- /dev/null +++ b/scripts/ado-script/src/shared/types-prompt.gen.ts @@ -0,0 +1,47 @@ +// AUTO-GENERATED from Rust IR via cargo run -- export-prompt-schema. Do not edit; run npm run codegen. + +/** + * Top-level spec consumed by `prompt.js` at pipeline runtime. + */ +export interface PromptSpec { + /** + * Absolute path where the rendered prompt should be written. + */ + output_path: string; + /** + * Declared parameter names available for `${{ parameters.NAME }}` + * substitution. Names not in this list are left verbatim by + * `prompt.js` with a runtime warning. + */ + parameters: string[]; + /** + * Absolute path to the source `.md` file in the workspace. + */ + source_path: string; + /** + * Extension prompt supplements, in render order + * (Runtimes phase first, then Tools, stable within each phase). + */ + supplements: PromptSupplement[]; + /** + * Schema version; refused on mismatch. + */ + version: number; + [k: string]: unknown; +} +/** + * One block of additional prompt content contributed by an extension. + */ +export interface PromptSupplement { + /** + * Markdown to append. May contain `${{ parameters.* }}` or `$(VAR)` + * references; substituted by `prompt.js` using the same rules as + * the body. 
+ */ + content: string; + /** + * Extension display name (used for VSO logging only — not rendered). + */ + name: string; + [k: string]: unknown; +} diff --git a/scripts/ado-script/test/prompt-smoke.test.ts b/scripts/ado-script/test/prompt-smoke.test.ts new file mode 100644 index 00000000..6ac34be0 --- /dev/null +++ b/scripts/ado-script/test/prompt-smoke.test.ts @@ -0,0 +1,123 @@ +import { describe, expect, it } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { execFileSync } from "node:child_process"; +import { fileURLToPath } from "node:url"; + +/** + * End-to-end test that exercises the compiled `prompt.js` bundle by + * spawning a real Node process. Only runs when the bundle has been + * built (via `npm run build`); otherwise the test is a no-op so it + * doesn't fail in the unit-test profile. + */ +describe("prompt.js end-to-end (compiled bundle)", () => { + it("reads source, strips front matter, appends supplements, writes output", () => { + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const distPath = path.resolve(__dirname, "../dist/prompt/index.js"); + if (!fs.existsSync(distPath)) { + // Bundle not built; skip silently in non-smoke runs. 
+ return; + } + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "prompt-it-")); + try { + const src = path.join(tmp, "agent.md"); + const out = path.join(tmp, "out.md"); + fs.writeFileSync( + src, + "---\nname: x\n---\nHello ${{ parameters.who }}!\n", + ); + const spec = { + version: 1, + source_path: src, + output_path: out, + supplements: [{ name: "Demo", content: "## Demo\nbody" }], + parameters: ["who"], + }; + const env: NodeJS.ProcessEnv = { + ...process.env, + ADO_AW_PROMPT_SPEC: Buffer.from(JSON.stringify(spec)).toString( + "base64", + ), + ADO_AW_PARAM_WHO: "world", + }; + execFileSync("node", [distPath], { env }); + const rendered = fs.readFileSync(out, "utf8"); + expect(rendered).toContain("Hello world!"); + expect(rendered).toContain("## Demo"); + expect(rendered).not.toContain("name: x"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("fails with non-zero exit when source is missing", () => { + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const distPath = path.resolve(__dirname, "../dist/prompt/index.js"); + if (!fs.existsSync(distPath)) { + return; + } + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "prompt-it-")); + try { + const out = path.join(tmp, "out.md"); + const spec = { + version: 1, + source_path: path.join(tmp, "missing.md"), + output_path: out, + supplements: [], + parameters: [], + }; + const env: NodeJS.ProcessEnv = { + ...process.env, + ADO_AW_PROMPT_SPEC: Buffer.from(JSON.stringify(spec)).toString( + "base64", + ), + }; + let threw = false; + try { + execFileSync("node", [distPath], { env, stdio: "pipe" }); + } catch { + threw = true; + } + expect(threw).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("fails on unknown spec version", () => { + const __dirname = path.dirname(fileURLToPath(import.meta.url)); + const distPath = path.resolve(__dirname, "../dist/prompt/index.js"); + if (!fs.existsSync(distPath)) { + return; + } + 
const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "prompt-it-")); + try { + const src = path.join(tmp, "a.md"); + const out = path.join(tmp, "out.md"); + fs.writeFileSync(src, "---\nname: x\n---\nbody\n"); + const spec = { + version: 999, + source_path: src, + output_path: out, + supplements: [], + parameters: [], + }; + const env: NodeJS.ProcessEnv = { + ...process.env, + ADO_AW_PROMPT_SPEC: Buffer.from(JSON.stringify(spec)).toString( + "base64", + ), + }; + let threw = false; + try { + execFileSync("node", [distPath], { env, stdio: "pipe" }); + } catch { + threw = true; + } + expect(threw).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/src/compile/common.rs b/src/compile/common.rs index 5e018087..48b8f086 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1,15 +1,19 @@ //! Common helper functions shared across all compile targets. use anyhow::{Context, Result}; +use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64}; use std::collections::{HashMap, HashSet}; use std::path::Path; +use super::extensions::{ + CompileContext, CompilerExtension, Extension, McpgConfig, McpgGatewayConfig, McpgServerConfig, +}; +use super::prompt_ir::{PROMPT_SPEC_VERSION, PromptSpec, PromptSupplement}; use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository}; -use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; -use crate::compile::types::McpConfig; -use crate::fuzzy_schedule; use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; +use crate::compile::types::McpConfig; use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; +use crate::fuzzy_schedule; use crate::validate; /// Parse the markdown file and extract front matter and body @@ -1311,9 +1315,17 @@ pub fn generate_teardown_job( } /// Generate prepare steps (inline), including extension steps and 
user-defined steps. +/// +/// When `inlined_imports` is true, extension prompt supplements are emitted as +/// per-extension `cat >>` heredoc steps via `wrap_prompt_append`, mirroring +/// the legacy embedded-prompt path. When false (the default), supplements are +/// instead delivered to `prompt.js` via the `PromptSpec` env contract emitted +/// by [`generate_prepare_agent_prompt`], so this function skips the +/// `wrap_prompt_append` calls. pub fn generate_prepare_steps( prepare_steps: &[serde_yaml::Value], extensions: &[super::extensions::Extension], + inlined_imports: bool, ) -> Result { let mut parts = Vec::new(); @@ -1322,7 +1334,9 @@ pub fn generate_prepare_steps( for step in ext.prepare_steps() { parts.push(step); } - if let Some(prompt) = ext.prompt_supplement() { + if inlined_imports + && let Some(prompt) = ext.prompt_supplement() + { parts.push(super::extensions::wrap_prompt_append(&prompt, ext.name())?); } } @@ -1334,6 +1348,125 @@ pub fn generate_prepare_steps( Ok(parts.join("\n\n")) } +/// Collect prompt supplements from extensions, preserving the same order +/// `generate_prepare_steps` would emit `wrap_prompt_append` calls in +/// (Runtimes phase first, then Tools, stable within each phase). +/// +/// Used when `inlined-imports: false` to populate +/// [`PromptSpec::supplements`] for the runtime renderer. +pub fn collect_prompt_supplements(extensions: &[Extension]) -> Vec { + let mut out = Vec::new(); + for ext in extensions { + if let Some(content) = ext.prompt_supplement() { + out.push(PromptSupplement { + name: ext.name().to_string(), + content, + }); + } + } + out +} + +/// Emit the YAML step(s) that prepare `/tmp/awf-tools/agent-prompt.md` +/// for the agent. +/// +/// When `inlined_imports` is true, embeds the prompt body in a heredoc +/// step (legacy behaviour). When false (default), emits a three-step +/// bundle: NodeTool@0 + scripts.zip download + `node prompt.js` invocation +/// with the [`PromptSpec`] passed via env. 
+pub fn generate_prepare_agent_prompt( + inlined_imports: bool, + markdown_body: &str, + source_path: &str, + supplements: Vec, + parameters: &[PipelineParameter], +) -> Result { + if inlined_imports { + // Re-indent body lines by 4 spaces to align under the heredoc. + let body_indented = markdown_body + .lines() + .map(|l| { + if l.is_empty() { + String::new() + } else { + format!(" {l}") + } + }) + .collect::>() + .join("\n"); + let trimmed = body_indented.trim_start_matches(' '); + return Ok(format!( + r#"- bash: | + # Write agent instructions to /tmp so it's accessible inside AWF container + cat > "/tmp/awf-tools/agent-prompt.md" << 'AGENT_PROMPT_EOF' + {trimmed} + AGENT_PROMPT_EOF + + echo "Agent prompt:" + cat "/tmp/awf-tools/agent-prompt.md" + displayName: "Prepare agent prompt""#, + )); + } + + // Runtime branch — verify the source path will resolve at runtime, + // build a PromptSpec, base64-encode, and emit the three-step bundle. + // The caller (compile_shared) resolves {{ trigger_repo_directory }} + // before passing source_path here, so a workspace-relative path always + // starts with $(Build.SourcesDirectory). A filename-only fallback (which + // happens when the input path is absolute and not normalisable to the + // repo) does not start with that prefix and is rejected. + if !source_path.starts_with("$(Build.SourcesDirectory)") { + anyhow::bail!( + "Cannot determine workspace-relative path for the agent .md \ + ({source_path}). The pipeline cannot read the source at runtime. \ + Either compile the agent from inside the repository or set \ + `inlined-imports: true` in the front matter to embed the prompt \ + at compile time." 
+ ); + } + + let spec = PromptSpec { + version: PROMPT_SPEC_VERSION, + source_path: source_path.to_string(), + output_path: "/tmp/awf-tools/agent-prompt.md".to_string(), + supplements, + parameters: parameters.iter().map(|p| p.name.clone()).collect(), + }; + let json = serde_json::to_string(&spec).context("serializing PromptSpec to JSON")?; + let b64 = BASE64.encode(json.as_bytes()); + + let mut env_lines = Vec::new(); + env_lines.push(format!(" ADO_AW_PROMPT_SPEC: \"{b64}\"")); + for p in parameters { + let upper = p.name.to_uppercase().replace('-', "_"); + env_lines.push(format!( + " ADO_AW_PARAM_{upper}: ${{{{ parameters.{name} }}}}", + name = p.name, + )); + } + let env_block = env_lines.join("\n"); + + let node_step = super::extensions::node_tool_step( + "Install Node.js 20.x for prompt renderer", + ); + let download_step = super::extensions::scripts_download_step(); + + Ok(format!( + r#"{node_step} + +{download_step} + +- bash: | + set -euo pipefail + node /tmp/ado-aw-scripts/prompt.js + echo "Agent prompt:" + cat "/tmp/awf-tools/agent-prompt.md" + displayName: "Render agent prompt" + env: +{env_block}"#, + )) +} + /// Generate finalize steps (inline) pub fn generate_finalize_steps(finalize_steps: &[serde_yaml::Value]) -> String { if finalize_steps.is_empty() { @@ -2040,7 +2173,8 @@ pub async fn compile_shared( .is_some_and(|cm| cm.is_enabled()); let parameters = build_parameters(&front_matter.parameters, has_memory); let parameters_yaml = generate_parameters(¶meters)?; - let prepare_steps = generate_prepare_steps(&front_matter.steps, extensions)?; + let prepare_steps = + generate_prepare_steps(&front_matter.steps, extensions, front_matter.inlined_imports)?; let finalize_steps = generate_finalize_steps(&front_matter.post_steps); let pr_expression = pr_filters.and_then(|f| f.expression.as_deref()); let pipeline_expression = pipeline_filters.and_then(|f| f.expression.as_deref()); @@ -2154,6 +2288,22 @@ pub async fn compile_shared( // 14. 
Shared replacements let compiler_version = env!("CARGO_PKG_VERSION"); let integrity_check = generate_integrity_check(config.skip_integrity); + let prompt_supplements = collect_prompt_supplements(extensions); + // Resolve {{ trigger_repo_directory }} inside source_path before building + // the PromptSpec, because the spec is base64-encoded into a single env + // value and the outer template-substitution pass cannot reach inside the + // base64 blob. + let resolved_source_path = source_path.replace( + "{{ trigger_repo_directory }}", + &trigger_repo_directory, + ); + let prepare_agent_prompt = generate_prepare_agent_prompt( + front_matter.inlined_imports, + markdown_body, + &resolved_source_path, + prompt_supplements, + &front_matter.parameters, + )?; let replacements: Vec<(&str, &str)> = vec![ ("{{ parameters }}", ¶meters_yaml), ("{{ compiler_version }}", compiler_version), @@ -2187,7 +2337,11 @@ pub async fn compile_shared( ("{{ trigger_repo_directory }}", &trigger_repo_directory), ("{{ working_directory }}", &working_directory), ("{{ workspace }}", &working_directory), - ("{{ agent_content }}", markdown_body), + // {{ prepare_agent_prompt }} expands to either the legacy heredoc + // step (when inlined-imports: true) or a NodeTool@0 + scripts.zip + // download + node prompt.js bundle (default). It replaces the + // earlier {{ agent_content }} placeholder. 
+ ("{{ prepare_agent_prompt }}", &prepare_agent_prompt), ("{{ acquire_ado_token }}", &acquire_read_token), ("{{ engine_env }}", &engine_env), ("{{ engine_log_dir }}", engine_log_dir), @@ -4082,7 +4236,7 @@ mod tests { "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); assert!( !result.is_empty(), "memory steps must be emitted when cache-memory enabled" @@ -4097,7 +4251,7 @@ mod tests { fn test_generate_prepare_steps_without_memory_and_no_steps_has_safeoutputs_prompt() { let fm = minimal_front_matter(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); // SafeOutputs always contributes a prompt supplement assert!( result.contains("Safe Outputs"), @@ -4111,7 +4265,7 @@ mod tests { "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); assert!( result.contains("DownloadPipelineArtifact"), "memory steps must include the artifact download task" @@ -4128,7 +4282,7 @@ mod tests { let exts = crate::compile::extensions::collect_extensions(&fm); let step: serde_yaml::Value = serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); + let result = generate_prepare_steps(&[step], &exts, true).unwrap(); assert!(!result.is_empty(), "user steps should be present"); assert!( !result.contains("agent_memory"), @@ -4144,7 +4298,7 @@ mod tests { let exts = crate::compile::extensions::collect_extensions(&fm); let step: 
serde_yaml::Value = serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); + let result = generate_prepare_steps(&[step], &exts, true).unwrap(); assert!( result.contains("agent_memory"), "memory reference must be present" @@ -4161,7 +4315,7 @@ mod tests { "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); assert!(result.contains("elan-init.sh"), "should include elan installer"); assert!(result.contains("Lean 4"), "should include Lean prompt"); assert!(result.contains("--default-toolchain stable"), "should default to stable"); @@ -4174,7 +4328,7 @@ mod tests { "---\nname: test\ndescription: test\nruntimes:\n lean:\n toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); assert!( result.contains("--default-toolchain leanprover/lean4:v4.29.1"), "should use specified toolchain" @@ -4187,12 +4341,202 @@ mod tests { "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let result = generate_prepare_steps(&[], &exts, true).unwrap(); assert!(result.contains("agent_memory"), "memory steps present"); assert!(result.contains("elan-init.sh"), "lean install present"); assert!(result.contains("Lean 4"), "lean prompt present"); } + #[test] + fn test_generate_prepare_steps_skips_supplements_when_runtime_branch() { + // When inlined-imports is false (default), prompt_supplement() content + // travels via 
PromptSpec/env, NOT via wrap_prompt_append `cat >>` steps. + let fm = minimal_front_matter(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts, false).unwrap(); + // SafeOutputs supplement should NOT appear in prepare steps when inlined=false. + assert!( + !result.contains("Safe Outputs"), + "supplements must not be emitted as cat >> steps in runtime branch; got:\n{result}" + ); + } + + // ─── generate_prepare_agent_prompt ─────────────────────────────────── + + #[test] + fn test_runtime_branch_emits_prompt_js_invocation() { + let yaml = generate_prepare_agent_prompt( + false, + "Hello world", + "$(Build.SourcesDirectory)/agents/x.md", + vec![], + &[], + ) + .unwrap(); + assert!( + yaml.contains("node /tmp/ado-aw-scripts/prompt.js"), + "runtime branch must invoke prompt.js: {yaml}" + ); + assert!( + yaml.contains("ADO_AW_PROMPT_SPEC:"), + "runtime branch must emit ADO_AW_PROMPT_SPEC env: {yaml}" + ); + // Body must NOT appear verbatim in the runtime branch output. 
+ assert!( + !yaml.contains("Hello world"), + "runtime branch must not embed body verbatim: {yaml}" + ); + } + + #[test] + fn test_inlined_branch_embeds_body_in_heredoc() { + let yaml = generate_prepare_agent_prompt( + true, + "Hello world", + "agents/x.md", + vec![], + &[], + ) + .unwrap(); + assert!( + yaml.contains("AGENT_PROMPT_EOF"), + "inlined branch must use heredoc: {yaml}" + ); + assert!( + yaml.contains("Hello world"), + "inlined branch must embed body verbatim: {yaml}" + ); + assert!( + !yaml.contains("prompt.js"), + "inlined branch must not invoke prompt.js: {yaml}" + ); + } + + #[test] + fn test_runtime_branch_emits_param_env_mappings() { + let params = vec![ + crate::compile::types::PipelineParameter { + name: "target_repo".into(), + display_name: None, + param_type: None, + default: None, + values: None, + }, + crate::compile::types::PipelineParameter { + name: "dry-run".into(), + display_name: None, + param_type: None, + default: None, + values: None, + }, + ]; + let yaml = generate_prepare_agent_prompt( + false, + "body", + "$(Build.SourcesDirectory)/x.md", + vec![], + ¶ms, + ) + .unwrap(); + assert!( + yaml.contains("ADO_AW_PARAM_TARGET_REPO: ${{ parameters.target_repo }}"), + "param env must be uppercased: {yaml}" + ); + assert!( + yaml.contains("ADO_AW_PARAM_DRY_RUN: ${{ parameters.dry-run }}"), + "hyphen must convert to underscore in env name but stay in the param ref: {yaml}" + ); + } + + #[test] + fn test_runtime_branch_rejects_filename_only_source() { + // Plain filename (no $(Build.SourcesDirectory) prefix) means the + // pipeline cannot locate the .md at runtime; bail with a helpful + // error pointing at the inlined-imports escape hatch. 
+ let result = generate_prepare_agent_prompt( + false, + "body", + "agent.md", + vec![], + &[], + ); + assert!(result.is_err()); + let msg = format!("{:#}", result.unwrap_err()); + assert!( + msg.contains("inlined-imports: true"), + "error must hint at the workaround: {msg}" + ); + } + + #[test] + fn test_runtime_branch_accepts_workspace_relative_source() { + let result = generate_prepare_agent_prompt( + false, + "body", + "$(Build.SourcesDirectory)/agents/foo.md", + vec![], + &[], + ); + assert!(result.is_ok(), "valid source path should succeed: {:?}", result); + } + + #[test] + fn test_inlined_branch_accepts_any_source() { + // The legacy heredoc path embeds the body directly and never reads + // the .md at runtime, so any source_path is acceptable. + let result = generate_prepare_agent_prompt( + true, + "body", + "agent.md", + vec![], + &[], + ); + assert!(result.is_ok()); + } + + #[test] + fn test_runtime_branch_includes_supplements_in_spec() { + use crate::compile::prompt_ir::PromptSupplement; + let supplements = vec![ + PromptSupplement { + name: "Foo".into(), + content: "## Foo content".into(), + }, + ]; + let yaml = generate_prepare_agent_prompt( + false, + "body", + "$(Build.SourcesDirectory)/x.md", + supplements, + &[], + ) + .unwrap(); + // The supplement content is base64-encoded inside ADO_AW_PROMPT_SPEC, + // so it shouldn't appear in plain text in the YAML. + assert!(!yaml.contains("Foo content")); + // But the spec env line should be present. + assert!(yaml.contains("ADO_AW_PROMPT_SPEC:")); + } + + #[test] + fn test_collect_prompt_supplements_preserves_extension_order() { + // Combining lean + cache-memory + (always-on safeoutputs) exercises + // ordering across phases (Runtime → Tool) and within each phase. 
+ let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let supplements = collect_prompt_supplements(&exts); + // We expect at least Lean (Runtime), then any Tool-phase supplements + // (cache-memory, SafeOutputs). Lean must come first. + assert!(!supplements.is_empty()); + assert_eq!( + supplements[0].name, "Lean 4", + "Runtime-phase Lean supplement must come first; got order: {:?}", + supplements.iter().map(|s| &s.name).collect::>() + ); + } + // ─── generate_awf_mounts ────────────────────────────────────────────── #[test] diff --git a/src/compile/extensions/mod.rs b/src/compile/extensions/mod.rs index 30f1646a..e3d2a281 100644 --- a/src/compile/extensions/mod.rs +++ b/src/compile/extensions/mod.rs @@ -689,5 +689,39 @@ pub fn wrap_prompt_append(content: &str, display_name: &str) -> Result { )) } +/// Base URL for ado-aw release artifacts (used by `scripts_download_step`). +const SCRIPTS_RELEASE_BASE_URL: &str = "https://github.com/githubnext/ado-aw/releases/download"; + +/// `NodeTool@0` step that installs Node 20.x. Required by any +/// `ado-script` bundle (currently `gate.js` and `prompt.js`). Pin to LTS +/// major; ado-aw only requires basic Node features, so any 20.x patch +/// release is acceptable. NodeTool@0 is preinstalled on +/// Microsoft-hosted and 1ES images and idempotent across multiple +/// invocations in the same job, so emitting it more than once per job +/// is safe. +pub fn node_tool_step(display_name: &str) -> String { + format!( + "- task: NodeTool@0\n inputs:\n versionSpec: \"20.x\"\n displayName: \"{display_name}\"\n condition: succeeded()" + ) +} + +/// Bash step that downloads `scripts.zip` (the bundled ado-script +/// artefacts) for the running ado-aw release into +/// `/tmp/ado-aw-scripts/` and unzips it. 
Used by any extension or +/// step that needs runtime-bundled scripts (currently `gate.js` and +/// `prompt.js`). Each pipeline job that needs scripts emits its own +/// download because jobs run on independent pool agents. +pub fn scripts_download_step() -> String { + let version = env!("CARGO_PKG_VERSION"); + format!( + r#"- bash: | + mkdir -p /tmp/ado-aw-scripts + curl -fsSL "{SCRIPTS_RELEASE_BASE_URL}/v{version}/scripts.zip" -o /tmp/ado-aw-scripts/scripts.zip + cd /tmp/ado-aw-scripts && unzip -o scripts.zip + displayName: "Download ado-aw scripts (v{version})" + condition: succeeded()"#, + ) +} + #[cfg(test)] mod tests; diff --git a/src/compile/extensions/trigger_filters.rs b/src/compile/extensions/trigger_filters.rs index 268ccfd4..e6e4eb9c 100644 --- a/src/compile/extensions/trigger_filters.rs +++ b/src/compile/extensions/trigger_filters.rs @@ -20,9 +20,6 @@ use crate::compile::types::{PipelineFilters, PrFilters}; /// The path where the gate evaluator is downloaded at pipeline runtime. const GATE_EVAL_PATH: &str = "/tmp/ado-aw-scripts/gate.js"; -/// Base URL for ado-aw release artifacts. -const RELEASE_BASE_URL: &str = "https://github.com/githubnext/ado-aw/releases/download"; - /// Compiler extension that delivers and runs the gate evaluator for /// complex trigger filters. pub struct TriggerFiltersExtension { @@ -63,7 +60,6 @@ impl CompilerExtension for TriggerFiltersExtension { } fn setup_steps(&self, _ctx: &CompileContext) -> Result> { - let version = env!("CARGO_PKG_VERSION"); let mut gate_steps = Vec::new(); // PR gate step @@ -97,22 +93,15 @@ impl CompilerExtension for TriggerFiltersExtension { let mut steps = Vec::new(); - // Install Node 20.x for the gate evaluator. Pin to LTS major; ado-aw - // only requires basic Node features, so any 20.x patch release is - // acceptable. NodeTool@0 is preinstalled on Microsoft-hosted and 1ES - // images. 
- steps.push( - "- task: NodeTool@0\n inputs:\n versionSpec: \"20.x\"\n displayName: \"Install Node.js 20.x for gate evaluator\"\n condition: succeeded()".to_string(), - ); - - steps.push(format!( - r#"- bash: | - mkdir -p /tmp/ado-aw-scripts - curl -fsSL "{RELEASE_BASE_URL}/v{version}/scripts.zip" -o /tmp/ado-aw-scripts/scripts.zip - cd /tmp/ado-aw-scripts && unzip -o scripts.zip - displayName: "Download ado-aw scripts (v{version})" - condition: succeeded()"#, + // Install Node 20.x for the gate evaluator and download + // scripts.zip (which carries gate.js plus any other bundled + // ado-script artefacts). Helpers live on the parent module so + // other use sites (e.g. the Agent-job prompt renderer) stay + // in lockstep on URL/version. + steps.push(super::node_tool_step( + "Install Node.js 20.x for gate evaluator", )); + steps.push(super::scripts_download_step()); steps.extend(gate_steps); Ok(steps) diff --git a/src/compile/mod.rs b/src/compile/mod.rs index a8fc2049..55546ae9 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod filter_ir; mod gitattributes; mod onees; pub(crate) mod pr_filters; +pub(crate) mod prompt_ir; mod standalone; pub mod types; diff --git a/src/compile/prompt_ir.rs b/src/compile/prompt_ir.rs new file mode 100644 index 00000000..55e9743e --- /dev/null +++ b/src/compile/prompt_ir.rs @@ -0,0 +1,102 @@ +//! Serializable specification for the runtime prompt renderer +//! (`scripts/prompt.js`). Mirrors the design of `filter_ir::GateSpec`. +//! +//! The compiler builds a [`PromptSpec`] at compile time, JSON-serializes it, +//! base64-encodes the result, and emits it as `ADO_AW_PROMPT_SPEC` env on the +//! prompt.js step. At pipeline runtime, prompt.js decodes the spec, validates +//! the version, reads the source markdown from the workspace, strips its +//! front matter, applies variable substitution, appends supplements, and +//! writes the rendered prompt to `output_path`. 
+ +use schemars::JsonSchema; +use serde::Serialize; + +/// Pinned schema version. Bump only on breaking changes to [`PromptSpec`]. +/// `prompt.js` refuses to run on an unknown version. +pub const PROMPT_SPEC_VERSION: u32 = 1; + +/// Top-level spec consumed by `prompt.js` at pipeline runtime. +#[derive(Debug, Clone, Serialize, JsonSchema)] +pub struct PromptSpec { + /// Schema version; refused on mismatch. + pub version: u32, + /// Absolute path to the source `.md` file in the workspace. + pub source_path: String, + /// Absolute path where the rendered prompt should be written. + pub output_path: String, + /// Extension prompt supplements, in render order + /// (Runtimes phase first, then Tools, stable within each phase). + pub supplements: Vec, + /// Declared parameter names available for `${{ parameters.NAME }}` + /// substitution. Names not in this list are left verbatim by + /// `prompt.js` with a runtime warning. + pub parameters: Vec, +} + +/// One block of additional prompt content contributed by an extension. +#[derive(Debug, Clone, Serialize, JsonSchema)] +pub struct PromptSupplement { + /// Extension display name (used for VSO logging only — not rendered). + pub name: String, + /// Markdown to append. May contain `${{ parameters.* }}` or `$(VAR)` + /// references; substituted by `prompt.js` using the same rules as + /// the body. + pub content: String, +} + +/// Generate the JSON Schema for [`PromptSpec`] (consumed by the +/// TS workspace's codegen step). 
+pub fn generate_prompt_spec_schema() -> String { + let schema = schemars::schema_for!(PromptSpec); + serde_json::to_string_pretty(&schema) + .expect("PromptSpec schema must serialize") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn schema_is_valid_json() { + let s = generate_prompt_spec_schema(); + let _: serde_json::Value = + serde_json::from_str(&s).expect("schema must be valid JSON"); + } + + #[test] + fn version_is_pinned() { + assert_eq!(PROMPT_SPEC_VERSION, 1); + } + + #[test] + fn schema_contains_expected_top_level_fields() { + let s = generate_prompt_spec_schema(); + // Sanity check that key field names appear in the generated schema + // (json2ts will rely on these to produce TS types). + assert!(s.contains("\"version\"")); + assert!(s.contains("\"source_path\"")); + assert!(s.contains("\"output_path\"")); + assert!(s.contains("\"supplements\"")); + assert!(s.contains("\"parameters\"")); + } + + #[test] + fn spec_serializes_to_expected_json_keys() { + let spec = PromptSpec { + version: PROMPT_SPEC_VERSION, + source_path: "/tmp/x.md".into(), + output_path: "/tmp/y.md".into(), + supplements: vec![PromptSupplement { + name: "Demo".into(), + content: "demo".into(), + }], + parameters: vec!["foo".into()], + }; + let json = serde_json::to_string(&spec).unwrap(); + assert!(json.contains("\"version\":1")); + assert!(json.contains("\"source_path\":\"/tmp/x.md\"")); + assert!(json.contains("\"output_path\":\"/tmp/y.md\"")); + assert!(json.contains("\"supplements\"")); + assert!(json.contains("\"parameters\":[\"foo\"]")); + } +} diff --git a/src/compile/types.rs b/src/compile/types.rs index 1f9d799d..e07124a0 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -614,6 +614,18 @@ pub struct FrontMatter { /// Runtime parameters for the pipeline (surfaced in ADO UI when queuing a run) #[serde(default)] pub parameters: Vec, + /// When true, embed the prompt body and extension supplements directly + /// into the compiled YAML at compile time 
instead of letting prompt.js + /// assemble them at runtime. Mirrors gh-aw's `inlined-imports` field. + /// + /// Default behaviour (false) renders the prompt at pipeline runtime by + /// reading the source `.md` from the workspace, which means body-only + /// edits to the markdown no longer require recompiling. Set this to + /// `true` to opt out — for example, when the Agent pool can't reach + /// github.com to download `scripts.zip`, when you need a self-contained + /// pipeline file, or when debugging prompt rendering. + #[serde(rename = "inlined-imports", default)] + pub inlined_imports: bool, } impl FrontMatter { @@ -1355,6 +1367,49 @@ Body assert!(fm.permissions.is_none()); } + // ─── inlined-imports field ────────────────────────────────────────── + + #[test] + fn test_inlined_imports_default_false() { + let content = r#"--- +name: "Test" +description: "Test" +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + assert!(!fm.inlined_imports); + } + + #[test] + fn test_inlined_imports_true() { + let content = r#"--- +name: "Test" +description: "Test" +inlined-imports: true +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + assert!(fm.inlined_imports); + } + + #[test] + fn test_inlined_imports_false_explicit() { + let content = r#"--- +name: "Test" +description: "Test" +inlined-imports: false +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + assert!(!fm.inlined_imports); + } + // ─── CacheMemoryToolConfig deserialization ────────────────────────────── #[test] diff --git a/src/data/1es-base.yml b/src/data/1es-base.yml index 7b0af5ca..4f498116 100644 --- a/src/data/1es-base.yml +++ b/src/data/1es-base.yml @@ -127,15 +127,7 @@ extends: cp "$(Agent.TempDirectory)/staging/mcpg-config.json" /tmp/awf-tools/staging/mcpg-config.json displayName: "Prepare tooling" - - bash: | - # Write agent instructions to /tmp so it's accessible inside AWF container 
- cat > "/tmp/awf-tools/agent-prompt.md" << 'AGENT_PROMPT_EOF' - {{ agent_content }} - AGENT_PROMPT_EOF - - echo "Agent prompt:" - cat "/tmp/awf-tools/agent-prompt.md" - displayName: "Prepare agent prompt" + {{ prepare_agent_prompt }} - task: DockerInstaller@0 displayName: "Install Docker" diff --git a/src/data/base.yml b/src/data/base.yml index b3437792..9d76e146 100644 --- a/src/data/base.yml +++ b/src/data/base.yml @@ -98,15 +98,7 @@ jobs: cp "$(Agent.TempDirectory)/staging/mcpg-config.json" /tmp/awf-tools/staging/mcpg-config.json displayName: "Prepare tooling" - - bash: | - # Write agent instructions to /tmp so it's accessible inside AWF container - cat > "/tmp/awf-tools/agent-prompt.md" << 'AGENT_PROMPT_EOF' - {{ agent_content }} - AGENT_PROMPT_EOF - - echo "Agent prompt:" - cat "/tmp/awf-tools/agent-prompt.md" - displayName: "Prepare agent prompt" + {{ prepare_agent_prompt }} - task: DockerInstaller@0 displayName: "Install Docker" diff --git a/src/main.rs b/src/main.rs index 44cfd952..4367e699 100644 --- a/src/main.rs +++ b/src/main.rs @@ -140,6 +140,14 @@ enum Commands { #[arg(short, long)] output: Option<PathBuf>, }, + /// Export the prompt spec JSON Schema (build-time tool for the + /// scripts/ado-script TypeScript workspace). + #[command(hide = true)] + ExportPromptSchema { + /// Output path; if omitted, prints to stdout. + #[arg(short, long)] + output: Option<PathBuf>, + }, } #[derive(Parser, Debug)] @@ -343,6 +351,7 @@ async fn main() -> Result<()> { Some(Commands::Init { .. }) => "init", Some(Commands::Configure { .. }) => "configure", Some(Commands::ExportGateSchema { .. }) => "export-gate-schema", + Some(Commands::ExportPromptSchema { ..
}) => "export-prompt-schema", None => "ado-aw", }; @@ -462,6 +471,21 @@ async fn main() -> Result<()> { None => print!("{}", schema), } } + Commands::ExportPromptSchema { output } => { + let schema = compile::prompt_ir::generate_prompt_spec_schema(); + match output { + Some(path) => { + if let Some(parent) = path + .parent() + .filter(|parent| !parent.as_os_str().is_empty()) + { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&path, &schema)?; + } + None => print!("{}", schema), + } + } } Ok(()) }