diff --git a/Cargo.lock b/Cargo.lock index 5620727c..d5ddb719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "sha2", "similar", "subtle", "tempfile", @@ -204,6 +205,15 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bstr" version = "1.12.1" @@ -313,6 +323,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "convert_case" version = "0.10.0" @@ -374,6 +390,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "darling" version = "0.21.3" @@ -431,6 +456,17 @@ dependencies = [ "syn", ] +[[package]] +name = "digest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", +] + [[package]] name = "dirs" version = "6.0.0" @@ -800,6 +836,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hybrid-array" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" version = "1.8.1" @@ -1853,6 +1898,17 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2212,6 +2268,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unicode-ident" version = "1.0.22" diff --git a/Cargo.toml b/Cargo.toml index 9d12cae9..a01eca6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ rand = "0.10.1" base64 = "0.22.1" glob-match = "0.2.1" similar = "3.1.0" +sha2 = "0.11.0" [dev-dependencies] reqwest = { version = "0.12", features = ["blocking"] } diff --git a/docs/safe-outputs.md b/docs/safe-outputs.md index 4ba50349..d8dfb6c8 100644 --- a/docs/safe-outputs.md +++ b/docs/safe-outputs.md @@ -418,6 +418,44 @@ safe-outputs: max: 1 # Maximum per run (default: 1) ``` +### upload-build-artifact +Attaches a workspace file to an Azure DevOps build as a build attachment via the +ADO build attachments REST API +(`PUT /_apis/build/builds/{buildId}/attachments/{type}/{name}`). + +**Omit `build_id` to target the current pipeline run** — the executor resolves +the build ID from the `BUILD_BUILDID` environment variable automatically. When +`build_id` is provided, the file is attached to that specific build — useful for +posthumously decorating a finished build with a generated report, screenshot, or +log bundle. + +The tool stages the file during Stage 1 (MCP) by copying it into the +safe-outputs directory; Stage 3 reads the staged copy and uploads it via the REST +API. + +**Agent parameters:** +- `build_id` *(optional)* - Target build ID. Omit to attach to the current pipeline run. Must be positive when specified. +- `artifact_name` - Artifact name (1–100 chars, alphanumeric / `-` / `_` / `.`, no leading `.`) +- `file_path` - Relative path to the file in the workspace (no directory traversal) + +**Configuration options (front matter):** +```yaml +safe-outputs: + upload-build-artifact: + max-file-size: 52428800 # Maximum file size in bytes (default: 50 MB) + allowed-extensions: [] # Optional — restrict file types (e.g., [".png", ".pdf", ".log"]) + allowed-artifact-names: [] # Optional — restrict names (suffix `*` = prefix match) + allowed-build-ids: [] # Optional — restrict target builds (skipped when targeting current build) + name-prefix: "" # Optional — prepended to the agent-supplied artifact name + attachment-type: "agent-artifact" # Optional — {type} segment in the attachments URL (default: "agent-artifact") + max: 3 # Maximum per run (default: 3) +``` + +**Notes:** +- Single-file only; directory uploads are not supported. +- When `build_id` is omitted and `allowed-build-ids` is configured, the allow-list check is skipped — the current build is implicitly trusted. +- The default `attachment-type` is `agent-artifact` so executor contributions are visually distinguishable from the build's own artifacts. + ### cache-memory (moved to `tools:`) Memory is now configured as a first-class tool under `tools: cache-memory:` instead of `safe-outputs: memory:`. See the [Cache Memory section](./tools.md#cache-memory-cache-memory) in `docs/tools.md` for details. diff --git a/src/execute.rs b/src/execute.rs index b8f9b46f..51db139a 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -17,7 +17,8 @@ use crate::safeoutputs::{ ExecutionContext, ExecutionResult, Executor, LinkWorkItemsResult, MissingDataResult, MissingToolResult, NoopResult, QueueBuildResult, ReplyToPrCommentResult, ReportIncompleteResult, ResolvePrThreadResult, SubmitPrReviewResult, ToolResult, - UpdatePrResult, UpdateWikiPageResult, UpdateWorkItemResult, UploadWorkitemAttachmentResult, + UpdatePrResult, UpdateWikiPageResult, UpdateWorkItemResult, UploadBuildArtifactResult, + UploadWorkitemAttachmentResult, }; // Re-export memory types for use by main.rs @@ -91,6 +92,7 @@ pub async fn execute_safe_outputs( AddBuildTagResult, CreateBranchResult, UpdatePrResult, + UploadBuildArtifactResult, UploadWorkitemAttachmentResult, SubmitPrReviewResult, ReplyToPrCommentResult, @@ -273,6 +275,7 @@ pub async fn execute_safe_output( "add-build-tag" => AddBuildTagResult, "create-branch" => CreateBranchResult, "update-pr" => UpdatePrResult, + "upload-build-artifact" => UploadBuildArtifactResult, "upload-workitem-attachment" => UploadWorkitemAttachmentResult, "submit-pr-review" => SubmitPrReviewResult, "reply-to-pr-review-comment" => ReplyToPrCommentResult, diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 00000000..0a7780dd --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,34 @@ +//! Cryptographic hash utilities shared across the crate. +//! +//! Used by safe-output tools to record and verify file integrity between +//! Stage 1 (MCP, in-sandbox) and Stage 3 (executor, outside sandbox). + +use sha2::{Digest, Sha256}; + +/// Compute the SHA-256 hex digest of a byte slice. +pub(crate) fn sha256_hex(data: &[u8]) -> String { + let hash = Sha256::digest(data); + hash.iter().map(|b| format!("{:02x}", b)).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sha256_empty() { + // SHA-256 of empty input is well-known. + assert_eq!( + sha256_hex(b""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ); + } + + #[test] + fn test_sha256_hello() { + assert_eq!( + sha256_hex(b"hello"), + "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + ); + } +} diff --git a/src/main.rs b/src/main.rs index c5a927d0..a3890389 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ mod ecosystem_domains; mod engine; mod execute; mod fuzzy_schedule; +mod hash; mod init; mod logging; mod mcp; diff --git a/src/mcp.rs b/src/mcp.rs index 5eaf12f8..0b796584 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -27,6 +27,7 @@ use crate::safeoutputs::{ QueueBuildResult, SubmitPrReviewParams, SubmitPrReviewResult, ToolResult, UpdatePrParams, UpdatePrResult, UpdateWorkItemParams, UpdateWorkItemResult, + UploadBuildArtifactParams, UploadBuildArtifactResult, DEFAULT_MAX_FILE_SIZE, UploadWorkitemAttachmentParams, UploadWorkitemAttachmentResult, anyhow_to_mcp_error, }; @@ -690,6 +691,10 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che anyhow_to_mcp_error(anyhow::anyhow!("Failed to write patch file: {}", e)) })?; + // Compute SHA-256 of the patch for cross-stage integrity verification. + let patch_sha256 = + crate::hash::sha256_hex(patch_content.as_bytes()); + // Generate source branch name from sanitized title + short unique suffix let title_slug = slugify_title(&sanitized.title); let short_id = generate_short_id(); @@ -699,7 +704,7 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che format!("agent/{}-{}", title_slug, short_id) }; - // Create the result with patch file reference + // Create the result with patch file reference and integrity hash let result = CreatePrResult::new( sanitized.title.clone(), sanitized.description.clone(), @@ -708,6 +713,7 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che repository.to_string(), sanitized.labels, Some(merge_base), + patch_sha256, ); // Write to safe outputs @@ -983,6 +989,154 @@ uploaded and linked during safe output processing. File size and type restrictio ))])) } + #[tool( + name = "upload-build-artifact", + description = "Attach a workspace file to an Azure DevOps build as a build attachment. \ +Omit `build_id` to target the current pipeline run (the executor resolves it from the \ +BUILD_BUILDID environment variable automatically). When `build_id` is provided, the file is \ +attached to that specific build — useful for posthumously decorating a finished build with a \ +generated report, screenshot, or log bundle. The file will be staged now and uploaded via the \ +ADO build attachments REST API during safe output processing. File size, extension, \ +artifact-name and build-id restrictions may apply per the workflow's safe-outputs config." + )] + async fn upload_build_artifact( + &self, + params: Parameters, + ) -> Result { + info!( + "Tool called: upload-build-artifact - artifact '{}' file '{}' build {:?}", + params.0.artifact_name, params.0.file_path, params.0.build_id + ); + + // Validate the agent-supplied params (build id, artifact name charset, + // path traversal / absolute / null bytes, etc.) before touching the + // filesystem. + crate::safeoutputs::Validate::validate(¶ms.0).map_err(anyhow_to_mcp_error)?; + + // Resolve the agent-supplied file path against the bounding directory + // (the agent's workspace root inside the sandbox) and verify it + // canonicalises to a location *inside* that directory — guarding + // against symlink escapes. + let resolved = self.bounding_directory.join(¶ms.0.file_path); + let canonical = resolved.canonicalize().map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!( + "File '{}' could not be located inside the workspace: {}", + params.0.file_path, + e + )) + })?; + let canonical_root = self.bounding_directory.canonicalize().map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to canonicalize bounding directory: {}", + e + )) + })?; + if !canonical.starts_with(&canonical_root) { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "File '{}' resolves outside the workspace (symlink escape)", + params.0.file_path + ))); + } + + // Reject directories — upload-build-artifact is single-file only. + let metadata = tokio::fs::metadata(&canonical).await.map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!("Failed to stat '{}': {}", params.0.file_path, e)) + })?; + if metadata.is_dir() { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "File '{}' is a directory; upload-build-artifact only supports single files", + params.0.file_path + ))); + } + let file_size = metadata.len(); + + // Defense-in-depth: reject files exceeding the default max size at + // Stage 1 to prevent a misbehaving agent from filling the staging + // disk before Stage 3 gets a chance to enforce the operator's limit. + if file_size > DEFAULT_MAX_FILE_SIZE { + return Err(anyhow_to_mcp_error(anyhow::anyhow!( + "File '{}' is {} bytes, exceeding the maximum staging size of {} bytes", + params.0.file_path, + file_size, + DEFAULT_MAX_FILE_SIZE + ))); + } + + // Generate a unique staged filename and copy the file into the + // safe-outputs directory. Stage 3 reads it back from there because + // the agent's sandbox workspace is no longer accessible by then. + // The staged name preserves the original extension and embeds a + // short random suffix to avoid collisions across multiple calls. + let extension = std::path::Path::new(¶ms.0.file_path) + .extension() + .and_then(|s| s.to_str()) + .map(|s| { + s.chars() + .filter(|c| c.is_ascii_alphanumeric()) + .take(16) + .collect::() + }) + .unwrap_or_default(); + // Staged filename embeds the artifact name (up to 100 chars validated + // above) plus a fixed prefix and 8-char random suffix. The resulting + // max length (~140 chars) is well within filesystem limits. + let staged_filename = if extension.is_empty() { + format!( + "upload-build-artifact-{}-{}", + params.0.artifact_name, + generate_short_id() + ) + } else { + format!( + "upload-build-artifact-{}-{}.{}", + params.0.artifact_name, + generate_short_id(), + extension + ) + }; + // Read the source file, compute its SHA-256 for cross-stage integrity + // verification, then write it to the staging directory. Hashing the + // source before writing (rather than re-reading the staged copy) + // eliminates a TOCTOU window between copy and re-read. + let source_bytes = tokio::fs::read(&canonical).await.map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to read source file '{}': {}", + params.0.file_path, + e + )) + })?; + let staged_sha256 = crate::hash::sha256_hex(&source_bytes); + + let staged_path = self.output_directory.join(&staged_filename); + tokio::fs::write(&staged_path, &source_bytes).await.map_err(|e| { + anyhow_to_mcp_error(anyhow::anyhow!( + "Failed to stage file '{}' into safe-outputs directory: {}", + params.0.file_path, + e + )) + })?; + + let result = UploadBuildArtifactResult::new( + params.0.build_id, + params.0.artifact_name.clone(), + params.0.file_path.clone(), + staged_filename.clone(), + file_size, + staged_sha256, + ); + self.write_safe_output_file(&result).await + .map_err(|e| anyhow_to_mcp_error(anyhow::anyhow!("Failed to write safe output: {}", e)))?; + + let build_desc = match params.0.build_id { + Some(id) => format!("build #{}", id), + None => "the current build".to_string(), + }; + Ok(CallToolResult::success(vec![Content::text(format!( + "Artifact '{}' queued from file '{}' ({} bytes) for {}. The file will be uploaded during safe output processing.", + result.artifact_name, result.file_path, file_size, build_desc + ))])) + } + #[tool( name = "submit-pr-review", description = "Submit a pull request review with a decision (approve, request-changes, \ diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index d5aa0adb..154b4fa1 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -272,6 +272,8 @@ struct CreatePrResultFields { agent_labels: Vec, #[serde(skip_serializing_if = "Option::is_none")] base_commit: Option, + /// SHA-256 hex digest of the patch file, recorded at staging time. + patch_sha256: String, } impl Validate for CreatePrResultFields {} @@ -307,6 +309,10 @@ tool_result! { /// branches and is resolved when the PR is merged. #[serde(skip_serializing_if = "Option::is_none")] base_commit: Option, + /// SHA-256 hex digest of the patch file recorded at Stage 1. + /// Stage 3 re-hashes the file and rejects mismatches — catches + /// patch file tampering between stages. + patch_sha256: String, } } @@ -330,6 +336,7 @@ impl CreatePrResult { repository: String, agent_labels: Vec, base_commit: Option, + patch_sha256: String, ) -> Self { Self { name: Self::NAME.to_string(), @@ -340,6 +347,7 @@ impl CreatePrResult { repository, agent_labels, base_commit, + patch_sha256, } } } @@ -674,6 +682,19 @@ impl Executor for CreatePrResult { .context("Failed to read patch file")?; debug!("Patch content size: {} bytes", patch_content.len()); + // SHA-256 integrity check: verify the patch file hasn't been tampered + // with between Stage 1 and Stage 3. + let live_hash = + crate::hash::sha256_hex(patch_content.as_bytes()); + if live_hash != self.patch_sha256 { + return Ok(ExecutionResult::failure(format!( + "Patch file SHA-256 mismatch: expected {}, got {} — \ + the file may have been tampered with between stages", + self.patch_sha256, live_hash + ))); + } + debug!("Patch file SHA-256 verified: {}", live_hash); + // Excluded files are handled via --exclude flags on git am / git apply, // which filters them at the git level rather than post-processing patch content. // This is the same approach used by gh-aw (via :(exclude) pathspecs). @@ -2397,4 +2418,62 @@ new file mode 100755 fn test_truncate_error_body_empty_input() { assert_eq!(truncate_error_body("", 5), ""); } + + #[tokio::test] + async fn test_executor_rejects_patch_sha256_mismatch() { + let dir = tempfile::tempdir().unwrap(); + let patch_file = "test-repo-20260501.patch"; + let patch_content = "diff --git a/file.txt b/file.txt\n--- a/file.txt\n+++ b/file.txt\n@@ -1 +1 @@\n-old\n+new\n"; + std::fs::write(dir.path().join(patch_file), patch_content).unwrap(); + + // Record the hash of different content. + let wrong_hash = crate::hash::sha256_hex(b"completely different patch"); + + let result = CreatePrResult::new( + "Test PR".to_string(), + "Description of the PR".to_string(), + "agent/test-pr-12345678".to_string(), + patch_file.to_string(), + "self".to_string(), + vec![], + None, + wrong_hash, + ); + + let ctx = ExecutionContext { + ado_org_url: Some("https://dev.azure.com/test".to_string()), + ado_organization: Some("test".to_string()), + ado_project: Some("TestProject".to_string()), + access_token: Some("fake-token".to_string()), + source_directory: dir.path().to_path_buf(), + working_directory: dir.path().to_path_buf(), + tool_configs: std::collections::HashMap::new(), + repository_id: Some("repo-id".to_string()), + repository_name: Some("test-repo".to_string()), + allowed_repositories: std::collections::HashMap::new(), + agent_stats: None, + dry_run: false, + build_id: None, + build_number: None, + build_reason: None, + definition_name: None, + source_branch: None, + source_branch_name: None, + source_version: None, + triggered_by_build_id: None, + triggered_by_definition_name: None, + triggered_by_build_number: None, + triggered_by_project_id: None, + pull_request_id: None, + pull_request_source_branch: None, + pull_request_target_branch: None, + }; + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("SHA-256 mismatch"), + "expected SHA-256 mismatch failure, got: {}", + outcome.message + ); + } } diff --git a/src/safeoutputs/mod.rs b/src/safeoutputs/mod.rs index b91a2a8e..1331e1ef 100644 --- a/src/safeoutputs/mod.rs +++ b/src/safeoutputs/mod.rs @@ -42,6 +42,7 @@ pub const WRITE_REQUIRING_SAFE_OUTPUTS: &[&str] = tool_names![ AddBuildTagResult, CreateBranchResult, UpdatePrResult, + UploadBuildArtifactResult, UploadWorkitemAttachmentResult, SubmitPrReviewResult, ReplyToPrCommentResult, @@ -75,6 +76,7 @@ pub const ALL_KNOWN_SAFE_OUTPUTS: &[&str] = all_safe_output_names![ AddBuildTagResult, CreateBranchResult, UpdatePrResult, + UploadBuildArtifactResult, UploadWorkitemAttachmentResult, SubmitPrReviewResult, ReplyToPrCommentResult, @@ -262,6 +264,7 @@ mod submit_pr_review; mod update_pr; mod update_wiki_page; mod update_work_item; +mod upload_build_artifact; mod upload_workitem_attachment; pub use add_build_tag::*; @@ -287,6 +290,7 @@ pub use submit_pr_review::*; pub use update_pr::*; pub use update_wiki_page::*; pub use update_work_item::*; +pub use upload_build_artifact::*; pub use upload_workitem_attachment::*; #[cfg(test)] @@ -344,6 +348,7 @@ mod tests { assert!(AddBuildTagResult::REQUIRES_WRITE); assert!(CreateBranchResult::REQUIRES_WRITE); assert!(UpdatePrResult::REQUIRES_WRITE); + assert!(UploadBuildArtifactResult::REQUIRES_WRITE); assert!(UploadWorkitemAttachmentResult::REQUIRES_WRITE); assert!(SubmitPrReviewResult::REQUIRES_WRITE); assert!(ReplyToPrCommentResult::REQUIRES_WRITE); diff --git a/src/safeoutputs/upload_build_artifact.rs b/src/safeoutputs/upload_build_artifact.rs new file mode 100644 index 00000000..e84b0abc --- /dev/null +++ b/src/safeoutputs/upload_build_artifact.rs @@ -0,0 +1,1191 @@ +//! Upload build artifact safe output tool. +//! +//! Lets an agent propose attaching a workspace file to an Azure DevOps build +//! via the **build attachments** REST API +//! (`PUT /_apis/build/builds/{buildId}/attachments/{type}/{name}`). +//! +//! When `build_id` is omitted the executor targets the **current** pipeline run +//! by resolving `BUILD_BUILDID` from the environment — so the same tool covers +//! both "attach to the run I'm part of" and "attach to a different build". +//! +//! The flow mirrors `create-pull-request`: +//! +//! * **Stage 1 (MCP, in the agent sandbox):** the MCP server validates the +//! agent-supplied params, resolves `file_path` against the agent's workspace, +//! rejects symlink escapes / directories, and **copies the file** into the +//! safe-outputs `output_directory` under a generated unique name. The +//! sandbox workspace is no longer accessible at execution time, so the file +//! has to be staged where Stage 3 can find it. +//! * **Stage 3 (executor, outside the sandbox):** reads the staged file from +//! `ctx.working_directory.join(staged_file)`, applies operator-supplied +//! limits (`max-file-size`, `allowed-extensions`, `allowed-artifact-names`, +//! `allowed-build-ids`, `name-prefix`, `attachment-type`), resolves the +//! target build ID, and PUTs the bytes to the ADO build attachments endpoint. + +use ado_aw_derive::SanitizeConfig; +use log::{debug, info, warn}; +use percent_encoding::utf8_percent_encode; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use super::PATH_SEGMENT; +use crate::sanitize::SanitizeContent; +use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; +use crate::tool_result; +use crate::validate::{is_safe_path_segment, is_valid_artifact_name}; +use anyhow::{Context, ensure}; + +/// Parameters for attaching a workspace file to an ADO build. +#[derive(Deserialize, JsonSchema)] +pub struct UploadBuildArtifactParams { + /// The build ID to attach the file to. **Omit to target the current + /// pipeline run** — the executor resolves the build ID from the + /// `BUILD_BUILDID` environment variable automatically. When provided, + /// must be a positive integer. + pub build_id: Option, + + /// The artifact name to attach the file under. Used as the `{name}` + /// segment of the ADO build attachments URL. ADO requires a non-empty + /// name made of alphanumerics, `-`, `_`, or `.`. Must be 1-100 characters + /// and must not start with `.`. + pub artifact_name: String, + + /// Path to the file in the workspace to attach. Must be a relative path + /// with no directory traversal, no absolute prefix, and no `.git` + /// segments. + pub file_path: String, +} + +impl Validate for UploadBuildArtifactParams { + fn validate(&self) -> anyhow::Result<()> { + // build_id: if present, must be positive. + if let Some(id) = self.build_id { + ensure!(id > 0, "build_id must be positive when specified"); + } + + // artifact_name: ADO requires non-empty, ≤100 chars, charset + // [A-Za-z0-9._-], and (per our hardening) no leading `.`. + ensure!( + self.artifact_name.len() <= 100, + "artifact_name must be at most 100 characters" + ); + ensure!( + !self.artifact_name.starts_with('.'), + "artifact_name must not start with '.'" + ); + ensure!( + is_valid_artifact_name(&self.artifact_name), + "artifact_name must be non-empty and contain only alphanumeric characters, '-', '_' or '.'" + ); + + // file_path: must be relative, with no traversal, no absolute prefix, + // no `.git`/hidden segments, no null bytes, no drive-letter colons. + ensure!(!self.file_path.is_empty(), "file_path must not be empty"); + ensure!( + !self.file_path.contains('\0'), + "file_path must not contain null bytes" + ); + ensure!( + !self.file_path.contains(':'), + "file_path must not contain ':'" + ); + for component in self.file_path.split(['/', '\\']) { + ensure!( + is_safe_path_segment(component), + "file_path component '{}' is not a safe path segment (no empty, '..', or leading '.' allowed)", + component + ); + } + Ok(()) + } +} + +/// Internal params struct mirroring `UploadBuildArtifactResult` fields for the +/// `tool_result!` macro's `TryFrom` plumbing. The actual MCP parameters come +/// from `UploadBuildArtifactParams`; this struct only exists so the macro can +/// wire up `Validate`/`TryFrom` while the real construction happens in MCP via +/// `UploadBuildArtifactResult::new()` after the file is staged. +#[derive(Deserialize, JsonSchema)] +struct UploadBuildArtifactResultFields { + build_id: Option, + artifact_name: String, + file_path: String, + staged_file: String, + file_size: u64, + staged_sha256: String, +} + +impl Validate for UploadBuildArtifactResultFields {} + +tool_result! { + name = "upload-build-artifact", + write = true, + params = UploadBuildArtifactResultFields, + default_max = 3, + /// Result of attaching a workspace file to an ADO build. + pub struct UploadBuildArtifactResult { + /// Build ID the file should be attached to. `None` means "current + /// build" — resolved at execution time from `BUILD_BUILDID`. + build_id: Option, + /// Artifact name as proposed by the agent (pre-prefix). + artifact_name: String, + /// Original file path proposed by the agent (used for display and the + /// extension-allowlist check). + file_path: String, + /// Filename of the staged copy inside the safe-outputs directory. + /// Stage 1 (MCP) copies the agent's file into the safe-outputs dir + /// under this name; Stage 3 reads it back from + /// `ctx.working_directory.join(staged_file)` because the agent's + /// sandbox workspace is no longer accessible by then. + staged_file: String, + /// Size in bytes of the staged file at copy time. + file_size: u64, + /// SHA-256 hex digest of the staged file recorded at copy time. + /// Stage 3 re-hashes the file and rejects mismatches — catches + /// same-size file swaps between stages. + staged_sha256: String, + } +} + +impl SanitizeContent for UploadBuildArtifactResult { + fn sanitize_content_fields(&mut self) { + // All textual fields are strictly validated to safe charsets; no + // additional textual sanitization is required. + } +} + +impl UploadBuildArtifactResult { + /// Construct a result after the agent's file has been staged into the + /// safe-outputs directory. + pub fn new( + build_id: Option, + artifact_name: String, + file_path: String, + staged_file: String, + file_size: u64, + staged_sha256: String, + ) -> Self { + Self { + name: ::NAME.to_string(), + build_id, + artifact_name, + file_path, + staged_file, + file_size, + staged_sha256, + } + } +} + +/// Default maximum file size for upload-build-artifact (50 MB). +/// Also used by the MCP handler as the Stage 1 staging cap. +pub const DEFAULT_MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; +const DEFAULT_ATTACHMENT_TYPE: &str = "agent-artifact"; + +/// Configuration for the upload-build-artifact tool (specified in front +/// matter). +/// +/// Example front matter: +/// ```yaml +/// safe-outputs: +/// upload-build-artifact: +/// max-file-size: 52428800 +/// allowed-extensions: +/// - .png +/// - .pdf +/// - .log +/// allowed-artifact-names: +/// - agent-* +/// allowed-build-ids: +/// - 12345 +/// - 67890 +/// name-prefix: "agent-" +/// attachment-type: "agent-artifact" +/// max: 5 +/// ``` +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] +pub struct UploadBuildArtifactConfig { + /// Maximum file size in bytes (default: 50 MB). + #[serde(default = "default_max_file_size", rename = "max-file-size")] + pub max_file_size: u64, + + /// Allowed file extensions (e.g., `[".png", ".pdf"]`). Empty means all + /// extensions are allowed. + #[serde(default, rename = "allowed-extensions")] + pub allowed_extensions: Vec, + + /// Restrict which artifact names may be published. Empty means any name + /// (subject to charset rules) is allowed. Entries ending with `*` match + /// by prefix; otherwise the comparison is exact. + #[serde(default, rename = "allowed-artifact-names")] + pub allowed_artifact_names: Vec, + + /// Restrict which build IDs the agent may attach to. Empty means any + /// build ID accessible to the executor's token is allowed. This check + /// is skipped when `build_id` is omitted (targeting the current build). + #[serde(default, rename = "allowed-build-ids")] + pub allowed_build_ids: Vec, + + /// Prefix prepended to the agent-supplied artifact name before publishing. + #[serde(default, rename = "name-prefix")] + pub name_prefix: Option, + + /// Value to use for the `{type}` segment of the build attachments URL. + /// Defaults to `agent-artifact`. Must satisfy the same charset rules as + /// an artifact name. + #[serde(default, rename = "attachment-type")] + pub attachment_type: Option, +} + +fn default_max_file_size() -> u64 { + DEFAULT_MAX_FILE_SIZE +} + +impl Default for UploadBuildArtifactConfig { + fn default() -> Self { + Self { + max_file_size: DEFAULT_MAX_FILE_SIZE, + allowed_extensions: Vec::new(), + allowed_artifact_names: Vec::new(), + allowed_build_ids: Vec::new(), + name_prefix: None, + attachment_type: None, + } + } +} + +#[async_trait::async_trait] +impl Executor for UploadBuildArtifactResult { + fn dry_run_summary(&self) -> String { + match self.build_id { + Some(id) => format!( + "attach '{}' as artifact '{}' to build #{}", + self.file_path, self.artifact_name, id + ), + None => format!( + "attach '{}' as artifact '{}' to current build", + self.file_path, self.artifact_name + ), + } + } + + async fn execute_impl(&self, ctx: &ExecutionContext) -> anyhow::Result { + // Resolve the effective build ID: explicit value from the agent, or + // fall back to the current pipeline's BUILD_BUILDID. + let effective_build_id: i64 = match self.build_id { + Some(id) => id, + None => { + let current = ctx.build_id.context( + "build_id was not specified and BUILD_BUILDID is not set — \ + cannot determine which build to attach the artifact to", + )?; + i64::try_from(current).context("BUILD_BUILDID value overflows i64")? + } + }; + + info!( + "Attaching '{}' as artifact '{}' to build #{}{}", + self.file_path, + self.artifact_name, + effective_build_id, + if self.build_id.is_none() { " (current build)" } else { "" } + ); + debug!( + "upload-build-artifact: build_id={}, artifact_name='{}', file_path='{}'", + effective_build_id, self.artifact_name, self.file_path + ); + + let config: UploadBuildArtifactConfig = ctx.get_tool_config("upload-build-artifact"); + debug!("Max file size: {} bytes", config.max_file_size); + debug!("Allowed extensions: {:?}", config.allowed_extensions); + debug!("Allowed artifact names: {:?}", config.allowed_artifact_names); + debug!("Allowed build IDs: {:?}", config.allowed_build_ids); + + // Check build-id allow-list (if configured). When the agent omitted + // build_id (targeting the current build), skip this check — the + // current build is implicitly trusted because the operator chose to + // run this pipeline. + if self.build_id.is_some() + && !config.allowed_build_ids.is_empty() + && !config.allowed_build_ids.contains(&effective_build_id) + { + return Ok(ExecutionResult::failure(format!( + "Build ID {} is not in the allowed-build-ids list", + effective_build_id + ))); + } + + // Validate name-prefix length before applying. A long prefix would + // be caught later by the final_name.len() > 100 check, but rejecting + // early gives operators a clearer error message. + if let Some(prefix) = &config.name_prefix { + if prefix.len() > 50 { + return Ok(ExecutionResult::failure(format!( + "name-prefix '{}...' is too long ({} chars, max 50)", + prefix.chars().take(20).collect::(), + prefix.len() + ))); + } + } + + // Apply name-prefix and re-validate the resulting name's charset (the + // prefix itself is operator-controlled and sanitized at config load, + // but we still defensively check the joined string). + let final_name = match &config.name_prefix { + Some(prefix) => format!("{}{}", prefix, self.artifact_name), + None => self.artifact_name.clone(), + }; + if final_name.starts_with('.') || final_name.len() > 100 || !is_valid_artifact_name(&final_name) + { + return Ok(ExecutionResult::failure(format!( + "Resolved artifact name '{}' is not a valid Azure DevOps artifact name", + final_name + ))); + } + debug!("Final artifact name (after prefix): {}", final_name); + + // Check artifact-name allow-list (if configured). + if !config.allowed_artifact_names.is_empty() { + let allowed = config.allowed_artifact_names.iter().any(|pattern| { + if let Some(prefix) = pattern.strip_suffix('*') { + final_name.starts_with(prefix) + } else { + *pattern == final_name + } + }); + if !allowed { + return Ok(ExecutionResult::failure(format!( + "Artifact name '{}' is not in the allowed list", + final_name + ))); + } + } + + // Validate file extension against allowed-extensions (if configured). + // Uses Path::extension() for a precise match rather than suffix + // matching on the full path — this prevents "log" from matching + // filenames like "catalog" when the operator omits the leading dot. + if !config.allowed_extensions.is_empty() { + let file_ext = std::path::Path::new(&self.file_path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + let has_valid_ext = config.allowed_extensions.iter().any(|ext| { + ext.trim_start_matches('.').eq_ignore_ascii_case(file_ext) + }); + if !has_valid_ext { + return Ok(ExecutionResult::failure(format!( + "File '{}' has an extension not in the allowed list: {:?}", + self.file_path, config.allowed_extensions + ))); + } + } + + // Resolve the attachment type. Operator config wins; otherwise use the + // default. Re-validate the charset defensively even though + // `SanitizeConfig` strips control characters, because the type is + // interpolated into a URL path segment. + let attachment_type = config + .attachment_type + .as_deref() + .unwrap_or(DEFAULT_ATTACHMENT_TYPE); + if attachment_type.is_empty() + || attachment_type.starts_with('.') + || attachment_type.len() > 100 + || !is_valid_artifact_name(attachment_type) + { + return Ok(ExecutionResult::failure(format!( + "attachment-type '{}' is not a valid value (must be non-empty, ≤100 chars, no leading '.', alphanumeric/'-'/'_'/'.')", + attachment_type + ))); + } + debug!("Attachment type: {}", attachment_type); + + // Resolve the staged file inside the safe-outputs working directory. + // Stage 1 (MCP) copied the agent's file there under `self.staged_file`; + // the sandbox workspace where the original lived is no longer + // accessible. Canonicalize and verify it stays inside + // `working_directory` so a malicious staged_file value can't escape + // (defense in depth — MCP generates the name itself). + let staged_path = ctx.working_directory.join(&self.staged_file); + debug!("Staged file path: {}", staged_path.display()); + + let canonical = staged_path.canonicalize().context( + "Failed to canonicalize staged file path — file may be missing or contains broken symlinks", + )?; + let canonical_base = ctx + .working_directory + .canonicalize() + .context("Failed to canonicalize working directory")?; + if !canonical.starts_with(&canonical_base) { + return Ok(ExecutionResult::failure(format!( + "Staged file '{}' resolves outside the safe-outputs directory", + self.staged_file + ))); + } + + // Reject directories defensively — the staged entry must always be a + // single file (Stage 1 only copies single files). + let metadata = std::fs::metadata(&canonical).context("Failed to read file metadata")?; + if metadata.is_dir() { + return Ok(ExecutionResult::failure(format!( + "Staged path '{}' is a directory; upload-build-artifact only supports single files", + self.staged_file + ))); + } + let file_size = metadata.len(); + debug!("File size: {} bytes", file_size); + + // Integrity check: compare the live file size against the size + // recorded in Stage 1. A mismatch means the staged file was modified + // between stages — fail hard rather than uploading tampered content. + if file_size != self.file_size { + return Ok(ExecutionResult::failure(format!( + "Staged file size ({} bytes) differs from size recorded at Stage 1 ({} bytes) — \ + the file may have been modified between stages", + file_size, self.file_size + ))); + } + + if file_size > config.max_file_size { + return Ok(ExecutionResult::failure(format!( + "File size ({} bytes) exceeds maximum allowed size ({} bytes)", + file_size, config.max_file_size + ))); + } + + if ctx.dry_run { + return Ok(ExecutionResult::success(format!( + "[dry-run] would attach '{}' ({} bytes) as artifact '{}' to build #{}{}", + self.file_path, + file_size, + final_name, + effective_build_id, + if self.build_id.is_none() { " (current build)" } else { "" } + ))); + } + + // Read the file bytes for upload (after the dry-run guard to avoid + // reading up to 50 MB into memory only to discard it). Uses async I/O + // to avoid blocking the tokio runtime for large files. + let file_bytes = tokio::fs::read(&canonical).await.context("Failed to read file contents")?; + + // SHA-256 integrity check: verify the staged file hasn't been swapped + // between stages. This catches same-size replacements that the size + // check alone would miss. + let live_hash = crate::hash::sha256_hex(&file_bytes); + if live_hash != self.staged_sha256 { + return Ok(ExecutionResult::failure(format!( + "Staged file SHA-256 mismatch: expected {} (recorded at Stage 1), got {} — \ + the file may have been tampered with between stages", + self.staged_sha256, live_hash + ))); + } + + // Resolve the ADO API context (org URL, project, token). + let org_url = ctx + .ado_org_url + .as_ref() + .context("AZURE_DEVOPS_ORG_URL not set")?; + let project = ctx + .ado_project + .as_ref() + .context("SYSTEM_TEAMPROJECT not set")?; + let token = ctx + .access_token + .as_ref() + .context("No access token available (SYSTEM_ACCESSTOKEN or AZURE_DEVOPS_EXT_PAT)")?; + debug!("ADO org: {}, project: {}", org_url, project); + + // Build the build-attachments URL. + // PUT {org_url}/{project}/_apis/build/builds/{buildId}/attachments/{type}/{name}?api-version=7.1-preview.1 + let url = format!( + "{}/{}/_apis/build/builds/{}/attachments/{}/{}?api-version=7.1-preview.1", + org_url.trim_end_matches('/'), + utf8_percent_encode(project, PATH_SEGMENT), + effective_build_id, + utf8_percent_encode(attachment_type, PATH_SEGMENT), + utf8_percent_encode(&final_name, PATH_SEGMENT), + ); + debug!("Attachment URL: {}", url); + + let client = reqwest::Client::new(); + info!( + "Uploading {} bytes to build #{} as attachment '{}/{}'", + file_size, effective_build_id, attachment_type, final_name + ); + let response = client + .put(&url) + .header("Content-Type", "application/octet-stream") + .basic_auth("", Some(token)) + .body(file_bytes) + .send() + .await + .context("Failed to send attachment upload request to Azure DevOps")?; + + if response.status().is_success() { + let resp_body: serde_json::Value = response.json().await.unwrap_or_else(|e| { + warn!( + "Build attachment uploaded for build #{} but the response JSON could not be parsed: {} — proceeding without attachment URL", + effective_build_id, e + ); + serde_json::Value::Null + }); + let attachment_url = resp_body + .get("url") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + info!( + "Attached '{}' to build #{} as '{}'", + self.file_path, effective_build_id, final_name + ); + + Ok(ExecutionResult::success_with_data( + format!( + "Attached '{}' to build #{} as artifact '{}'", + self.file_path, effective_build_id, final_name + ), + serde_json::json!({ + "build_id": effective_build_id, + "artifact_name": final_name, + "attachment_type": attachment_type, + "file_path": self.file_path, + "size_bytes": file_size, + "attachment_url": attachment_url, + "project": project, + }), + )) + } else { + let status = response.status(); + let error_body = response + .text() + .await + .unwrap_or_else(|_| "Unknown error".to_string()); + Ok(ExecutionResult::failure(format!( + "Failed to attach artifact to build #{} (HTTP {}): {}", + effective_build_id, status, error_body + ))) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::safeoutputs::ToolResult; + + #[test] + fn test_result_has_correct_name() { + assert_eq!(UploadBuildArtifactResult::NAME, "upload-build-artifact"); + } + + fn make_params( + build_id: Option, + artifact_name: &str, + file_path: &str, + ) -> UploadBuildArtifactParams { + UploadBuildArtifactParams { + build_id, + artifact_name: artifact_name.to_string(), + file_path: file_path.to_string(), + } + } + + /// Compute SHA-256 hex digest of a byte slice (test helper, delegates + /// to the crate-level helper). + fn test_sha256(data: &[u8]) -> String { + crate::hash::sha256_hex(data) + } + + /// Dummy SHA-256 hash for tests that use dry_run=true (hash check is + /// skipped on the dry-run path). + const DUMMY_HASH: &str = "0000000000000000000000000000000000000000000000000000000000000000"; + + #[test] + fn test_params_deserializes_with_build_id() { + let json = + r#"{"build_id": 1234, "artifact_name": "agent-report", "file_path": "out/report.pdf"}"#; + let params: UploadBuildArtifactParams = serde_json::from_str(json).unwrap(); + assert_eq!(params.build_id, Some(1234)); + assert_eq!(params.artifact_name, "agent-report"); + assert_eq!(params.file_path, "out/report.pdf"); + } + + #[test] + fn test_params_deserializes_without_build_id() { + let json = r#"{"artifact_name": "agent-report", "file_path": "out/report.pdf"}"#; + let params: UploadBuildArtifactParams = serde_json::from_str(json).unwrap(); + assert_eq!(params.build_id, None); + assert_eq!(params.artifact_name, "agent-report"); + assert_eq!(params.file_path, "out/report.pdf"); + } + + #[test] + fn test_params_validate_accepts_valid_with_build_id() { + assert!(make_params(Some(1), "agent-report", "out/report.pdf") + .validate() + .is_ok()); + } + + #[test] + fn test_params_validate_accepts_valid_without_build_id() { + assert!(make_params(None, "agent-report", "out/report.pdf") + .validate() + .is_ok()); + } + + #[test] + fn test_validation_rejects_zero_build_id() { + assert!(make_params(Some(0), "agent-report", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_negative_build_id() { + assert!(make_params(Some(-1), "agent-report", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_empty_artifact_name() { + assert!(make_params(Some(1), "", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_artifact_name_starting_with_dot() { + assert!(make_params(Some(1), ".hidden", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_artifact_name_with_space() { + assert!(make_params(Some(1), "my artifact", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_artifact_name_with_slash() { + assert!(make_params(Some(1), "my/artifact", "out/report.pdf") + .validate() + .is_err()); + } + + #[test] + fn test_validation_accepts_dotted_artifact_name() { + assert!(make_params(Some(1), "agent.report.v2", "out/report.pdf") + .validate() + .is_ok()); + } + + #[test] + fn test_validation_rejects_empty_file_path() { + assert!(make_params(Some(1), "agent-report", "") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_path_traversal() { + assert!(make_params(Some(1), "agent-report", "../etc/passwd") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_absolute_path() { + assert!(make_params(Some(1), "agent-report", "/etc/passwd") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_backslash_traversal() { + assert!(make_params(Some(1), "agent-report", "src\\..\\secret.txt") + .validate() + .is_err()); + } + + #[test] + fn test_validation_rejects_dotgit_component() { + assert!(make_params(Some(1), "agent-report", ".git/config") + .validate() + .is_err()); + } + + #[test] + fn test_result_serializes_correctly_with_build_id() { + let result = UploadBuildArtifactResult::new( + Some(1234), + "agent-report".to_string(), + "out/report.pdf".to_string(), + "upload-build-artifact-agent-report-1234.bin".to_string(), + 42, + DUMMY_HASH.to_string(), + ); + let json = serde_json::to_string(&result).unwrap(); + assert!(json.contains(r#""name":"upload-build-artifact""#)); + assert!(json.contains(r#""build_id":1234"#)); + assert!(json.contains(r#""artifact_name":"agent-report""#)); + assert!(json.contains(r#""file_path":"out/report.pdf""#)); + assert!(json.contains(r#""staged_file":"upload-build-artifact-agent-report-1234.bin""#)); + } + + #[test] + fn test_result_serializes_correctly_without_build_id() { + let result = UploadBuildArtifactResult::new( + None, + "agent-report".to_string(), + "out/report.pdf".to_string(), + "upload-build-artifact-agent-report-1234.bin".to_string(), + 42, + DUMMY_HASH.to_string(), + ); + let json = serde_json::to_string(&result).unwrap(); + assert!(json.contains(r#""name":"upload-build-artifact""#)); + assert!(json.contains(r#""build_id":null"#)); + assert!(json.contains(r#""artifact_name":"agent-report""#)); + } + + #[test] + fn test_config_defaults() { + let config = UploadBuildArtifactConfig::default(); + assert_eq!(config.max_file_size, 50 * 1024 * 1024); + assert!(config.allowed_extensions.is_empty()); + assert!(config.allowed_artifact_names.is_empty()); + assert!(config.allowed_build_ids.is_empty()); + assert!(config.name_prefix.is_none()); + assert!(config.attachment_type.is_none()); + } + + #[test] + fn test_config_deserializes_from_yaml() { + let yaml = r#" +max-file-size: 1048576 +allowed-extensions: + - .png + - .pdf +allowed-artifact-names: + - agent-* + - report +allowed-build-ids: + - 100 + - 200 +name-prefix: "agent-" +attachment-type: "agent-artifact" +"#; + let config: UploadBuildArtifactConfig = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(config.max_file_size, 1_048_576); + assert_eq!(config.allowed_extensions, vec![".png", ".pdf"]); + assert_eq!(config.allowed_artifact_names, vec!["agent-*", "report"]); + assert_eq!(config.allowed_build_ids, vec![100, 200]); + assert_eq!(config.name_prefix, Some("agent-".to_string())); + assert_eq!(config.attachment_type, Some("agent-artifact".to_string())); + } + + fn make_ctx(working_directory: std::path::PathBuf, dry_run: bool) -> ExecutionContext { + ExecutionContext { + ado_org_url: None, + ado_organization: None, + ado_project: None, + access_token: None, + source_directory: working_directory.clone(), + working_directory, + tool_configs: std::collections::HashMap::new(), + repository_id: None, + repository_name: None, + allowed_repositories: std::collections::HashMap::new(), + agent_stats: None, + dry_run, + build_id: None, + build_number: None, + build_reason: None, + definition_name: None, + source_branch: None, + source_branch_name: None, + source_version: None, + triggered_by_build_id: None, + triggered_by_definition_name: None, + triggered_by_build_number: None, + triggered_by_project_id: None, + pull_request_id: None, + pull_request_source_branch: None, + pull_request_target_branch: None, + } + } + + #[tokio::test] + async fn test_executor_reads_staged_file_with_explicit_build_id() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-deadbeef.pdf"; + std::fs::write(dir.path().join(staged), b"%PDF-1.4 hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1234), + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 14, + DUMMY_HASH.to_string(), + ); + let ctx = make_ctx(dir.path().to_path_buf(), true); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(outcome.success, "expected success, got: {:?}", outcome); + assert!(outcome.message.contains("[dry-run]")); + assert!(outcome.message.contains("agent-report")); + assert!(outcome.message.contains("#1234")); + } + + #[tokio::test] + async fn test_executor_resolves_current_build_when_build_id_omitted() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-feedf00d.pdf"; + std::fs::write(dir.path().join(staged), b"%PDF-1.4 hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + None, + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 14, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.build_id = Some(5678); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(outcome.success, "expected success, got: {:?}", outcome); + assert!(outcome.message.contains("[dry-run]")); + assert!(outcome.message.contains("#5678")); + assert!(outcome.message.contains("(current build)")); + } + + #[tokio::test] + async fn test_executor_fails_when_build_id_omitted_and_not_in_env() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-cafef00d.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + None, + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let ctx = make_ctx(dir.path().to_path_buf(), true); + // ctx.build_id is None — no BUILD_BUILDID + let err = result.execute_impl(&ctx).await.unwrap_err(); + assert!( + err.to_string().contains("BUILD_BUILDID"), + "expected BUILD_BUILDID error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_executor_rejects_missing_staged_file() { + let dir = tempfile::tempdir().unwrap(); + let result = UploadBuildArtifactResult::new( + Some(1234), + "agent-report".to_string(), + "out/report.pdf".to_string(), + "does-not-exist.pdf".to_string(), + 0, + DUMMY_HASH.to_string(), + ); + let ctx = make_ctx(dir.path().to_path_buf(), true); + let err = result.execute_impl(&ctx).await.unwrap_err(); + assert!( + err.to_string().contains("canonicalize"), + "expected canonicalize error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_executor_rejects_disallowed_build_id() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-cafef00d.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(999), + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "allowed-build-ids": [100, 200] }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("allowed-build-ids"), + "expected allowed-build-ids rejection, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_skips_build_id_allowlist_for_current_build() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-aabbccdd.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + None, // targeting current build + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.build_id = Some(999); // current build not in allowed list + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "allowed-build-ids": [100, 200] }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!( + outcome.success, + "current build should bypass allowed-build-ids, got: {:?}", + outcome + ); + } + + #[tokio::test] + async fn test_executor_accepts_allowed_build_id() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-feedf00d.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(100), + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "allowed-build-ids": [100, 200] }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(outcome.success, "expected success, got: {:?}", outcome); + } + + #[tokio::test] + async fn test_executor_rejects_oversized_file() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-big-deadbeef.bin"; + std::fs::write(dir.path().join(staged), vec![0u8; 1024]).unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1), + "agent-big".to_string(), + "out/big.bin".to_string(), + staged.to_string(), + 1024, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "max-file-size": 100 }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("exceeds maximum"), + "expected size rejection, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_rejects_disallowed_extension() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-aabb1122.exe"; + std::fs::write(dir.path().join(staged), b"MZ hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1), + "agent-report".to_string(), + "out/report.exe".to_string(), + staged.to_string(), + 8, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "allowed-extensions": [".pdf", ".png"] }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("extension not in the allowed list"), + "expected extension rejection, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_rejects_disallowed_artifact_name() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-evil-report-ccdd3344.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1), + "evil-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "allowed-artifact-names": ["agent-*", "safe-report"] }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("not in the allowed list"), + "expected artifact-name rejection, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_applies_name_prefix() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-report-eeff5566.pdf"; + std::fs::write(dir.path().join(staged), b"hello").unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1), + "report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 5, + DUMMY_HASH.to_string(), + ); + let mut ctx = make_ctx(dir.path().to_path_buf(), true); + ctx.tool_configs.insert( + "upload-build-artifact".to_string(), + serde_json::json!({ "name-prefix": "agent-" }), + ); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(outcome.success, "expected success, got: {:?}", outcome); + // The dry-run message should contain the prefixed name. + assert!( + outcome.message.contains("agent-report"), + "expected prefixed name 'agent-report' in message, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_rejects_tampered_staged_file() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-tampered.pdf"; + // Write 100 bytes but record 50 in the result — simulates tampering. + std::fs::write(dir.path().join(staged), vec![0u8; 100]).unwrap(); + + let result = UploadBuildArtifactResult::new( + Some(1), + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + 50, // mismatched size + DUMMY_HASH.to_string(), + ); + let ctx = make_ctx(dir.path().to_path_buf(), true); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("differs from size recorded"), + "expected integrity failure, got: {}", + outcome.message + ); + } + + #[tokio::test] + async fn test_executor_rejects_sha256_mismatch() { + let dir = tempfile::tempdir().unwrap(); + let staged = "upload-build-artifact-agent-report-sha-mismatch.pdf"; + let content = b"real file content"; + std::fs::write(dir.path().join(staged), content).unwrap(); + + // Record the hash of different content — same size but wrong hash. + let wrong_hash = crate::hash::sha256_hex(b"wrong file content"); + + let result = UploadBuildArtifactResult::new( + Some(1), + "agent-report".to_string(), + "out/report.pdf".to_string(), + staged.to_string(), + content.len() as u64, + wrong_hash, + ); + // dry_run = false so the hash check executes (it's after the + // dry-run guard). The failure fires before any HTTP call. + let mut ctx = make_ctx(dir.path().to_path_buf(), false); + ctx.build_id = Some(1); + ctx.ado_org_url = Some("https://dev.azure.com/test".to_string()); + ctx.ado_project = Some("TestProject".to_string()); + ctx.access_token = Some("fake-token".to_string()); + let outcome = result.execute_impl(&ctx).await.unwrap(); + assert!(!outcome.success); + assert!( + outcome.message.contains("SHA-256 mismatch"), + "expected SHA-256 mismatch failure, got: {}", + outcome.message + ); + } + + #[test] + fn test_dry_run_summary_with_build_id() { + let result = UploadBuildArtifactResult::new( + Some(42), + "report".to_string(), + "out/report.pdf".to_string(), + "staged.pdf".to_string(), + 100, + DUMMY_HASH.to_string(), + ); + let summary = result.dry_run_summary(); + assert!(summary.contains("#42")); + assert!(summary.contains("report")); + } + + #[test] + fn test_dry_run_summary_without_build_id() { + let result = UploadBuildArtifactResult::new( + None, + "report".to_string(), + "out/report.pdf".to_string(), + "staged.pdf".to_string(), + 100, + DUMMY_HASH.to_string(), + ); + let summary = result.dry_run_summary(); + assert!(summary.contains("current build")); + assert!(summary.contains("report")); + } +} diff --git a/src/validate.rs b/src/validate.rs index 9f8ee1f7..af73d126 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -56,6 +56,20 @@ pub fn is_valid_version(s: &str) -> bool { .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-')) } +/// Validate that a string is a valid ADO artifact name or attachment-type +/// segment: non-empty, composed of `[A-Za-z0-9._-]`. +/// +/// The allowed charset is identical to [`is_valid_version`], but this function +/// exists as a separate entry point so that artifact-name validation is +/// decoupled from version-string validation. If version validation is ever +/// tightened (e.g. to require a leading digit), artifact names remain +/// unaffected. +pub fn is_valid_artifact_name(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-')) +} + /// Characters allowed in individual engine.args entries. /// Strict allowlist to prevent shell injection inside AWF single-quoted commands. pub fn is_valid_arg(s: &str) -> bool {