diff --git a/Cargo.lock b/Cargo.lock index 1e191fd..308dd56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -313,6 +313,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "edgee-compressor" +version = "0.1.0" +dependencies = [ + "lazy_static", + "regex", + "serde", + "serde_json", + "tracing", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -1713,9 +1724,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" diff --git a/Cargo.toml b/Cargo.toml index 203db5a..0b29cea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["crates/cli"] +members = ["crates/cli", "crates/compressor"] resolver = "2" [workspace.dependencies] diff --git a/LICENSE b/LICENSE index 261eeb9..f4ade0f 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2026 Edgee Cloud Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..d24cde9 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,200 @@ +The token compression modules in this project are derived from RTK +(https://github.com/rtk-ai/rtk), originally created by rtk-ai and +rtk-ai Labs. The original code is licensed under the Apache License, +Version 2.0, reproduced below. 
+ +Files containing code derived from RTK are individually marked with +their original copyright notice and modification history. + +============================================================================ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2024 rtk-ai and rtk-ai Labs + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..97d1670 --- /dev/null +++ b/NOTICE @@ -0,0 +1,9 @@ +Edgee Gateway +Copyright 2026 Edgee Cloud + +This product includes software derived from RTK (https://github.com/rtk-ai/rtk), +Copyright 2024 rtk-ai and rtk-ai Labs, licensed under the Apache License 2.0. + +Specifically, the token compression modules for bash tool outputs (ls, cat, find, +git, cargo, and related commands) are derived from RTK's compression implementation +and have been adapted for server-side gateway compression. diff --git a/README.md b/README.md index 5f718b4..6b3e6dd 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,9 @@

+**Open-source LLM gateway written in Rust.** +Route, observe, and compress your AI traffic. + [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE) [![Edgee](https://img.shields.io/badge/discord-edgee-blueviolet.svg?logo=discord)](https://www.edgee.ai/discord) [![Docs](https://img.shields.io/badge/docs-published-blue)](https://www.edgee.ai/docs/introduction) @@ -17,13 +20,20 @@ --- -AI coding assistants are incredible. They're also expensive. Every prompt you send to Claude Code or Codex carries context, your files, your history, your instructions, and your token consumption is crazy. - -Edgee sits between your coding agent and the LLM APIs and compresses that context before it reaches the model. Same output. Fewer tokens. Lower bill. +Edgee is a lightweight LLM gateway that sits between your application and AI providers. It gives you a single control point for routing, observability, and cost optimization, without changing your existing code. +Think of it as an open-source alternative to LiteLLM or OpenRouter, written in Rust for speed and low resource usage, with a built-in token compression engine that reduces your AI costs automatically. ai-gateway-horizontal-light +## Why Edgee + +- **One gateway, any provider** — Unified API for Anthropic, OpenAI, and other LLM providers. Switch models without touching your app code. +- **Token compression** — Edgee analyzes request context and strips redundancy before it reaches the model. Same output, fewer tokens, lower bill. +- **Real-time observability** — See exactly how many tokens you're sending, how many you're saving, and what it costs. +- **Rust-native** — Fast startup, minimal memory footprint, no runtime dependencies. Runs anywhere Docker runs. + +--- ## Install @@ -51,28 +61,48 @@ Installs to `%LOCALAPPDATA%\Programs\edgee\`. 
You can override the directory wit ## Quickstart -### Launch Claude Code with token compression +### Use with AI coding assistants + +Edgee can wrap your coding assistant and compress traffic automatically: ```bash +# Claude Code edgee launch claude + +# Codex +edgee launch codex + +# Opencode +edgee launch opencode ``` -That's it. Edgee configures itself as a gateway and Claude Code routes through it automatically. +### Use as a standalone gateway -### Launch Codex with token compression +Point any OpenAI-compatible client at Edgee: ```bash -edgee launch codex +# Start the gateway +edgee serve + +# Your app talks to Edgee instead of the provider directly +export OPENAI_BASE_URL=http://localhost:1207/v1 ``` --- -## What it does +## Features + +### Token compression + +Edgee's compression engine analyzes tool outputs (file listings, git logs, build output, test results) and removes noise before they enter the LLM context. The compression is lossless from the model's perspective — responses are identical, but prompts are leaner. -**Token compression** — Edgee analyzes your request context and removes redundancy before sending it upstream. It's lossless from the model's perspective: the response is identical, but the prompt is leaner. +### Multi-provider routing -**Usage tracking** — See how many tokens you're sending, how many you're saving, and what it costs — in real time. +Route requests across Anthropic, OpenAI, and other providers through a single endpoint. Switch models, load-balance, or failover without code changes. +### Usage tracking + +Real-time visibility into token consumption, compression savings, and cost per request. 
--- @@ -84,7 +114,17 @@ edgee launch codex | Codex | `edgee launch codex` | ✅ Supported | | Opencode | `edgee launch opencode` | ✅ Supported | | Cursor | `edgee launch cursor` | 🔜 Coming soon | +| Any OpenAI-compatible client | `edgee serve` | ✅ Supported | + +--- + +## Acknowledgments + +The token compression engine in Edgee is derived from [RTK](https://github.com/rtk-ai/rtk), created by [Patrick Szymkowiak](https://github.com/pszymkowiak) and contributors at rtk-ai Labs. RTK pioneered local tool-output compression for AI coding assistants, and we built on their work to bring the same optimizations to a gateway architecture. + +RTK is licensed under the Apache License 2.0. All derived files retain the original copyright notice and are individually marked with a modification history. See [`LICENSE-APACHE`](./LICENSE-APACHE) and [`NOTICE`](./NOTICE) for full details. +If you're looking for a local-first compression tool, [check out RTK directly](https://github.com/rtk-ai/rtk) — it's excellent for individual developer workflows. --- @@ -106,4 +146,4 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full guide.
For bigger changes, o - [Discord](https://www.edgee.ai/discord) — fastest way to get help - [GitHub Issues](https://github.com/edgee-ai/edgee/issues) — bugs and feature requests -- [Twitter / X](https://twitter.com/edgee_ai) — updates and releases +- [Twitter / X](https://twitter.com/edgee_ai) — updates and releases \ No newline at end of file diff --git a/crates/compressor/Cargo.toml b/crates/compressor/Cargo.toml new file mode 100644 index 0000000..3443473 --- /dev/null +++ b/crates/compressor/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "edgee-compressor" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +regex = "1" +lazy_static = "1" +tracing = "0.1" diff --git a/crates/compressor/src/lib.rs b/crates/compressor/src/lib.rs new file mode 100644 index 0000000..09e2dea --- /dev/null +++ b/crates/compressor/src/lib.rs @@ -0,0 +1,32 @@ +//! Tool output compression strategies for AI coding agents. +//! +//! Provides compressors for tool outputs from Claude Code, OpenCode, and Codex agents. +//! Each compressor reduces token usage by summarizing tool results while preserving +//! critical information. + +pub mod strategy; +pub mod util; + +// Re-export key traits +pub use strategy::ToolCompressor; +pub use strategy::bash::BashCompressor; + +// Re-export compressor lookup functions +pub use strategy::bash::compressor_for as bash_compressor_for; +pub use strategy::claude::compressor_for as claude_compressor_for; +pub use strategy::codex::compressor_for as codex_compressor_for; +pub use strategy::opencode::compressor_for as opencode_compressor_for; + +// Re-export the main compression utility +pub use util::compress_claude_tool_with_segment_protection; + +/// Compress a Claude Code tool output by tool name. +/// +/// Looks up the appropriate compressor for the given tool name and applies it, +/// preserving `` blocks verbatim. 
+///
+/// Returns `Some(compressed)` if compression was applied, `None` if no compressor is registered for `tool_name` or the original should be kept.
+pub fn compress_tool_output(tool_name: &str, arguments: &str, output: &str) -> Option<String> {
+    let compressor = claude_compressor_for(tool_name)?;
+    compress_claude_tool_with_segment_protection(compressor, arguments, output)
+}
diff --git a/crates/compressor/src/strategy/bash/cargo.rs b/crates/compressor/src/strategy/bash/cargo.rs
new file mode 100644
index 0000000..c12d0c9
--- /dev/null
+++ b/crates/compressor/src/strategy/bash/cargo.rs
@@ -0,0 +1,422 @@
+// Copyright 2024 rtk-ai and rtk-ai Labs
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Original source: https://github.com/rtk-ai/rtk
+//
+// Modifications copyright 2026 Edgee Cloud
+// This file has been modified from its original form:
+//   - Adapted from a local CLI proxy to a server-side gateway compressor
+//   - Refactored to implement Edgee's traits
+//   - Further adapted as needed for this module's role in the gateway
+//
+// See LICENSE-APACHE in the project root for the full license text.
+
+//! Compressor for `cargo` command output.
+//!
+//! Strips Compiling/Downloading/Checking noise lines and keeps only
+//! errors, warnings, and summary for build/test/clippy/check output.
+
+use std::collections::HashMap;
+
+use super::BashCompressor;
+
+/// [`BashCompressor`] implementation for `cargo` invocations.
+pub struct CargoCompressor;
+
+impl BashCompressor for CargoCompressor {
+    /// Picks a filter from the cargo subcommand found in `command` and applies it
+    /// to `output`.
+    ///
+    /// Returns `None` for subcommands that have no dedicated filter.
+    fn compress(&self, command: &str, output: &str) -> Option<String> {
+        match parse_cargo_subcommand(command) {
+            "build" | "check" | "b" => Some(filter_cargo_build(output)),
+            "test" | "t" => Some(filter_cargo_test(output)),
+            "clippy" => Some(filter_cargo_clippy(output)),
+            _ => None, // Don't compress unknown subcommands
+        }
+    }
+}
+
+/// Returns the first argument after the program name that is neither a flag
+/// (`-…`) nor a rustup toolchain selector (`+…`), or `""` when there is none.
+///
+/// Flag values are not recognised as such: for `cargo --manifest-path x build`
+/// the result is `x`.
+fn parse_cargo_subcommand(command: &str) -> &str {
+    command
+        .split_whitespace()
+        .skip(1)
+        .find(|arg| !arg.starts_with('-') && !arg.starts_with('+'))
+        .unwrap_or("")
+}
+
+/// Returns true for decorative lines in Rust compiler diagnostics that carry no
+/// actionable information: pure pipe separator lines (` |`), unlabeled
+/// caret/dash annotation lines (` | ^^^^^^^`), and
+/// "For more information about this error, try ..." footers.
+fn is_decorative_diagnostic_line(line: &str) -> bool {
+    let trimmed = line.trim();
+
+    // "For more information about this error, try `rustc --explain ...`"
+    if trimmed.starts_with("For more information about this error") {
+        return true;
+    }
+
+    let Some(after_pipe) = trimmed.strip_prefix('|') else {
+        return false;
+    };
+
+    // An empty annotation is a pure pipe separator. A non-empty one is decorative
+    // only when it consists solely of marker characters; labeled annotations such
+    // as `| ^ argument #6 of type ... is missing` are kept.
+    after_pipe
+        .trim_start()
+        .chars()
+        .all(|c| matches!(c, '^' | '-' | '~' | '_' | ' '))
+}
+
+/// Returns true for cargo progress/status lines (matched by prefix after leading
+/// whitespace) that are dropped from every filtered output.
+fn is_noise_line(line: &str) -> bool {
+    const NOISE_PREFIXES: [&str; 8] = [
+        "Compiling",
+        "Checking",
+        "Downloading",
+        "Downloaded",
+        "Finished",
+        "Locking",
+        "Updating",
+        "Blocking waiting for file lock",
+    ];
+
+    let trimmed = line.trim_start();
+    NOISE_PREFIXES
+        .iter()
+        .any(|prefix| trimmed.starts_with(prefix))
+}
+
+/// Filter cargo build/check output: strip compilation lines, keep errors + summary.
+///
+/// At most 15 diagnostics are emitted; the rest are summarised as a count.
+fn filter_cargo_build(output: &str) -> String {
+    let mut errors: Vec<String> = Vec::new();
+    let mut warnings = 0;
+    let mut error_count = 0;
+    // Counts every noise line (including `Finished`, `Downloading`, ...), not only
+    // `Compiling` lines.
+    let mut compiled = 0;
+    let mut in_error = false;
+    let mut current_error: Vec<String> = Vec::new();
+
+    for line in output.lines() {
+        if is_noise_line(line) {
+            compiled += 1;
+            continue;
+        }
+
+        if line.starts_with("error[") || line.starts_with("error:") {
+            // Trailing cargo/rustc summaries are not diagnostics of their own.
+            if line.contains("aborting due to") || line.contains("could not compile") {
+                continue;
+            }
+            if in_error && !current_error.is_empty() {
+                errors.push(current_error.join("\n"));
+                current_error.clear();
+            }
+            error_count += 1;
+            in_error = true;
+            current_error.push(line.to_string());
+        } else if line.starts_with("warning:")
+            && line.contains("generated")
+            && line.contains("warning")
+        {
+            continue; // Skip summary warning lines
+        } else if line.starts_with("warning:") || line.starts_with("warning[") {
+            if in_error && !current_error.is_empty() {
+                errors.push(current_error.join("\n"));
+                current_error.clear();
+            }
+            warnings += 1;
+            in_error = true;
+            current_error.push(line.to_string());
+        } else if in_error {
+            if is_decorative_diagnostic_line(line) {
+                continue;
+            } else if line.trim().is_empty() && current_error.len() > 3 {
+                // A blank line closes the diagnostic once it has some body.
+                errors.push(current_error.join("\n"));
+                current_error.clear();
+                in_error = false;
+            } else {
+                current_error.push(line.to_string());
+            }
+        }
+    }
+
+    if !current_error.is_empty() {
+        errors.push(current_error.join("\n"));
+    }
+
+    if error_count == 0 && warnings == 0 {
+        return format!("ok ({} crates compiled)\n", compiled);
+    }
+
+    let mut result = format!(
+        "cargo build: {} errors, {} warnings ({} crates)\n",
+        error_count, warnings, compiled
+    );
+
+    for err in errors.iter().take(15) {
+        result.push_str(err);
+        result.push_str("\n\n");
+    }
+
+    if errors.len() > 15 {
+        result.push_str(&format!("... +{} more issues\n", errors.len() - 15));
+    }
+
+    result
+}
+
+/// Filter cargo test output: show only failures + summary.
+///
+/// Each `test result:` line is recorded once. Failure blocks longer than 200 bytes
+/// are cut to at most 197 bytes (backing off to a UTF-8 character boundary) and
+/// suffixed with `...`. If nothing was captured, the last five non-noise,
+/// non-blank lines of `output` are returned instead.
+fn filter_cargo_test(output: &str) -> String {
+    let mut failures: Vec<String> = Vec::new();
+    let mut summary_lines: Vec<String> = Vec::new();
+    let mut in_failure_section = false;
+    let mut current_failure: Vec<String> = Vec::new();
+
+    for line in output.lines() {
+        if is_noise_line(line) {
+            continue;
+        }
+
+        // Skip "running N tests" and individual "test ... ok" lines
+        if line.starts_with("running ") || (line.starts_with("test ") && line.ends_with("... ok")) {
+            continue;
+        }
+
+        if line == "failures:" {
+            in_failure_section = true;
+            continue;
+        }
+
+        if line.starts_with("test result:") {
+            // The summary line also terminates any open failures section.
+            in_failure_section = false;
+            summary_lines.push(line.to_string());
+        } else if in_failure_section {
+            if line.starts_with(" ") || line.starts_with("---- ") {
+                current_failure.push(line.to_string());
+            } else if line.trim().is_empty() && !current_failure.is_empty() {
+                failures.push(current_failure.join("\n"));
+                current_failure.clear();
+            } else if !line.trim().is_empty() {
+                current_failure.push(line.to_string());
+            }
+        }
+    }
+
+    if !current_failure.is_empty() {
+        failures.push(current_failure.join("\n"));
+    }
+
+    if failures.is_empty() && !summary_lines.is_empty() {
+        let mut result = String::new();
+        for line in &summary_lines {
+            result.push_str(&format!("ok {}\n", line));
+        }
+        return result;
+    }
+
+    let mut result = String::new();
+
+    if !failures.is_empty() {
+        result.push_str(&format!("FAILURES ({}):\n\n", failures.len()));
+        for (i, failure) in failures.iter().enumerate().take(10) {
+            let truncated = if failure.len() > 200 {
+                // Slicing at a fixed byte offset panics inside a multi-byte
+                // character, so back off to the nearest boundary at or below 197.
+                let mut cut = 197;
+                while !failure.is_char_boundary(cut) {
+                    cut -= 1;
+                }
+                format!("{}...", &failure[..cut])
+            } else {
+                failure.clone()
+            };
+            result.push_str(&format!("{}. {}\n\n", i + 1, truncated));
+        }
+        if failures.len() > 10 {
+            result.push_str(&format!("... +{} more failures\n", failures.len() - 10));
+        }
+    }
+
+    for line in &summary_lines {
+        result.push_str(line);
+        result.push('\n');
+    }
+
+    if result.trim().is_empty() {
+        // Fallback: return last few meaningful lines
+        let meaningful: Vec<&str> = output
+            .lines()
+            .filter(|l| !l.trim().is_empty() && !is_noise_line(l))
+            .collect();
+        let tail_start = meaningful.len().saturating_sub(5);
+        for line in &meaningful[tail_start..] {
+            result.push_str(line);
+            result.push('\n');
+        }
+    }
+
+    result
+}
+
+/// Filter cargo clippy output: group warnings by lint rule.
+///
+/// Every `warning`/`error` header line sets the current rule: the text between the
+/// last `[` and the last `]` when they appear in that order, the whole line when a
+/// `[` is present without a later `]`, otherwise the message after the
+/// `warning: `/`error: ` prefix. Following `--> ` lines are recorded as locations
+/// of that rule. Up to 15 rules are listed by descending location count (ties
+/// ordered by rule name so the output is deterministic), each with up to three
+/// locations.
+fn filter_cargo_clippy(output: &str) -> String {
+    let mut by_rule: HashMap<String, Vec<String>> = HashMap::new();
+    let mut error_count = 0;
+    let mut warning_count = 0;
+    let mut current_rule = String::new();
+
+    for line in output.lines() {
+        if is_noise_line(line) {
+            continue;
+        }
+
+        if (line.starts_with("warning:") || line.starts_with("warning["))
+            || (line.starts_with("error:") || line.starts_with("error["))
+        {
+            // Per-crate summaries and abort notices are not diagnostics.
+            if line.contains("generated") && line.contains("warning") {
+                continue;
+            }
+            if line.contains("aborting due to") || line.contains("could not compile") {
+                continue;
+            }
+
+            let is_error = line.starts_with("error");
+            if is_error {
+                error_count += 1;
+            } else {
+                warning_count += 1;
+            }
+
+            current_rule = match (line.rfind('['), line.rfind(']')) {
+                // Only slice when the brackets are ordered; `line[a..b]` with
+                // `a > b` would panic.
+                (Some(open), Some(close)) if open < close => line[open + 1..close].to_string(),
+                (Some(_), _) => line.to_string(),
+                (None, _) => {
+                    let prefix = if is_error { "error: " } else { "warning: " };
+                    line.strip_prefix(prefix).unwrap_or(line).to_string()
+                }
+            };
+        } else if line.trim_start().starts_with("--> ") {
+            let location = line.trim_start().trim_start_matches("--> ").to_string();
+            if !current_rule.is_empty() {
+                by_rule
+                    .entry(current_rule.clone())
+                    .or_default()
+                    .push(location);
+            }
+        }
+    }
+
+    if error_count == 0 && warning_count == 0 {
+        return "ok clippy\n".to_string();
+    }
+
+    let mut result = format!(
+        "cargo clippy: {} errors, {} warnings\n\n",
+        error_count, warning_count
+    );
+
+    let mut rule_counts: Vec<_> = by_rule.iter().collect();
+    // Most frequent first; the name tie-break removes the dependence on
+    // `HashMap` iteration order.
+    rule_counts.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(b.0)));
+
+    for (rule, locations) in rule_counts.iter().take(15) {
+        result.push_str(&format!(" {} ({}x)\n", rule, locations.len()));
+        for loc in locations.iter().take(3) {
+            result.push_str(&format!(" {}\n", loc));
+        }
+        if locations.len() > 3 {
+            result.push_str(&format!(" ... +{} more\n", locations.len() - 3));
+        }
+    }
+
+    if by_rule.len() > 15 {
+        result.push_str(&format!("\n... +{} more rules\n", by_rule.len() - 15));
+    }
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_cargo_subcommand() {
+        assert_eq!(parse_cargo_subcommand("cargo build"), "build");
+        assert_eq!(parse_cargo_subcommand("cargo test --release"), "test");
+        assert_eq!(parse_cargo_subcommand("cargo clippy -- -W"), "clippy");
+        assert_eq!(parse_cargo_subcommand("cargo"), "");
+    }
+
+    #[test]
+    fn test_build_success() {
+        let output = " Compiling libc v0.2.153\n Compiling myapp v0.1.0\n Finished dev [unoptimized + debuginfo] target(s) in 5.2s\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo build", output).unwrap();
+        assert!(result.contains("ok"));
+        assert!(result.contains("3 crates compiled"));
+        assert!(!result.contains("Compiling"));
+    }
+
+    #[test]
+    fn test_build_errors() {
+        let output = " Compiling myapp v0.1.0\nerror[E0308]: mismatched types\n --> src/main.rs:10:5\n |\n10| \"hello\"\n | ^^^^^^^ expected `i32`, found `&str`\n\nerror: aborting due to 1 previous error\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo build", output).unwrap();
+        assert!(result.contains("1 errors"));
+        assert!(result.contains("E0308"));
+        assert!(!result.contains("Compiling"));
+        assert!(!result.contains("aborting"));
+    }
+
+    #[test]
+    fn test_test_all_pass() {
+        let output = " Compiling myapp v0.1.0\n Finished test target(s) in 2.5s\nrunning 15 tests\ntest foo::test_a ... ok\ntest foo::test_b ... ok\n\ntest result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo test", output).unwrap();
+        assert!(result.contains("ok test result:"));
+        assert!(result.contains("15 passed"));
+        assert!(!result.contains("Compiling"));
+        assert!(!result.contains("test foo::test_a"));
+    }
+
+    #[test]
+    fn test_test_failures() {
+        let output = "running 2 tests\ntest foo::test_a ... ok\ntest foo::test_b ... FAILED\n\nfailures:\n\n---- foo::test_b stdout ----\nthread 'foo::test_b' panicked at 'assert_eq!(1, 2)'\n\nfailures:\n foo::test_b\n\ntest result: FAILED. 1 passed; 1 failed; 0 ignored\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo test", output).unwrap();
+        assert!(result.contains("FAILURES"));
+        assert!(result.contains("test_b"));
+    }
+
+    #[test]
+    fn test_clippy_clean() {
+        let output = " Checking myapp v0.1.0\n Finished dev target(s) in 1.5s\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo clippy", output).unwrap();
+        assert!(result.contains("ok clippy"));
+    }
+
+    #[test]
+    fn test_clippy_warnings() {
+        let output = " Checking myapp v0.1.0\nwarning: unused variable: `x` [unused_variables]\n --> src/main.rs:10:9\n\nwarning: `myapp` (bin) generated 1 warning\n Finished dev target(s) in 1.5s\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo clippy", output).unwrap();
+        assert!(result.contains("0 errors, 1 warnings"));
+        assert!(result.contains("unused_variables"));
+    }
+
+    #[test]
+    fn test_clippy_mismatched_brackets_do_not_panic() {
+        // The last `]` precedes the last `[`; the whole line becomes the rule name.
+        let output = "warning: stray ] before [\n --> src/lib.rs:1:1\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo clippy", output).unwrap();
+        assert!(result.contains("0 errors, 1 warnings"));
+        assert!(result.contains("src/lib.rs:1:1"));
+    }
+
+    #[test]
+    fn test_unknown_subcommand_returns_none() {
+        let compressor = CargoCompressor;
+        assert!(compressor.compress("cargo run", "Hello world\n").is_none());
+    }
+
+    #[test]
+    fn test_check_uses_build_filter() {
+        let output = " Checking myapp v0.1.0\n Finished dev target(s)\n";
+        let compressor = CargoCompressor;
+        let result = compressor.compress("cargo check", output).unwrap();
+        assert!(result.contains("ok"));
+    }
+}
diff --git a/crates/compressor/src/strategy/bash/curl.rs b/crates/compressor/src/strategy/bash/curl.rs
new file mode 100644
index 0000000..58f3555
--- /dev/null
+++ b/crates/compressor/src/strategy/bash/curl.rs
@@ -0,0 +1,224 @@
+// Copyright 2024 rtk-ai and rtk-ai Labs
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `curl` command output. +//! +//! Auto-detects JSON responses and shows schema (types instead of values). +//! Truncates long non-JSON output to a reasonable size. + +use super::BashCompressor; + +const MAX_LINES: usize = 30; +const MAX_LINE_LEN: usize = 200; + +pub struct CurlCompressor; + +impl BashCompressor for CurlCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + let trimmed = output.trim(); + if trimmed.is_empty() { + return None; + } + + // Only compress if there's enough output to benefit + if trimmed.lines().count() < 10 && trimmed.len() < 500 { + return None; + } + + Some(filter_curl_output(trimmed)) + } +} + +fn filter_curl_output(output: &str) -> String { + // Try JSON detection: starts with { or [ + if (output.starts_with('{') || output.starts_with('[')) + && (output.ends_with('}') || output.ends_with(']')) + && let Some(schema) = json_schema(output) + { + return schema; + } + + // Not JSON: truncate long output + let lines: Vec<&str> = output.lines().collect(); + if lines.len() > MAX_LINES { + let mut result: Vec<&str> = lines[..MAX_LINES].to_vec(); + result.push(""); + return format!( + "{}\n... 
({} more lines, {} bytes total)", + result.join("\n"), + lines.len() - MAX_LINES, + output.len() + ); + } + + // Short output: truncate long lines + lines + .iter() + .map(|l| truncate(l, MAX_LINE_LEN)) + .collect::>() + .join("\n") +} + +/// Produce a JSON schema representation showing types instead of values. +fn json_schema(input: &str) -> Option { + let value: serde_json::Value = serde_json::from_str(input).ok()?; + let mut out = String::new(); + format_value(&value, &mut out, 0, 4); + Some(out) +} + +fn format_value(value: &serde_json::Value, out: &mut String, depth: usize, max_depth: usize) { + let indent = " ".repeat(depth); + + match value { + serde_json::Value::Null => out.push_str("null"), + serde_json::Value::Bool(_) => out.push_str("bool"), + serde_json::Value::Number(n) => { + if n.is_f64() { + out.push_str("float"); + } else { + out.push_str("int"); + } + } + serde_json::Value::String(s) => { + if s.len() > 50 { + out.push_str(&format!("string({})", s.len())); + } else { + out.push_str("string"); + } + } + serde_json::Value::Array(arr) => { + if arr.is_empty() { + out.push_str("[]"); + } else if depth >= max_depth { + out.push_str(&format!("[...{}]", arr.len())); + } else { + out.push_str(&format!("[{}] [\n", arr.len())); + // Show schema of first element only + out.push_str(&format!("{} ", indent)); + format_value(&arr[0], out, depth + 1, max_depth); + out.push('\n'); + out.push_str(&format!("{}]", indent)); + } + } + serde_json::Value::Object(map) => { + if map.is_empty() { + out.push_str("{}"); + } else if depth >= max_depth { + out.push_str(&format!("{{...{}}}", map.len())); + } else { + out.push_str("{\n"); + for (i, (key, val)) in map.iter().enumerate() { + if i >= 20 { + out.push_str(&format!("{} ... 
/// Return `s` unchanged when it is at most `max` bytes long; otherwise return
/// a prefix of at most `max - 3` bytes (cut back to the nearest UTF-8 character
/// boundary) followed by `...`.
fn truncate(s: &str, max: usize) -> String {
    if s.len() > max {
        // Walk back from the byte budget until we sit on a character boundary;
        // index 0 is always a boundary, so this terminates.
        let mut cut = max.saturating_sub(3);
        while !s.is_char_boundary(cut) {
            cut -= 1;
        }

        let mut shortened = String::with_capacity(cut + 3);
        shortened.push_str(&s[..cut]);
        shortened.push_str("...");
        return shortened;
    }

    s.to_owned()
}
test_json_schema_basic() { + let json = r#"{"name": "test", "count": 42, "active": true}"#; + let result = json_schema(json).unwrap(); + assert!(result.contains("\"name\": string")); + assert!(result.contains("\"count\": int")); + assert!(result.contains("\"active\": bool")); + } + + #[test] + fn test_json_schema_nested() { + let json = r#"{"user": {"name": "alice", "age": 30}}"#; + let result = json_schema(json).unwrap(); + assert!(result.contains("\"user\":")); + assert!(result.contains("\"name\": string")); + } + + #[test] + fn test_json_schema_array() { + let json = r#"[{"id": 1}, {"id": 2}]"#; + let result = json_schema(json).unwrap(); + assert!(result.contains("[2]")); + assert!(result.contains("\"id\": int")); + } + + #[test] + fn test_compressor_skips_short_output() { + let compressor = CurlCompressor; + assert!( + compressor + .compress("curl https://api.example.com", "OK") + .is_none() + ); + } +} diff --git a/crates/compressor/src/strategy/bash/diff.rs b/crates/compressor/src/strategy/bash/diff.rs new file mode 100644 index 0000000..47c2605 --- /dev/null +++ b/crates/compressor/src/strategy/bash/diff.rs @@ -0,0 +1,232 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `diff` / `git diff` unified diff output. +//! +//! Condenses unified diff format into per-file summaries with only +//! 
the changed lines, stripping context lines and headers. + +use super::BashCompressor; + +const MAX_CHANGES_PER_FILE: usize = 15; + +pub struct DiffCompressor; + +impl BashCompressor for DiffCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + let result = condense_unified_diff(output); + if result.is_empty() { + return None; + } + + Some(result) + } +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() <= max { + s.to_string() + } else { + format!("{}...", &s[..s.floor_char_boundary(max.saturating_sub(3))]) + } +} + +fn condense_unified_diff(diff: &str) -> String { + let mut files: Vec = Vec::new(); + let mut current_file = String::new(); + let mut added = 0; + let mut removed = 0; + let mut changes: Vec = Vec::new(); + + for line in diff.lines() { + if line.starts_with("diff --git") || line.starts_with("--- ") || line.starts_with("+++ ") { + if line.starts_with("+++ ") { + // Flush previous file + if !current_file.is_empty() && (added > 0 || removed > 0) { + files.push(FileDiff { + path: current_file.clone(), + added, + removed, + changes: std::mem::take(&mut changes), + }); + } + current_file = line + .trim_start_matches("+++ ") + .trim_start_matches("b/") + .to_string(); + added = 0; + removed = 0; + changes.clear(); + } + } else if line.starts_with("index ") || line.starts_with("@@") { + continue; + } else if line.starts_with('+') && !line.starts_with("+++") { + added += 1; + if changes.len() < MAX_CHANGES_PER_FILE { + changes.push(truncate(line, 80)); + } + } else if line.starts_with('-') && !line.starts_with("---") { + removed += 1; + if changes.len() < MAX_CHANGES_PER_FILE { + changes.push(truncate(line, 80)); + } + } + } + + // Flush last file + if !current_file.is_empty() && (added > 0 || removed > 0) { + files.push(FileDiff { + path: current_file, + added, + removed, + changes, + }); + } + + if files.is_empty() { + return String::new(); + } + + let total_added: usize = 
files.iter().map(|f| f.added).sum(); + let total_removed: usize = files.iter().map(|f| f.removed).sum(); + + let mut out = format!("{}F +{} -{}\n\n", files.len(), total_added, total_removed); + + for f in &files { + out.push_str(&format!("{} (+{} -{})\n", f.path, f.added, f.removed)); + for c in f.changes.iter().take(10) { + out.push_str(&format!(" {}\n", c)); + } + if f.changes.len() > 10 { + out.push_str(&format!(" ... +{} more\n", f.changes.len() - 10)); + } + out.push('\n'); + } + + out +} + +struct FileDiff { + path: String, + added: usize, + removed: usize, + changes: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_condense_single_file() { + let diff = r#"diff --git a/src/main.rs b/src/main.rs +--- a/src/main.rs ++++ b/src/main.rs +@@ -1,3 +1,4 @@ + fn main() { ++ println!("hello"); + println!("world"); + } +"#; + let compressor = DiffCompressor; + let result = compressor.compress("git diff", diff).unwrap(); + assert!(result.contains("src/main.rs")); + assert!(result.contains("+1 -0")); + assert!(result.contains("println")); + } + + #[test] + fn test_condense_multiple_files() { + let diff = r#"diff --git a/a.rs b/a.rs +--- a/a.rs ++++ b/a.rs +@@ -1 +1,2 @@ + existing ++added line +diff --git a/b.rs b/b.rs +--- a/b.rs ++++ b/b.rs +@@ -1,2 +1 @@ +-removed line + kept +"#; + let compressor = DiffCompressor; + let result = compressor.compress("git diff", diff).unwrap(); + assert!(result.contains("2F")); + assert!(result.contains("a.rs")); + assert!(result.contains("b.rs")); + } + + #[test] + fn test_condense_empty() { + let compressor = DiffCompressor; + assert!(compressor.compress("git diff", "").is_none()); + } + + #[test] + fn test_condense_no_changes() { + let diff = "diff --git a/file.rs b/file.rs\n--- a/file.rs\n+++ b/file.rs\n"; + let compressor = DiffCompressor; + assert!(compressor.compress("diff", diff).is_none()); + } + + #[test] + fn test_condense_summary_counts() { + let diff = r#"diff --git a/file.rs b/file.rs +--- 
a/file.rs ++++ b/file.rs +@@ -1,3 +1,5 @@ ++line1 ++line2 +-old1 + unchanged +"#; + let result = condense_unified_diff(diff); + assert!(result.contains("+2 -1")); + assert!(result.contains("1F +2 -1")); + } + + #[test] + fn test_truncate_long_lines() { + assert_eq!(truncate("hello", 10), "hello"); + assert_eq!(truncate("hello world!", 8), "hello..."); + } + + #[test] + fn test_context_lines_stripped() { + let diff = r#"diff --git a/f.rs b/f.rs +--- a/f.rs ++++ b/f.rs +@@ -1,5 +1,5 @@ + context1 + context2 +-old ++new + context3 + context4 +"#; + let result = condense_unified_diff(diff); + // Should only contain changed lines, not context + assert!(!result.contains("context1")); + assert!(result.contains("-old")); + assert!(result.contains("+new")); + } +} diff --git a/crates/compressor/src/strategy/bash/docker.rs b/crates/compressor/src/strategy/bash/docker.rs new file mode 100644 index 0000000..c24b4b2 --- /dev/null +++ b/crates/compressor/src/strategy/bash/docker.rs @@ -0,0 +1,303 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `docker` command output. +//! +//! Compacts `docker ps` and `docker images` tabular output into +//! a dense, token-efficient format. 
/// Pick the docker subcommand out of a full command line.
///
/// The first whitespace-separated word (the program name) is ignored; of the
/// remaining words, the first one that does not begin with `-` is returned.
/// Yields `""` when there is no such word.
fn parse_docker_subcommand(command: &str) -> &str {
    command
        .split_whitespace()
        .skip(1)
        .find(|word| !word.starts_with('-'))
        .unwrap_or("")
}
out.push_str(&format!( + " {} {} ({}) {} [{}]\n", + id, name, short_image, status, compact_ports + )); + } + } + + if data_lines.len() > 20 { + out.push_str(&format!(" ... +{} more\n", data_lines.len() - 20)); + } + + out +} + +/// Compact `docker images` tabular output. +/// +/// Input headers: REPOSITORY TAG IMAGE ID CREATED SIZE +fn compact_docker_images(output: &str) -> String { + let lines: Vec<&str> = output.lines().collect(); + if lines.is_empty() { + return "0 images\n".to_string(); + } + + let header = lines[0]; + let data_lines: Vec<&str> = lines[1..] + .iter() + .filter(|l| !l.trim().is_empty()) + .copied() + .collect(); + + if data_lines.is_empty() { + return "0 images\n".to_string(); + } + + let col_positions = parse_header_columns(header); + let mut out = format!("{} images:\n", data_lines.len()); + + for line in data_lines.iter().take(20) { + let cols = extract_columns(line, &col_positions); + let repo = cols.get("REPOSITORY").unwrap_or(&String::new()).clone(); + let tag = cols.get("TAG").unwrap_or(&String::new()).clone(); + let size = cols.get("SIZE").unwrap_or(&String::new()).clone(); + + let image_name = if tag == "" || tag.is_empty() { + repo.clone() + } else { + format!("{}:{}", repo, tag) + }; + + let short = if image_name.len() > 45 { + format!("...{}", &image_name[image_name.len() - 42..]) + } else { + image_name + }; + + out.push_str(&format!(" {} [{}]\n", short, size)); + } + + if data_lines.len() > 20 { + out.push_str(&format!(" ... +{} more\n", data_lines.len() - 20)); + } + + out +} + +/// Parse column header positions from a docker table header line. +/// Returns vec of (column_name, start_position). 
/// Parse column header positions from a docker table header line.
///
/// Column names may contain single spaces ("CONTAINER ID", "IMAGE ID"); a run
/// of two or more whitespace characters separates columns.
///
/// Returns a vec of `(column_name, start_position)` in header order, where
/// `start_position` is the byte offset of the column's first character in
/// `header`, so it can be used directly to slice data rows.
fn parse_header_columns(header: &str) -> Vec<(String, usize)> {
    // Pair every character with its byte offset so the scan can work
    // per character while all slicing stays on valid byte boundaries.
    let chars: Vec<(usize, char)> = header.char_indices().collect();
    let byte_at = |idx: usize| chars.get(idx).map_or(header.len(), |&(offset, _)| offset);
    let is_space = |idx: usize| chars.get(idx).map_or(false, |&(_, c)| c.is_whitespace());

    let mut cols = Vec::new();
    let mut i = 0;

    while i < chars.len() {
        // Skip the gap before the next column name.
        if is_space(i) {
            i += 1;
            continue;
        }

        let start = i;
        // Consume the name; it ends at the first double whitespace (a single
        // space is part of the name) or at the end of the header.
        while i < chars.len() && !(is_space(i) && is_space(i + 1)) {
            i += 1;
        }

        let name = header[byte_at(start)..byte_at(i)].trim();
        if !name.is_empty() {
            cols.push((name.to_string(), byte_at(start)));
        }

        // Skip to the next non-space.
        while i < chars.len() && is_space(i) {
            i += 1;
        }
    }

    cols
}
+{}", + port_nums[..2].join(", "), + port_nums.len() - 2 + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_docker_ps_basic() { + let input = "CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\nabc123def456 nginx:latest \"nginx -g…\" 2 hours ago Up 2 hours 80/tcp web\n"; + let compressor = DockerCompressor; + let result = compressor.compress("docker ps", input).unwrap(); + assert!(result.contains("1 containers:")); + assert!(result.contains("web")); + assert!(result.contains("nginx")); + } + + #[test] + fn test_docker_ps_empty() { + let input = "CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\n"; + let compressor = DockerCompressor; + let result = compressor.compress("docker ps", input).unwrap(); + assert!(result.contains("0 containers")); + } + + #[test] + fn test_docker_images_basic() { + let input = "REPOSITORY TAG IMAGE ID CREATED SIZE\nnginx latest abc123def456 2 weeks ago 187MB\nredis 7.0 def456abc789 3 weeks ago 130MB\n"; + let compressor = DockerCompressor; + let result = compressor.compress("docker images", input).unwrap(); + assert!(result.contains("2 images:")); + assert!(result.contains("nginx:latest")); + assert!(result.contains("redis:7.0")); + } + + #[test] + fn test_docker_images_empty() { + let input = "REPOSITORY TAG IMAGE ID CREATED SIZE\n"; + let compressor = DockerCompressor; + let result = compressor.compress("docker images", input).unwrap(); + assert!(result.contains("0 images")); + } + + #[test] + fn test_unknown_subcommand() { + let compressor = DockerCompressor; + assert!( + compressor + .compress("docker exec -it foo bash", "root@abc:/# ") + .is_none() + ); + } + + #[test] + fn test_compact_ports() { + assert_eq!(compact_port_string(""), "-"); + assert_eq!(compact_port_string("80/tcp"), "80/tcp"); + assert_eq!(compact_port_string("0.0.0.0:8080->80/tcp"), "8080"); + } +} diff --git a/crates/compressor/src/strategy/bash/env.rs b/crates/compressor/src/strategy/bash/env.rs new file mode 100644 index 
0000000..fe3e655 --- /dev/null +++ b/crates/compressor/src/strategy/bash/env.rs @@ -0,0 +1,244 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `env` / `printenv` command output. +//! +//! Categorizes environment variables, masks sensitive values, +//! and truncates long values to reduce token usage. + +use super::BashCompressor; + +pub struct EnvCompressor; + +impl BashCompressor for EnvCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return Some("0 vars\n".to_string()); + } + + Some(compact_env(output)) + } +} + +const SENSITIVE_PATTERNS: &[&str] = &[ + "key", + "secret", + "password", + "token", + "credential", + "auth", + "private", + "api_key", + "apikey", + "access_key", + "jwt", +]; + +fn compact_env(output: &str) -> String { + let mut path_vars: Vec<(String, String)> = Vec::new(); + let mut lang_vars: Vec<(String, String)> = Vec::new(); + let mut tool_vars: Vec<(String, String)> = Vec::new(); + let mut other_vars: Vec<(String, String)> = Vec::new(); + let mut total = 0; + + for line in output.lines() { + if line.trim().is_empty() { + continue; + } + + let (key, value) = match line.split_once('=') { + Some((k, v)) => (k, v), + None => continue, + }; + + total += 1; + + let is_sensitive = SENSITIVE_PATTERNS + .iter() + .any(|p| 
key.to_lowercase().contains(p)); + + let display_value = if is_sensitive { + mask_value(value) + } else if value.len() > 100 { + format!("{}... ({} chars)", &value[..50], value.len()) + } else { + value.to_string() + }; + + let entry = (key.to_string(), display_value); + + if key.contains("PATH") { + path_vars.push(entry); + } else if is_lang_var(key) { + lang_vars.push(entry); + } else if is_tool_var(key) { + tool_vars.push(entry); + } else { + other_vars.push(entry); + } + } + + let mut out = String::new(); + + if !path_vars.is_empty() { + out.push_str("PATH Variables:\n"); + for (k, v) in &path_vars { + if k == "PATH" { + let paths: Vec<&str> = v.split(':').collect(); + out.push_str(&format!(" PATH ({} entries):\n", paths.len())); + for p in paths.iter().take(5) { + out.push_str(&format!(" {}\n", p)); + } + if paths.len() > 5 { + out.push_str(&format!(" ... +{} more\n", paths.len() - 5)); + } + } else { + out.push_str(&format!(" {}={}\n", k, v)); + } + } + } + + if !lang_vars.is_empty() { + out.push_str("\nLanguage/Runtime:\n"); + for (k, v) in &lang_vars { + out.push_str(&format!(" {}={}\n", k, v)); + } + } + + if !tool_vars.is_empty() { + out.push_str("\nTools:\n"); + for (k, v) in &tool_vars { + out.push_str(&format!(" {}={}\n", k, v)); + } + } + + if !other_vars.is_empty() { + out.push_str("\nOther:\n"); + for (k, v) in other_vars.iter().take(20) { + out.push_str(&format!(" {}={}\n", k, v)); + } + if other_vars.len() > 20 { + out.push_str(&format!(" ... 
+{} more\n", other_vars.len() - 20)); + } + } + + let shown = path_vars.len() + lang_vars.len() + tool_vars.len() + other_vars.len().min(20); + out.push_str(&format!("\n{} vars ({} shown)\n", total, shown)); + + out +} + +fn mask_value(value: &str) -> String { + if value.len() <= 4 { + "****".to_string() + } else { + format!("{}****{}", &value[..2], &value[value.len() - 2..]) + } +} + +fn is_lang_var(key: &str) -> bool { + const PATTERNS: &[&str] = &[ + "RUST", "CARGO", "PYTHON", "PIP", "NODE", "NPM", "YARN", "DENO", "BUN", "JAVA", "MAVEN", + "GRADLE", "GO", "GOPATH", "GOROOT", "RUBY", "GEM", "PERL", "PHP", "DOTNET", "NUGET", + ]; + let upper = key.to_uppercase(); + PATTERNS.iter().any(|p| upper.contains(p)) +} + +fn is_tool_var(key: &str) -> bool { + const PATTERNS: &[&str] = &[ + "EDITOR", + "VISUAL", + "SHELL", + "TERM", + "GIT", + "SSH", + "GPG", + "BREW", + "HOMEBREW", + "XDG", + "CLAUDE", + "ANTHROPIC", + ]; + let upper = key.to_uppercase(); + PATTERNS.iter().any(|p| upper.contains(p)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compact_basic() { + let input = + "HOME=/home/user\nUSER=testuser\nSHELL=/bin/bash\nPATH=/usr/bin:/usr/local/bin\n"; + let compressor = EnvCompressor; + let result = compressor.compress("env", input).unwrap(); + assert!(result.contains("PATH")); + assert!(result.contains("SHELL")); + assert!(result.contains("4 vars")); + } + + #[test] + fn test_compact_empty() { + let compressor = EnvCompressor; + let result = compressor.compress("env", "").unwrap(); + assert_eq!(result, "0 vars\n"); + } + + #[test] + fn test_masks_sensitive() { + let input = "API_KEY=super_secret_value\nHOME=/home/user\n"; + let compressor = EnvCompressor; + let result = compressor.compress("env", input).unwrap(); + assert!(!result.contains("super_secret_value")); + assert!(result.contains("****")); + } + + #[test] + fn test_mask_value() { + assert_eq!(mask_value("ab"), "****"); + assert_eq!(mask_value("abcdef"), "ab****ef"); + } + + #[test] 
+ fn test_truncates_long_values() { + let long_val = "x".repeat(200); + let input = format!("SOME_VAR={}\n", long_val); + let result = compact_env(&input); + assert!(result.contains("200 chars")); + } + + #[test] + fn test_categorizes_vars() { + let input = "RUST_LOG=debug\nEDITOR=vim\nHOME=/home/user\nPATH=/usr/bin\n"; + let result = compact_env(input); + assert!(result.contains("Language/Runtime:")); + assert!(result.contains("RUST_LOG")); + assert!(result.contains("Tools:")); + assert!(result.contains("EDITOR")); + assert!(result.contains("PATH Variables:")); + } + + #[test] + fn test_path_split() { + let input = "PATH=/usr/bin:/usr/local/bin:/home/user/bin:/opt/bin:/sbin:/usr/sbin:/extra\n"; + let result = compact_env(input); + assert!(result.contains("7 entries")); + assert!(result.contains("+2 more")); + } +} diff --git a/crates/compressor/src/strategy/bash/eslint.rs b/crates/compressor/src/strategy/bash/eslint.rs new file mode 100644 index 0000000..a647f88 --- /dev/null +++ b/crates/compressor/src/strategy/bash/eslint.rs @@ -0,0 +1,326 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `eslint` command output. +//! +//! Groups ESLint issues by rule and file, providing a compact summary. +//! Handles both the default formatter and JSON output. 
+ +use std::collections::HashMap; + +use super::BashCompressor; + +pub struct EslintCompressor; + +impl BashCompressor for EslintCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + // Try JSON format first (if user ran eslint -f json) + if output.trim().starts_with('[') { + return filter_eslint_json(output); + } + + // Default text format + filter_eslint_text(output) + } +} + +/// Filter ESLint JSON output — group by rule and file. +fn filter_eslint_json(output: &str) -> Option { + let results: Vec = match serde_json::from_str(output) { + Ok(r) => r, + Err(_) => return None, + }; + + let total_errors: usize = results.iter().map(|r| r.error_count).sum(); + let total_warnings: usize = results.iter().map(|r| r.warning_count).sum(); + let total_files = results.iter().filter(|r| !r.messages.is_empty()).count(); + + if total_errors == 0 && total_warnings == 0 { + return Some("ESLint: No issues found".to_string()); + } + + // Group by rule + let mut by_rule: HashMap = HashMap::new(); + for result in &results { + for msg in &result.messages { + let rule = msg.rule_id.as_deref().unwrap_or("unknown"); + *by_rule.entry(rule.to_string()).or_insert(0) += 1; + } + } + + let mut out = format!( + "ESLint: {} errors, {} warnings in {} files\n\n", + total_errors, total_warnings, total_files + ); + + // Top rules + let mut rule_counts: Vec<_> = by_rule.iter().collect(); + rule_counts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + + if !rule_counts.is_empty() { + for (rule, count) in rule_counts.iter().take(10) { + out.push_str(&format!(" {} ({}x)\n", rule, count)); + } + if rule_counts.len() > 10 { + out.push_str(&format!(" ... +{} more rules\n", rule_counts.len() - 10)); + } + } + + Some(out.trim().to_string()) +} + +/// Filter ESLint default text output — group issues by file with counts. 
+fn filter_eslint_text(output: &str) -> Option { + let mut files: Vec = Vec::new(); + let mut current_file = String::new(); + let mut current_issues: Vec = Vec::new(); + let mut total_errors = 0usize; + let mut total_warnings = 0usize; + let mut by_rule: HashMap = HashMap::new(); + + for line in output.lines() { + let trimmed = line.trim(); + + // Skip empty lines and summary lines + if trimmed.is_empty() { + continue; + } + + // Summary line at the end + if trimmed.starts_with('\u{2716}') || trimmed.starts_with("✖") { + continue; + } + + // File path line (not indented, contains / or \) + if !line.starts_with(' ') + && !line.starts_with('\t') + && (trimmed.contains('/') || trimmed.contains('\\')) + && !trimmed.contains(" ") + { + // Flush previous file + if !current_file.is_empty() && !current_issues.is_empty() { + files.push(FileIssues { + path: current_file.clone(), + issues: std::mem::take(&mut current_issues), + }); + } + current_file = trimmed.to_string(); + continue; + } + + // Issue line: " line:col severity message rule" + if (line.starts_with(' ') || line.starts_with('\t')) && !current_file.is_empty() { + let parts: Vec<&str> = trimmed.split_whitespace().collect(); + if parts.len() >= 3 { + let severity = if parts.contains(&"error") { + total_errors += 1; + "error" + } else if parts.contains(&"warning") { + total_warnings += 1; + "warning" + } else { + continue; + }; + + // Last part is usually the rule name + let rule = parts.last().unwrap_or(&""); + *by_rule.entry(rule.to_string()).or_insert(0) += 1; + + current_issues.push(Issue { + severity: severity.to_string(), + _rule: rule.to_string(), + _location: parts.first().unwrap_or(&"").to_string(), + }); + } + } + } + + // Flush last file + if !current_file.is_empty() && !current_issues.is_empty() { + files.push(FileIssues { + path: current_file, + issues: current_issues, + }); + } + + if files.is_empty() { + return None; + } + + let mut result = format!( + "ESLint: {} errors, {} warnings in {} files\n\n", 
+ total_errors, + total_warnings, + files.len() + ); + + // Top rules + let mut rule_counts: Vec<_> = by_rule.iter().collect(); + rule_counts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + + if !rule_counts.is_empty() { + result.push_str("Top rules:\n"); + for (rule, count) in rule_counts.iter().take(10) { + result.push_str(&format!(" {} ({}x)\n", rule, count)); + } + result.push('\n'); + } + + // Top files + let mut sorted_files = files; + sorted_files.sort_by(|a, b| b.issues.len().cmp(&a.issues.len())); + + result.push_str("Files:\n"); + for file in sorted_files.iter().take(15) { + let short_path = compact_path(&file.path); + let errors = file.issues.iter().filter(|i| i.severity == "error").count(); + let warnings = file + .issues + .iter() + .filter(|i| i.severity == "warning") + .count(); + result.push_str(&format!( + " {} ({} errors, {} warnings)\n", + short_path, errors, warnings + )); + } + + if sorted_files.len() > 15 { + result.push_str(&format!(" ... +{} more files\n", sorted_files.len() - 15)); + } + + Some(result.trim().to_string()) +} + +fn compact_path(path: &str) -> String { + let path = path.replace('\\', "/"); + if let Some(pos) = path.rfind("/src/") { + format!("src/{}", &path[pos + 5..]) + } else if let Some(pos) = path.rfind("/lib/") { + format!("lib/{}", &path[pos + 5..]) + } else if let Some(pos) = path.rfind('/') { + path[pos + 1..].to_string() + } else { + path + } +} + +struct FileIssues { + path: String, + issues: Vec, +} + +struct Issue { + severity: String, + _rule: String, + _location: String, +} + +#[derive(serde::Deserialize)] +struct EslintJsonResult { + #[serde(rename = "filePath")] + _file_path: String, + messages: Vec, + #[serde(rename = "errorCount")] + error_count: usize, + #[serde(rename = "warningCount")] + warning_count: usize, +} + +#[derive(serde::Deserialize)] +struct EslintJsonMessage { + #[serde(rename = "ruleId")] + rule_id: Option, + #[serde(rename = "severity")] + _severity: u8, + #[serde(rename = 
"message")] + _message: String, + #[serde(rename = "line")] + _line: usize, + #[serde(rename = "column")] + _column: usize, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filter_eslint_json() { + let json = r#"[ + { + "filePath": "/Users/test/project/src/utils.ts", + "messages": [ + {"ruleId": "prefer-const", "severity": 1, "message": "Use const", "line": 10, "column": 5}, + {"ruleId": "prefer-const", "severity": 1, "message": "Use const", "line": 15, "column": 5} + ], + "errorCount": 0, + "warningCount": 2 + }, + { + "filePath": "/Users/test/project/src/api.ts", + "messages": [ + {"ruleId": "@typescript-eslint/no-unused-vars", "severity": 2, "message": "Variable x is unused", "line": 20, "column": 10} + ], + "errorCount": 1, + "warningCount": 0 + } + ]"#; + + let compressor = EslintCompressor; + let result = compressor.compress("eslint -f json .", json).unwrap(); + assert!(result.contains("ESLint:")); + assert!(result.contains("prefer-const")); + assert!(result.contains("no-unused-vars")); + } + + #[test] + fn test_filter_eslint_text() { + let output = "/Users/test/src/utils.ts\n 10:5 warning Use const instead of let prefer-const\n 15:5 warning Use const instead of let prefer-const\n\n/Users/test/src/api.ts\n 20:10 error Variable x is unused @typescript-eslint/no-unused-vars\n\n✖ 3 problems (1 error, 2 warnings)\n"; + let compressor = EslintCompressor; + let result = compressor.compress("eslint .", output).unwrap(); + assert!(result.contains("ESLint: 1 errors, 2 warnings")); + assert!(result.contains("prefer-const")); + } + + #[test] + fn test_filter_eslint_json_no_issues() { + let json = + r#"[{"filePath": "/test.ts", "messages": [], "errorCount": 0, "warningCount": 0}]"#; + let result = filter_eslint_json(json).unwrap(); + assert!(result.contains("No issues found")); + } + + #[test] + fn test_compact_path() { + assert_eq!( + compact_path("/Users/foo/project/src/utils.ts"), + "src/utils.ts" + ); + assert_eq!(compact_path("simple.ts"), 
"simple.ts"); + } + + #[test] + fn test_compressor_returns_none_for_empty() { + let compressor = EslintCompressor; + assert!(compressor.compress("eslint .", "").is_none()); + } +} diff --git a/crates/compressor/src/strategy/bash/find.rs b/crates/compressor/src/strategy/bash/find.rs new file mode 100644 index 0000000..56d03ec --- /dev/null +++ b/crates/compressor/src/strategy/bash/find.rs @@ -0,0 +1,187 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `find` command output. +//! +//! Groups found paths by directory and adds an extension summary, +//! producing a compact listing instead of a flat file list. 
+ +use std::collections::HashMap; +use std::path::Path; + +use super::BashCompressor; + +pub struct FindCompressor; + +impl BashCompressor for FindCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + let lines: Vec<&str> = output.lines().filter(|l| !l.trim().is_empty()).collect(); + if lines.is_empty() { + return Some("0 results\n".to_string()); + } + + Some(compact_find(&lines)) + } +} + +fn compact_find(paths: &[&str]) -> String { + let mut by_dir: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut by_ext: HashMap = HashMap::new(); + + for path in paths { + let p = Path::new(path); + let dir = p.parent().map(|d| d.to_str().unwrap_or(".")).unwrap_or("."); + let dir = if dir.is_empty() { "." } else { dir }; + let filename = p + .file_name() + .map(|f| f.to_str().unwrap_or("")) + .unwrap_or(""); + + by_dir.entry(dir).or_default().push(filename); + + let ext = p + .extension() + .map(|e| format!(".{}", e.to_str().unwrap_or(""))) + .unwrap_or_else(|| "no ext".to_string()); + *by_ext.entry(ext).or_default() += 1; + } + + let mut dirs: Vec<_> = by_dir.keys().copied().collect(); + dirs.sort(); + + let total = paths.len(); + let mut out = format!("{}F {}D:\n\n", total, dirs.len()); + + let mut shown = 0; + let max_results = 50; + + for dir in &dirs { + if shown >= max_results { + break; + } + + let files_in_dir = &by_dir[dir]; + let dir_display = compact_path(dir); + + let remaining = max_results - shown; + if files_in_dir.len() <= remaining { + out.push_str(&format!("{}/ {}\n", dir_display, files_in_dir.join(" "))); + shown += files_in_dir.len(); + } else { + let partial: Vec<&str> = files_in_dir.iter().take(remaining).copied().collect(); + out.push_str(&format!("{}/ {}\n", dir_display, partial.join(" "))); + shown += partial.len(); + break; + } + } + + if shown < total { + out.push_str(&format!("+{} more\n", total - shown)); + } + + // Extension summary + if by_ext.len() > 1 { + let mut exts: Vec<_> = by_ext.iter().collect(); + 
exts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let ext_parts: Vec = exts + .iter() + .take(5) + .map(|(e, c)| format!("{}({})", e, c)) + .collect(); + out.push_str(&format!("\next: {}\n", ext_parts.join(" "))); + } + + out +} + +fn compact_path(path: &str) -> String { + if path.len() <= 50 { + return path.to_string(); + } + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() <= 3 { + return path.to_string(); + } + format!( + "{}/.../{}/{}", + parts[0], + parts[parts.len() - 2], + parts[parts.len() - 1] + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compact_basic() { + let input = "src/main.rs\nsrc/lib.rs\ntests/test.rs\n"; + let compressor = FindCompressor; + let result = compressor.compress("find . -name '*.rs'", input).unwrap(); + assert!(result.contains("3F 2D:")); + assert!(result.contains("src/")); + assert!(result.contains("main.rs")); + assert!(result.contains("lib.rs")); + assert!(result.contains("tests/")); + assert!(result.contains("test.rs")); + } + + #[test] + fn test_compact_empty() { + let compressor = FindCompressor; + let result = compressor.compress("find . 
-name '*.xyz'", "").unwrap(); + assert_eq!(result, "0 results\n"); + } + + #[test] + fn test_compact_single_dir() { + let result = compact_find(&["main.rs", "lib.rs", "utils.rs"]); + assert!(result.contains("3F 1D:")); + assert!(result.contains("./ main.rs lib.rs utils.rs")); + } + + #[test] + fn test_compact_extension_summary() { + let input = vec!["src/main.rs", "src/lib.rs", "Cargo.toml", "README.md"]; + let result = compact_find(&input); + assert!(result.contains("ext:")); + assert!(result.contains(".rs(2)")); + } + + #[test] + fn test_compact_path_short() { + assert_eq!(compact_path("src/main.rs"), "src/main.rs"); + } + + #[test] + fn test_compact_path_long() { + let long = "very/long/deeply/nested/path/to/some/directory/here"; + let result = compact_path(long); + assert!(result.contains("...")); + assert!(result.len() <= long.len()); + } + + #[test] + fn test_compact_many_results() { + let paths: Vec = (0..100).map(|i| format!("src/file{}.rs", i)).collect(); + let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect(); + let result = compact_find(&path_refs); + assert!(result.contains("100F")); + assert!(result.contains("+50 more")); + } +} diff --git a/crates/compressor/src/strategy/bash/go.rs b/crates/compressor/src/strategy/bash/go.rs new file mode 100644 index 0000000..323224b --- /dev/null +++ b/crates/compressor/src/strategy/bash/go.rs @@ -0,0 +1,424 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `go` command output. +//! +//! Filters `go test`, `go build`, and `go vet` output to show +//! only failures, errors, and compact summaries. + +use std::collections::HashMap; + +use super::BashCompressor; + +pub struct GoCompressor; + +impl BashCompressor for GoCompressor { + fn compress(&self, command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + let subcommand = parse_go_subcommand(command); + match subcommand { + "test" => Some(filter_go_test(output)), + "build" => Some(filter_go_build(output)), + "vet" => Some(filter_go_vet(output)), + _ => None, + } + } +} + +fn parse_go_subcommand(command: &str) -> &str { + for arg in command.split_whitespace().skip(1) { + if arg.starts_with('-') { + continue; + } + return arg; + } + "" +} + +/// Filter go test text output — show failures + summary. 
+fn filter_go_test(output: &str) -> String { + let mut packages: HashMap = HashMap::new(); + let mut current_test: Option = None; + let mut current_output: Vec = Vec::new(); + let mut current_package = String::new(); + + for line in output.lines() { + let trimmed = line.trim(); + + // Package result lines: "ok package 0.123s" or "FAIL package 0.123s" + if trimmed.starts_with("ok ") + || trimmed.starts_with("FAIL\t") + || trimmed.starts_with("ok\t") + { + let parts: Vec<&str> = trimmed.split_whitespace().collect(); + if parts.len() >= 2 { + let status = parts[0]; + let package = parts[1].to_string(); + let pkg = packages.entry(package).or_default(); + if status == "FAIL" { + pkg.failed = true; + } + } + continue; + } + + // Test run line: "=== RUN TestFoo" + if trimmed.starts_with("=== RUN") { + // Flush previous test if any + flush_test( + &mut packages, + ¤t_package, + ¤t_test, + ¤t_output, + ); + current_test = trimmed + .strip_prefix("=== RUN") + .map(|s| s.trim().to_string()); + current_output.clear(); + continue; + } + + // Test result line: "--- PASS: TestFoo (0.00s)" or "--- FAIL: TestFoo (0.01s)" + if trimmed.starts_with("--- PASS:") { + if let Some(pkg_name) = extract_package_from_context(¤t_package) { + packages.entry(pkg_name).or_default().pass += 1; + } + current_test = None; + current_output.clear(); + continue; + } + + if trimmed.starts_with("--- FAIL:") { + let test_name = trimmed + .strip_prefix("--- FAIL:") + .and_then(|s| s.split('(').next()) + .map(|s| s.trim().to_string()) + .unwrap_or_default(); + + if let Some(pkg_name) = extract_package_from_context(¤t_package) { + let pkg = packages.entry(pkg_name).or_default(); + pkg.fail += 1; + pkg.failed_tests.push((test_name, current_output.clone())); + } + current_test = None; + current_output.clear(); + continue; + } + + // "--- SKIP:" lines + if trimmed.starts_with("--- SKIP:") { + if let Some(pkg_name) = extract_package_from_context(¤t_package) { + packages.entry(pkg_name).or_default().skip += 1; + 
} + current_test = None; + current_output.clear(); + continue; + } + + // Package header: "# package/path" + if let Some(stripped) = trimmed.strip_prefix('#') { + current_package = stripped.trim().to_string(); + continue; + } + + // Collect test output + if current_test.is_some() && !trimmed.is_empty() { + current_output.push(trimmed.to_string()); + } + + // Build errors (file:line:col format within test output) + if trimmed.contains(".go:") + && trimmed.contains(": ") + && !trimmed.starts_with("---") + && let Some(pkg_name) = extract_package_from_context(¤t_package) + { + let pkg = packages.entry(pkg_name).or_default(); + if !pkg.build_errors.contains(&trimmed.to_string()) { + pkg.build_errors.push(trimmed.to_string()); + } + } + } + + // Flush last test + flush_test( + &mut packages, + ¤t_package, + ¤t_test, + ¤t_output, + ); + + build_go_test_summary(&packages) +} + +fn flush_test( + packages: &mut HashMap, + current_package: &str, + current_test: &Option, + current_output: &[String], +) { + if current_test.is_some() + && !current_output.is_empty() + && let Some(pkg_name) = extract_package_from_context(current_package) + { + let _pkg = packages.entry(pkg_name).or_default(); + } +} + +fn extract_package_from_context(pkg: &str) -> Option { + if pkg.is_empty() { + Some("(default)".to_string()) + } else { + Some(pkg.to_string()) + } +} + +fn build_go_test_summary(packages: &HashMap) -> String { + let total_pass: usize = packages.values().map(|p| p.pass).sum(); + let total_fail: usize = packages.values().map(|p| p.fail).sum(); + let total_skip: usize = packages.values().map(|p| p.skip).sum(); + let build_failures: usize = packages + .values() + .filter(|p| !p.build_errors.is_empty()) + .count(); + + let has_failures = total_fail > 0 || build_failures > 0; + + if !has_failures && total_pass == 0 { + return "Go test: No tests found".to_string(); + } + + if !has_failures { + return format!( + "Go test: {} passed in {} packages", + total_pass, + packages.len() + ); + } + 
+ let mut result = format!("Go test: {} passed, {} failed", total_pass, total_fail); + if total_skip > 0 { + result.push_str(&format!(", {} skipped", total_skip)); + } + result.push_str(&format!(" in {} packages\n", packages.len())); + + // Show build errors first + for (package, pkg_result) in packages.iter() { + if pkg_result.build_errors.is_empty() { + continue; + } + result.push_str(&format!( + "\n{} [build errors]\n", + compact_package_name(package) + )); + for err in pkg_result.build_errors.iter().take(10) { + result.push_str(&format!(" {}\n", truncate(err, 120))); + } + } + + // Show failed tests + for (package, pkg_result) in packages.iter() { + if pkg_result.fail == 0 { + continue; + } + result.push_str(&format!( + "\n{} ({} passed, {} failed)\n", + compact_package_name(package), + pkg_result.pass, + pkg_result.fail + )); + + for (test, outputs) in &pkg_result.failed_tests { + result.push_str(&format!(" FAIL {}\n", test)); + + let relevant: Vec<&String> = outputs + .iter() + .filter(|line| { + let lower = line.to_lowercase(); + !line.trim().is_empty() + && (lower.contains("error") + || lower.contains("expected") + || lower.contains("got") + || lower.contains("panic")) + }) + .take(5) + .collect(); + + for line in relevant { + result.push_str(&format!(" {}\n", truncate(line, 100))); + } + } + } + + result.trim().to_string() +} + +/// Filter go build output — show only errors. 
+fn filter_go_build(output: &str) -> String { + let mut errors: Vec = Vec::new(); + + for line in output.lines() { + let trimmed = line.trim(); + let lower = trimmed.to_lowercase(); + + if trimmed.starts_with('#') && !lower.contains("error") { + continue; + } + + if !trimmed.is_empty() + && (lower.contains("error") + || trimmed.contains(".go:") + || lower.contains("undefined") + || lower.contains("cannot")) + { + errors.push(trimmed.to_string()); + } + } + + if errors.is_empty() { + return "Go build: ok".to_string(); + } + + let mut result = format!("Go build: {} errors\n", errors.len()); + for (i, error) in errors.iter().take(20).enumerate() { + result.push_str(&format!("{}. {}\n", i + 1, truncate(error, 120))); + } + if errors.len() > 20 { + result.push_str(&format!("\n... +{} more errors\n", errors.len() - 20)); + } + + result.trim().to_string() +} + +/// Filter go vet output — show issues. +fn filter_go_vet(output: &str) -> String { + let mut issues: Vec = Vec::new(); + + for line in output.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() && !trimmed.starts_with('#') && trimmed.contains(".go:") { + issues.push(trimmed.to_string()); + } + } + + if issues.is_empty() { + return "Go vet: ok".to_string(); + } + + let mut result = format!("Go vet: {} issues\n", issues.len()); + for (i, issue) in issues.iter().take(20).enumerate() { + result.push_str(&format!("{}. {}\n", i + 1, truncate(issue, 120))); + } + if issues.len() > 20 { + result.push_str(&format!("\n... 
+{} more issues\n", issues.len() - 20)); + } + + result.trim().to_string() +} + +fn compact_package_name(package: &str) -> String { + if let Some(pos) = package.rfind('/') { + package[pos + 1..].to_string() + } else { + package.to_string() + } +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() <= max { + s.to_string() + } else { + format!("{}...", &s[..s.floor_char_boundary(max.saturating_sub(3))]) + } +} + +#[derive(Default)] +struct PackageResult { + pass: usize, + fail: usize, + skip: usize, + failed: bool, + build_errors: Vec, + failed_tests: Vec<(String, Vec)>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_go_test_all_pass() { + let output = "=== RUN TestFoo\n--- PASS: TestFoo (0.00s)\n=== RUN TestBar\n--- PASS: TestBar (0.00s)\nok\texample.com/pkg\t0.005s\n"; + let compressor = GoCompressor; + let result = compressor.compress("go test ./...", output).unwrap(); + assert!(result.contains("2 passed")); + assert!(!result.contains("failed")); + } + + #[test] + fn test_go_test_with_failures() { + let output = "=== RUN TestFoo\n--- PASS: TestFoo (0.00s)\n=== RUN TestBar\n bar_test.go:10: expected 5, got 3\n--- FAIL: TestBar (0.01s)\nFAIL\texample.com/pkg\t0.015s\n"; + let compressor = GoCompressor; + let result = compressor.compress("go test ./...", output).unwrap(); + assert!(result.contains("1 passed, 1 failed")); + assert!(result.contains("TestBar")); + } + + #[test] + fn test_go_build_success() { + let result = filter_go_build(""); + assert!(result.contains("ok")); + } + + #[test] + fn test_go_build_errors() { + let output = "# example.com/foo\nmain.go:10:5: undefined: missingFunc\nmain.go:15:2: cannot use x (type int) as type string\n"; + let compressor = GoCompressor; + let result = compressor.compress("go build ./...", output).unwrap(); + assert!(result.contains("2 errors")); + assert!(result.contains("undefined: missingFunc")); + } + + #[test] + fn test_go_vet_no_issues() { + let result = filter_go_vet(""); + 
assert!(result.contains("ok")); + } + + #[test] + fn test_go_vet_with_issues() { + let output = "main.go:42:2: Printf format %d has arg x of wrong type string\nutils.go:15:5: unreachable code\n"; + let compressor = GoCompressor; + let result = compressor.compress("go vet ./...", output).unwrap(); + assert!(result.contains("2 issues")); + assert!(result.contains("Printf format")); + } + + #[test] + fn test_unknown_subcommand() { + let compressor = GoCompressor; + assert!(compressor.compress("go run .", "Hello world\n").is_none()); + } + + #[test] + fn test_compact_package_name() { + assert_eq!(compact_package_name("github.com/user/repo/pkg"), "pkg"); + assert_eq!(compact_package_name("simple"), "simple"); + } +} diff --git a/crates/compressor/src/strategy/bash/grep.rs b/crates/compressor/src/strategy/bash/grep.rs new file mode 100644 index 0000000..e8f8542 --- /dev/null +++ b/crates/compressor/src/strategy/bash/grep.rs @@ -0,0 +1,1768 @@ +//! Compressor for `grep` / `rg` command output. +//! +//! Groups matches by file, strips leading whitespace, and truncates +//! long lines to produce a compact search result listing. +//! +//! Handles context flags (-A, -B, -C) where grep uses `file-content` for +//! context lines and `file:content` for match lines. Filenames are discovered +//! from match lines (`:` is unambiguous) then used to parse context lines. + +use std::collections::{BTreeMap, HashSet}; + +use super::BashCompressor; + +const MAX_LINE_LEN: usize = 120; +const MAX_MATCHES_PER_FILE: usize = 10; +const MAX_CONTEXT_PER_MATCH: usize = 5; + +pub struct GrepCompressor; + +impl BashCompressor for GrepCompressor { + fn compress(&self, command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + let info = parse_grep_command(command); + compact_grep(output, &info) + } +} + +/// Information extracted from a grep command. +#[derive(Debug)] +struct GrepCommandInfo { + /// Single-file target, or None for recursive/multi-file searches. 
+ single_file: Option, + /// When `-r` is used with a single positional target, store it here. + /// We'll check at output-processing time whether grep actually prefixed + /// filenames (directory target) or not (file target). + recursive_single_target: Option, + /// Whether -n / --line-number was present. + has_line_numbers: bool, + /// Max context lines requested via -A/-B/-C (0 = no context flags). + context_lines: usize, +} + +/// Tokenize a shell command respecting single and double quotes. +/// +/// Strips quote characters from the output; a backslash outside single quotes +/// escapes the next character. This is a best-effort approximation of POSIX +/// shell word-splitting — enough to correctly count positional arguments even +/// when the grep pattern contains spaces (e.g. `'"foo bar"'`). +fn shell_tokenize(s: &str) -> Vec { + let mut tokens: Vec = Vec::new(); + let mut current = String::new(); + let mut in_single = false; + let mut in_double = false; + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '\'' if !in_double => in_single = !in_single, + '"' if !in_single => in_double = !in_double, + '\\' if !in_single => { + if let Some(next) = chars.next() { + current.push(next); + } + } + ' ' | '\t' if !in_single && !in_double => { + if !current.is_empty() { + tokens.push(std::mem::take(&mut current)); + } + } + _ => current.push(c), + } + } + if !current.is_empty() { + tokens.push(current); + } + tokens +} + +/// Split a command line on unquoted `|` characters (pipeline segments). +/// +/// Respects single quotes, double quotes, and backslash escapes so that a +/// `|` inside a quoted grep pattern (e.g. `grep "a\|b" file | head`) is not +/// treated as a pipe. 
+fn shell_split_pipes(s: &str) -> Vec { + let mut segments: Vec = Vec::new(); + let mut current = String::new(); + let mut in_single = false; + let mut in_double = false; + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '\'' if !in_double => { + in_single = !in_single; + current.push(c); + } + '"' if !in_single => { + in_double = !in_double; + current.push(c); + } + '\\' if !in_single => { + current.push(c); + if let Some(next) = chars.next() { + current.push(next); + } + } + '|' if !in_single && !in_double => { + segments.push(std::mem::take(&mut current)); + } + _ => current.push(c), + } + } + segments.push(current); + segments +} + +/// Parse a grep command, which may be part of a pipeline. +/// +/// Handles combined short flags (`-rnA30`), long flags (`--line-number`), +/// flags with inline values (`-A30`, `--after-context=30`), and +/// flags with separate values (`-A 30`). +fn parse_grep_command(command: &str) -> GrepCommandInfo { + // Find the grep segment in a pipeline by splitting on `|`, respecting quotes. + // A `|` inside single or double quotes (e.g. `grep "a\|b" file`) is NOT a pipe. + let segments = shell_split_pipes(command); + let grep_part = segments + .iter() + .find(|s| { + let t = s.trim(); + t == "grep" || t.starts_with("grep ") || t.starts_with("grep\t") + }) + .map(|s| s.as_str()) + .unwrap_or(command); + + let all_tokens = shell_tokenize(grep_part); + + // Skip the "grep" token itself. + let tokens: &[String] = if all_tokens + .first() + .map(|s| s == "grep" || s.ends_with("/grep")) + .unwrap_or(false) + { + &all_tokens[1..] + } else { + &all_tokens[..] + }; + + let mut has_line_numbers = false; + let mut is_recursive = false; + let mut after_context: usize = 0; + let mut before_context: usize = 0; + let mut positional: Vec<&str> = Vec::new(); + let mut after_dashdash = false; + let mut i = 0; + + /// Parse a decimal integer from a byte slice, returning 0 on failure. 
+ fn parse_usize(s: &[u8]) -> usize { + s.iter() + .take_while(|b| b.is_ascii_digit()) + .fold(0usize, |acc, &b| acc * 10 + (b - b'0') as usize) + } + + while i < tokens.len() { + let tok: &str = &tokens[i]; + + if after_dashdash { + positional.push(tok); + i += 1; + continue; + } + + if tok == "--" { + after_dashdash = true; + i += 1; + continue; + } + + if tok.starts_with("--") { + // Long flag, possibly with an inline value: --after-context=30 + let (opt, inline_val) = match tok.find('=') { + Some(eq) => (&tok[..eq], Some(&tok[eq + 1..])), + None => (tok, None), + }; + match opt { + "--recursive" => is_recursive = true, + "--line-number" => has_line_numbers = true, + "--after-context" => { + let val = inline_val.unwrap_or_else(|| { + i += 1; + tokens.get(i).map(|s| s.as_str()).unwrap_or("0") + }); + after_context = after_context.max(val.parse().unwrap_or(0)); + } + "--before-context" => { + let val = inline_val.unwrap_or_else(|| { + i += 1; + tokens.get(i).map(|s| s.as_str()).unwrap_or("0") + }); + before_context = before_context.max(val.parse().unwrap_or(0)); + } + "--context" => { + let val = inline_val.unwrap_or_else(|| { + i += 1; + tokens.get(i).map(|s| s.as_str()).unwrap_or("0") + }); + let n: usize = val.parse().unwrap_or(0); + after_context = after_context.max(n); + before_context = before_context.max(n); + } + // Flags that consume the next token as their value. 
+ "--max-count" | "--label" | "--include" | "--exclude" | "--exclude-dir" + | "--color" | "--colour" => { + if inline_val.is_none() { + i += 1; // skip value token + } + } + _ => {} + } + } else if tok.starts_with('-') && tok.len() > 1 { + // Short flag(s), possibly combined: -rnA30 + let bytes = &tok.as_bytes()[1..]; + let mut j = 0; + while j < bytes.len() { + match bytes[j] { + b'r' | b'R' => is_recursive = true, + b'n' => has_line_numbers = true, + b'A' => { + if j + 1 < bytes.len() { + after_context = after_context.max(parse_usize(&bytes[j + 1..])); + j = bytes.len() - 1; + } else if let Some(val) = tokens.get(i + 1) { + after_context = after_context.max(val.parse().unwrap_or(0)); + i += 1; + } + } + b'B' => { + if j + 1 < bytes.len() { + before_context = before_context.max(parse_usize(&bytes[j + 1..])); + j = bytes.len() - 1; + } else if let Some(val) = tokens.get(i + 1) { + before_context = before_context.max(val.parse().unwrap_or(0)); + i += 1; + } + } + b'C' => { + let n = if j + 1 < bytes.len() { + let v = parse_usize(&bytes[j + 1..]); + j = bytes.len() - 1; + v + } else if let Some(val) = tokens.get(i + 1) { + let v = val.parse().unwrap_or(0); + i += 1; + v + } else { + 0 + }; + after_context = after_context.max(n); + before_context = before_context.max(n); + } + // Flags that consume a value (inline or next token). + b'e' | b'f' | b'm' | b'D' | b'd' => { + if j + 1 < bytes.len() { + j = bytes.len() - 1; + } else { + i += 1; + } + } + _ => {} + } + j += 1; + } + } else { + positional.push(tok); + } + + i += 1; + } + + // positional[0] = pattern, positional[1..] = file/dir targets. + // Single-file mode: exactly one target and not recursive. + let (single_file, recursive_single_target) = if positional.len() == 2 { + if is_recursive { + // `-r` with a single target: might be a file (no prefix in output) + // or a directory (prefix in output). We'll check at output time. 
+ (None, Some(positional[1].to_string())) + } else { + (Some(positional[1].to_string()), None) + } + } else { + (None, None) + }; + + GrepCommandInfo { + single_file, + recursive_single_target, + has_line_numbers, + context_lines: after_context.max(before_context), + } +} + +/// Prepend the filename to every output line when the grep output lacks a filename prefix. +/// +/// For `grep -n` (line numbers): match lines start with `N:content`, context lines with `N-content`. +/// We prepend `filename:` or `filename-` accordingly so the rest of the parser sees the normal +/// `file:linenum:content` / `file-linenum-content` format. +/// +/// For `grep` without `-n` (no line numbers): all lines are bare content, so we prepend `filename:` +/// to every line (treating them all as match lines). +fn prepend_filename_if_needed(output: &str, info: &GrepCommandInfo) -> String { + let filename = match &info.single_file { + Some(f) => f.as_str(), + None => return output.to_string(), + }; + + output + .lines() + .map(|l| { + if l.is_empty() || l.starts_with("--") { + return l.to_string(); + } + if info.has_line_numbers { + // Determine separator from the line itself: digits then `:` (match) or `-` (context). + let first_non_digit = l.bytes().position(|b| !b.is_ascii_digit()); + let starts_with_digit = l + .bytes() + .next() + .map(|b| b.is_ascii_digit()) + .unwrap_or(false); + if starts_with_digit && let Some(pos) = first_non_digit { + let sep = l.as_bytes()[pos]; + if sep == b'-' { + return format!("{}-{}", filename, l); + } + } + format!("{}:{}", filename, l) + } else { + // Bare content: no line numbers, all lines are match lines. + format!("{}:{}", filename, l) + } + }) + .collect::>() + .join("\n") +} + +/// A parsed grep output line. +struct GrepLine<'a> { + file: &'a str, + line_num: usize, + content: &'a str, + /// true for match lines (`:`), false for context lines (`-`) + #[allow(dead_code)] + is_match: bool, +} + +/// Parse a match line (`:` separated). 
Returns (file, line_num, content). +/// Handles both `file:linenum:content` and `file:content` formats. +fn parse_match_line(line: &str) -> Option> { + let parts: Vec<&str> = line.splitn(3, ':').collect(); + if parts.len() == 3 { + if let Ok(ln) = parts[1].trim().parse::() + && ln > 0 + { + return Some(GrepLine { + file: parts[0], + line_num: ln, + content: parts[2], + is_match: true, + }); + } + // parts[1] wasn't a line number — treat as "file:content" with rest joined + let content = &line[parts[0].len() + 1..]; + return Some(GrepLine { + file: parts[0], + line_num: 0, + content, + is_match: true, + }); + } + if parts.len() == 2 { + return Some(GrepLine { + file: parts[0], + line_num: 0, + content: parts[1], + is_match: true, + }); + } + None +} + +/// Parse a context line using known filenames. +/// Context lines use `-` as separator: `file-linenum-content` or `file-content`. +fn parse_context_line<'a>(line: &'a str, known_files: &HashSet<&'a str>) -> Option> { + for file in known_files { + let prefix = format!("{}-", file); + if let Some(rest) = line.strip_prefix(&prefix) { + // Try "linenum-content" + if let Some(dash_pos) = rest.find('-') { + let maybe_num = &rest[..dash_pos]; + if let Ok(ln) = maybe_num.parse::() + && ln > 0 + { + let content = &rest[dash_pos + 1..]; + return Some(GrepLine { + file, + line_num: ln, + content, + is_match: false, + }); + } + } + // No line number — just "content" + return Some(GrepLine { + file, + line_num: 0, + content: rest, + is_match: false, + }); + } + } + None +} + +/// Select which lines to display for a file, prioritizing match lines over context. +/// Always includes all match lines (up to `max`), fills remaining budget with context. 
+fn select_lines(matches: &[(usize, String, bool)], max: usize) -> Vec<(usize, String, bool)> { + if matches.len() <= max { + return matches.to_vec(); + } + + let match_lines: Vec<_> = matches + .iter() + .filter(|(_, _, is_match)| *is_match) + .collect(); + let context_lines: Vec<_> = matches + .iter() + .filter(|(_, _, is_match)| !*is_match) + .collect(); + + // If match lines alone exceed budget, just take first `max` match lines. + if match_lines.len() >= max { + return match_lines.into_iter().take(max).cloned().collect(); + } + + // Budget for context lines around matches. + let context_budget = max - match_lines.len(); + + // Build a set of indices we want to keep: all match indices + nearby context. + let match_indices: Vec = matches + .iter() + .enumerate() + .filter(|(_, (_, _, is_match))| *is_match) + .map(|(i, _)| i) + .collect(); + + // For each match, include surrounding context (prefer lines just before/after). + let mut keep = vec![false; matches.len()]; + for &idx in &match_indices { + keep[idx] = true; + } + + // Distribute context budget around matches, trying to center them. + // For each match, expand outward equally before and after to keep match centered. + let mut remaining = context_budget; + let per_match = if match_indices.is_empty() { + context_budget + } else { + (context_budget / match_indices.len()).max(1) + }; + + for &idx in &match_indices { + if remaining == 0 { + break; + } + let mut budget = per_match.min(remaining); + + // Expand symmetrically around the match (after first so odd budget lines + // land after the match, keeping it closer to the center of its context window). + let mut distance = 1; + while budget > 0 && (idx >= distance || idx + distance < matches.len()) { + // Try after first (so with an odd remaining budget, the extra line + // goes after the match rather than before it). 
+ if idx + distance < matches.len() && budget > 0 { + let after_idx = idx + distance; + if !keep[after_idx] && !matches[after_idx].2 { + keep[after_idx] = true; + budget -= 1; + remaining -= 1; + } + } + // Try before + if idx >= distance && budget > 0 { + let before_idx = idx - distance; + if !keep[before_idx] && !matches[before_idx].2 { + keep[before_idx] = true; + budget -= 1; + remaining -= 1; + } + } + distance += 1; + } + } + + // If there's still budget, fill with remaining context lines in order. + if remaining > 0 { + for (i, _) in context_lines.iter().enumerate() { + if remaining == 0 { + break; + } + let orig_idx = matches + .iter() + .enumerate() + .filter(|(_, (_, _, is_match))| !*is_match) + .nth(i) + .map(|(idx, _)| idx) + .unwrap(); + if !keep[orig_idx] { + keep[orig_idx] = true; + remaining -= 1; + } + } + } + + matches + .iter() + .enumerate() + .filter(|(i, _)| keep[*i]) + .map(|(_, entry)| entry.clone()) + .collect() +} + +/// Split raw grep output into `--`-delimited blocks. +/// Lines not separated by `--` form a single block. +fn split_blocks(raw: &str) -> Vec> { + let mut blocks: Vec> = Vec::new(); + let mut current: Vec<&str> = Vec::new(); + for line in raw.lines() { + if line == "--" { + if !current.is_empty() { + blocks.push(current); + current = Vec::new(); + } + } else { + current.push(line); + } + } + if !current.is_empty() { + blocks.push(current); + } + blocks +} + +fn compact_grep(raw: &str, info: &GrepCommandInfo) -> Option { + // When `-r` was used with a single target, check whether the output lines + // actually start with that target path. If they don't, grep treated the + // target as a file (not a directory) and we should use single-file mode. 
+ let promoted; + let info = if info.single_file.is_none() { + if let Some(ref target) = info.recursive_single_target { + let has_prefix = raw.lines().any(|l| l != "--" && l.starts_with(target)); + if !has_prefix { + promoted = GrepCommandInfo { + single_file: Some(target.clone()), + recursive_single_target: None, + has_line_numbers: info.has_line_numbers, + context_lines: info.context_lines, + }; + &promoted + } else { + info + } + } else { + info + } + } else { + info + }; + + // In single-file mode the output has no filename prefix; prepend it so + // the rest of the parser sees the standard `file:linenum:content` format. + let processed_output = if info.single_file.is_some() { + prepend_filename_if_needed(raw, info) + } else { + raw.to_string() + }; + + let blocks = split_blocks(&processed_output); + + // Process each block: find the filename from match lines (`:` separator), + // then use it to parse context lines (`-` separator) in the same block. + let mut by_file: BTreeMap<&str, Vec<(usize, String, bool)>> = BTreeMap::new(); + let mut total = 0; + + for block in blocks.iter() { + // Find the filename for this block from match lines. + // We pick the candidate file that appears as a prefix (`file:` or `file-`) + // on the most lines in the block. This avoids picking a bogus file from + // context lines that happen to contain `:` in their content. 
+ let mut block_file: Option<&str> = None; + let mut best_count = 0; + for line in block { + if let Some(parsed) = parse_match_line(line) + && !parsed.file.is_empty() + { + let colon_prefix = format!("{}:", parsed.file); + let dash_prefix = format!("{}-", parsed.file); + let count = block + .iter() + .filter(|l| l.starts_with(&colon_prefix) || l.starts_with(&dash_prefix)) + .count(); + if count > best_count { + best_count = count; + block_file = Some(parsed.file); + } + } + } + let mut block_known: HashSet<&str> = HashSet::new(); + if let Some(f) = block_file { + block_known.insert(f); + } + + // Now parse all lines in this block. + for line in block { + // Try context line first using the block's known file. + if let Some(parsed) = parse_context_line(line, &block_known) { + total += 1; + let cleaned = truncate_line(parsed.content.trim(), MAX_LINE_LEN); + by_file + .entry(parsed.file) + .or_default() + .push((parsed.line_num, cleaned, false)); + continue; + } + + // Then try match line. + if let Some(parsed) = parse_match_line(line) { + total += 1; + let cleaned = truncate_line(parsed.content.trim(), MAX_LINE_LEN); + by_file + .entry(parsed.file) + .or_default() + .push((parsed.line_num, cleaned, true)); + continue; + } + } + } + + if total == 0 { + return None; + } + + if total < 10 { + return None; + } + + let mut out = format!("{} in {}F:\n\n", total, by_file.len()); + + for (file, matches) in &by_file { + let file_display = compact_path(file); + out.push_str(&format!("{} ({}):\n", file_display, matches.len())); + + // Always show match lines plus surrounding context. + // When the user requested context (-A/-B/-C), honour that many + // context lines per match so we don't throw away what they asked for. 
+ let num_matches = matches.iter().filter(|(_, _, m)| *m).count().max(1); + let budget = if info.context_lines > 0 { + let ctx = info.context_lines.min(MAX_CONTEXT_PER_MATCH); + num_matches * (ctx * 2 + 1) + } else { + MAX_MATCHES_PER_FILE + }; + let selected = select_lines(matches, budget); + for (line_num, content, is_match) in &selected { + if *line_num > 0 { + let sep = if *is_match { ':' } else { '-' }; + out.push_str(&format!(" {:>4}{} {}\n", line_num, sep, content)); + } else { + out.push_str(&format!(" {}\n", content)); + } + } + + out.push('\n'); + } + + Some(out) +} + +fn truncate_line(line: &str, max_len: usize) -> String { + if line.len() <= max_len { + line.to_string() + } else { + let end = max_len.saturating_sub(3); + // Find a valid char boundary at or before `end` + let end = line.floor_char_boundary(end); + format!("{}...", &line[..end]) + } +} + +fn compact_path(path: &str) -> &str { + // Just return as-is for the compressor (no emoji, keep path readable) + path +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper: calls compact_grep with no single-file mode and no context flags. 
+ fn compact_grep(input: &str) -> Option { + super::compact_grep( + input, + &GrepCommandInfo { + single_file: None, + recursive_single_target: None, + has_line_numbers: false, + context_lines: 0, + }, + ) + } + + fn single_file_info(file: &str, has_line_numbers: bool) -> GrepCommandInfo { + GrepCommandInfo { + single_file: Some(file.to_string()), + recursive_single_target: None, + has_line_numbers, + context_lines: 0, + } + } + + // ── shell_split_pipes ───────────────────────────────────────────── + + #[test] + fn test_shell_split_pipes_simple() { + let segs = shell_split_pipes("grep -rn foo | head -20"); + assert_eq!(segs.len(), 2); + assert!(segs[0].contains("grep")); + assert!(segs[1].contains("head")); + } + + #[test] + fn test_shell_split_pipes_quoted_pipe() { + // The \| inside double quotes must NOT be treated as a pipe + let segs = shell_split_pipes(r#"grep -A 30 "get_me\|/me" /some/file.json | head -50"#); + assert_eq!(segs.len(), 2); + assert!( + segs[0].contains("/some/file.json"), + "file path should stay in grep segment, got: {:?}", + segs[0] + ); + } + + #[test] + fn test_shell_split_pipes_single_quoted_pipe() { + let segs = shell_split_pipes("grep 'a|b' file.txt | wc -l"); + assert_eq!(segs.len(), 2); + assert!(segs[0].contains("file.txt")); + } + + // ── parse_grep_command ──────────────────────────────────────────── + + #[test] + fn test_parse_pattern_with_pipe_in_quotes() { + // grep -A 30 "get_me\|/me" /some/file.json | head -50 + // The \| is inside double quotes — should NOT split the command + let info = parse_grep_command(r#"grep -A 30 "get_me\|/me" /some/file.json | head -50"#); + assert_eq!( + info.single_file.as_deref(), + Some("/some/file.json"), + "should detect single file despite pipe in pattern" + ); + } + + #[test] + fn test_parse_quoted_pattern_with_spaces() { + // grep -rnC 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json + // The pattern contains spaces and is wrapped in single quotes. 
+ // split_whitespace() would incorrectly split it into multiple tokens, + // making positional.len() > 2 and preventing single-file detection. + let info = parse_grep_command( + r#"grep -rnC 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json"#, + ); + assert!(info.has_line_numbers, "should detect -n"); + // -r makes single_file None, but the target is stored in recursive_single_target + // so we can check the output at processing time. + assert!(info.single_file.is_none()); + assert_eq!( + info.recursive_single_target.as_deref(), + Some("edgee-cli/openapi/openapi.json"), + ); + } + + #[test] + fn test_parse_quoted_pattern_no_recursive() { + // grep -nC 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json + // Without -r, with exactly one file target, should detect single-file mode. + let info = parse_grep_command( + r#"grep -nC 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json"#, + ); + assert!(info.has_line_numbers, "should detect -n"); + assert_eq!( + info.single_file.as_deref(), + Some("edgee-cli/openapi/openapi.json"), + "should detect single file despite quoted pattern with spaces" + ); + } + + // ── helpers ────────────────────────────────────────────────────── + + #[test] + fn test_truncate_short() { + assert_eq!(truncate_line("hello", 10), "hello"); + } + + #[test] + fn test_truncate_long() { + let long = "a".repeat(200); + let result = truncate_line(&long, 120); + assert!(result.ends_with("...")); + assert!(result.len() <= 120); + } + + // ── thresholds ────────────────────────────────────────────────── + + #[test] + fn test_empty_input() { + let compressor = GrepCompressor; + assert!(compressor.compress("grep -rn 'xyz' .", "").is_none()); + } + + #[test] + fn test_whitespace_only() { + let compressor = GrepCompressor; + assert!( + compressor + .compress("grep -rn 'xyz' .", " \n \n") + .is_none() + ); + } + + #[test] + fn test_below_threshold_not_compressed() { + // 3 matches is below the 10-line threshold + let input = 
"src/main.rs:10:fn main() {\nsrc/main.rs:15: println!(\"hello\");\nsrc/lib.rs:1:pub mod utils;\n"; + assert!(compact_grep(input).is_none()); + } + + #[test] + fn test_limits_matches_per_file() { + let mut input = String::new(); + for i in 1..=20 { + input.push_str(&format!("src/main.rs:{}:line {}\n", i, i)); + } + let result = compact_grep(&input).unwrap(); + assert!(result.contains("src/main.rs (20):")); + // Shows first 10 matches, truncates remaining 10 (no +10 indicator anymore) + assert!(result.contains("line 1")); + assert!(result.contains("line 10")); + } + + #[test] + fn test_strips_leading_whitespace() { + let mut lines = Vec::new(); + for i in 1..=15 { + lines.push(format!("src/main.rs:{}: fn main() {{", i * 10)); + } + let input = lines.join("\n"); + let result = compact_grep(&input).unwrap(); + assert!(result.contains("fn main()")); + } + + // ── 1. grep -rn (recursive + line numbers) ───────────────────── + + #[test] + fn test_grep_rn() { + // Format: file:linenum:content + let mut lines = Vec::new(); + for i in 1..=15 { + lines.push(format!("src/main.rs:{}:fn function_{}() {{}}", i, i)); + } + let input = lines.join("\n"); + let result = compact_grep(&input).unwrap(); + assert!(result.contains("15 in 1F:")); + assert!(result.contains("src/main.rs (15):")); + assert!(result.contains(" 1: fn function_1()")); + } + + // ── 2. grep -r (recursive, no line numbers) ──────────────────── + + #[test] + fn test_grep_r() { + // Format: file:content + let mut lines = Vec::new(); + for i in 0..15 { + lines.push(format!("src/file{}.rs:fn something() {{}}", i)); + } + let input = lines.join("\n"); + let result = compact_grep(&input).unwrap(); + assert!(result.contains("15 in 15F:")); + // No line numbers in output + assert!(!result.contains(" 0:")); + } + + // ── 3. 
grep -rnA/B/C (recursive + line numbers + context) ────── + + #[test] + fn test_grep_rn_context() { + // Format: match file:linenum:content, context file-linenum-content, separator -- + let input = "\ +src/main.rs-9-// before +src/main.rs:10:fn main() { +src/main.rs-11- let x = 1; +-- +src/main.rs-19-// before2 +src/main.rs:20:fn other() { +src/main.rs-21- let y = 2; +-- +src/lib.rs-4-use std::io; +src/lib.rs:5:pub fn init() { +src/lib.rs-6- println!(\"init\"); +-- +src/lib.rs-14-use std::fs; +src/lib.rs:15:pub fn load() { +src/lib.rs-16- println!(\"load\"); +-- +src/util.rs-1-// header +src/util.rs:2:fn helper() { +src/util.rs-3- todo!() +-- +src/util.rs-9-// other +src/util.rs:10:fn helper2() { +src/util.rs-11- todo!() +"; + let result = compact_grep(input).unwrap(); + assert!(result.contains("in 3F:")); + // Match lines show linenum + ':' + assert!(result.contains(" 10: fn main()")); + // Context lines show linenum + '-' + assert!(result.contains(" 9- // before")); + assert!(result.contains(" 11- let x = 1;")); + // -- separators are stripped + assert!(!result.contains("--")); + } + + #[test] + fn test_grep_rn_context_dashes_in_filename() { + // Dashed filenames like edgee-cli/... 
with -rnA + let input = "\ +edgee-cli/openapi/openapi.json:2426: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get my User object\", +edgee-cli/openapi/openapi.json-2428- \"description\": \"Returns the current user\", +-- +edgee-cli/openapi/openapi.json:2500: \"operationId\": \"listOrgs\", +edgee-cli/openapi/openapi.json-2501- \"summary\": \"List organizations\", +edgee-cli/openapi/openapi.json-2502- \"description\": \"Returns all orgs\", +-- +my-app/src/main.rs:10:fn hello() { +my-app/src/main.rs-11- println!(\"hi\"); +my-app/src/main.rs-12-} +-- +my-app/src/main.rs:20:fn world() { +my-app/src/main.rs-21- println!(\"world\"); +"; + let result = compact_grep(input).unwrap(); + assert!(result.contains("in 2F:")); + assert!(result.contains("edgee-cli/openapi/openapi.json")); + assert!(result.contains("2427- \"summary\": \"Get my User object\",")); + assert!(result.contains("my-app/src/main.rs")); + // No bogus filenames with mangled dash prefixes + assert!(!result.contains("json-")); + } + + // ── 4. 
grep -rA/B/C (recursive, no line numbers, context) ────── + + #[test] + fn test_grep_r_context() { + // Format: match file:content, context file-content, separator -- + let input = "\ +src/main.rs:fn main() { +src/main.rs- let x = 1; +src/main.rs- let y = 2; +-- +src/main.rs:fn other() { +src/main.rs- let z = 3; +-- +src/lib.rs:pub fn init() { +src/lib.rs- println!(\"init\"); +-- +src/lib.rs:pub fn load() { +src/lib.rs- println!(\"load\"); +-- +src/util.rs:fn helper() { +src/util.rs- todo!() +"; + let result = compact_grep(input).unwrap(); + assert!(result.contains("in 3F:")); + // No line numbers anywhere + assert!(!result.contains(" 0")); + // Content preserved + assert!(result.contains("fn main()")); + assert!(result.contains("let x = 1;")); + } + + #[test] + fn test_grep_r_context_dashes_in_filename() { + // The hardest case: dashed filenames + no line numbers + context with `:` in content + let input = "\ +edgee-cli/openapi/openapi.json: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json- \"summary\": \"Get my User object\", +edgee-cli/openapi/openapi.json- \"description\": \"Returns the current user\", +-- +edgee-cli/openapi/openapi.json: \"operationId\": \"listOrgs\", +edgee-cli/openapi/openapi.json- \"summary\": \"List organizations\", +edgee-cli/openapi/openapi.json- \"description\": \"Returns all orgs\", +-- +my-app/src/my-module.rs:fn hello() { +my-app/src/my-module.rs- println!(\"hi\"); +my-app/src/my-module.rs-} +-- +my-app/src/my-module.rs:fn world() { +my-app/src/my-module.rs- println!(\"world\"); +"; + let result = compact_grep(input).unwrap(); + assert!(result.contains("in 2F:")); + assert!(result.contains("edgee-cli/openapi/openapi.json")); + assert!(result.contains("my-app/src/my-module.rs")); + assert!(result.contains("\"summary\": \"Get my User object\",")); + // No mangled filenames + assert!(!result.contains("json-")); + } + + // ── 5. 
grep -n (single file, line numbers) ───────────────────── + + #[test] + fn test_grep_n_single_file() { + // Format: linenum:content (no filename prefix) + // Each "linenum:content" parses as a separate "file" since there's no real filename. + // This is expected — single-file grep -n output is inherently ambiguous. + let mut lines = Vec::new(); + for i in 1..=12 { + lines.push(format!("{}:fn function_{}() {{}}", i, i)); + } + let input = lines.join("\n"); + let result = compact_grep(&input).unwrap(); + assert!(result.contains("12 in 12F:")); + assert!(result.contains("fn function_1()")); + } + + // ── 6. grep (single file, bare — no flags) ───────────────────── + + #[test] + fn test_grep_bare_single_file() { + // Format: just content lines, no prefix at all + // These lines have no `:` so they'll be unparseable — nothing to compress + let mut lines = Vec::new(); + for i in 0..15 { + lines.push(format!("fn function_{}() {{}}", i)); + } + let input = lines.join("\n"); + // No `:` in any line, can't parse file or line number + assert!(compact_grep(&input).is_none()); + } + + // ── 7. grep -nA/B/C (single file, line numbers + context) ────── + + #[test] + fn test_grep_n_context_single_file() { + // Format: match linenum:content, context linenum-content, separator -- + // Without a filename prefix, match lines look like "10:content" + // and context lines look like "9-content" or "11-content". + // Each match line parses as a separate "file" since line numbers look like filenames. + // Context lines won't match known "files" (which are "10", "20", etc.) so they're lost. + // This is inherently ambiguous — single-file grep -nA can't be reliably parsed. 
+ let input = "\ +9-// before +10:fn main() { +11- let x = 1; +-- +19-// before2 +20:fn other() { +21- let y = 2; +-- +29-// before3 +30:fn third() { +31- let z = 3; +-- +39-// before4 +40:fn fourth() { +41- let w = 4; +"; + let result = compact_grep(input); + // Only 4 match lines parse (context lines are lost) — below threshold + assert!(result.is_none()); + } + + // ── 8. grep -A/B/C (single file, no line numbers, context) ───── + + #[test] + fn test_grep_context_single_file_no_linenums() { + // Format: just content lines with -- separators, no prefix at all + // Lines with `:` in content may parse, lines without won't + let input = "\ +\"operationId\": \"getMe\", +\"summary\": \"Get my User object\", +\"description\": \"Returns the current user\", +-- +\"operationId\": \"listOrgs\", +\"summary\": \"List organizations\", +\"description\": \"Returns all orgs\", +-- +\"operationId\": \"createOrg\", +\"summary\": \"Create organization\", +\"description\": \"Creates a new org\", +-- +\"operationId\": \"deleteOrg\", +\"summary\": \"Delete organization\", +"; + // Lines with `:` will parse as file:content with weird "files" + // This is a best-effort case — single-file grep without -n or -r + // produces output we can't reliably distinguish from random text + let result = compact_grep(input); + // May or may not compress depending on how many lines parse + // The important thing is it doesn't panic + if let Some(r) = result { + assert!(!r.is_empty()); + } + } + + // ── 9. grep -l (filenames only) ───────────────────────────────── + + #[test] + fn test_grep_l_filenames_only() { + // Format: one filename per line, no `:` or `-` separators + let input = "\ +src/main.rs +src/lib.rs +src/util.rs +src/config.rs +src/auth.rs +src/db.rs +src/api.rs +src/routes.rs +src/models.rs +src/helpers.rs +src/tests.rs +"; + // No `:` in any line — can't parse + assert!(compact_grep(input).is_none()); + } + + // ── 10. 
grep -c / grep -rc (count mode) ──────────────────────── + + #[test] + fn test_grep_rc_count() { + // Format: file:count + let mut lines = Vec::new(); + for i in 0..15 { + lines.push(format!("src/file{}.rs:42", i)); + } + let input = lines.join("\n"); + // These parse as file:content with content="42" + let result = compact_grep(&input).unwrap(); + assert!(result.contains("15 in 15F:")); + } + + #[test] + fn test_grep_c_single_file_count() { + // Format: just a number + let input = "42\n"; + assert!(compact_grep(input).is_none()); + } + + // ── unit tests for parse helpers ──────────────────────────────── + + #[test] + fn test_parse_match_line_with_linenum() { + let parsed = parse_match_line("src/main.rs:10:fn main() {").unwrap(); + assert_eq!(parsed.file, "src/main.rs"); + assert_eq!(parsed.line_num, 10); + assert_eq!(parsed.content, "fn main() {"); + assert!(parsed.is_match); + } + + #[test] + fn test_parse_match_line_without_linenum() { + let parsed = parse_match_line("src/main.rs:fn main() {").unwrap(); + assert_eq!(parsed.file, "src/main.rs"); + assert_eq!(parsed.line_num, 0); + assert_eq!(parsed.content, "fn main() {"); + assert!(parsed.is_match); + } + + #[test] + fn test_parse_match_line_content_with_colons() { + // JSON content has colons — should keep full content after file:linenum: + let parsed = + parse_match_line("openapi.json:10: \"operationId\": \"getMe\",").unwrap(); + assert_eq!(parsed.file, "openapi.json"); + assert_eq!(parsed.line_num, 10); + assert_eq!(parsed.content, " \"operationId\": \"getMe\","); + } + + #[test] + fn test_parse_context_with_known_file() { + let mut known = HashSet::new(); + known.insert("edgee-cli/openapi/openapi.json"); + let parsed = parse_context_line( + "edgee-cli/openapi/openapi.json- \"summary\": \"Get a User\",", + &known, + ) + .unwrap(); + assert_eq!(parsed.file, "edgee-cli/openapi/openapi.json"); + assert_eq!(parsed.line_num, 0); + assert_eq!(parsed.content, " \"summary\": \"Get a User\","); + 
assert!(!parsed.is_match); + } + + #[test] + fn test_parse_context_with_linenum_and_known_file() { + let mut known = HashSet::new(); + known.insert("edgee-cli/openapi/openapi.json"); + let parsed = parse_context_line( + "edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get a User\",", + &known, + ) + .unwrap(); + assert_eq!(parsed.file, "edgee-cli/openapi/openapi.json"); + assert_eq!(parsed.line_num, 2427); + assert_eq!(parsed.content, " \"summary\": \"Get a User\","); + assert!(!parsed.is_match); + } + + #[test] + fn test_parse_context_unknown_file_returns_none() { + let known = HashSet::new(); + assert!(parse_context_line("src/main.rs-10-content", &known).is_none()); + } + + #[test] + fn test_block_separator_skipped() { + // -- lines should not appear in output + let mut lines = Vec::new(); + for i in 1..=5 { + lines.push(format!("src/a.rs:{i}:match {i}\nsrc/a.rs-{}-ctx", i + 100)); + } + let input = lines.join("\n--\n"); + let result = compact_grep(&input).unwrap(); + assert!(!result.contains("--")); + } + + // ── match lines always kept when truncating ──────────────────── + + #[test] + fn test_grep_r_b_match_not_lost() { + // grep -rB 30: match is the LAST line in the block, preceded by 30 context lines. + // The match must still appear in output even when context exceeds MAX_MATCHES_PER_FILE. 
+ let mut block_lines = Vec::new(); + // 30 context lines before the match + for i in 1..=30 { + block_lines.push(format!( + "edgee-cli/openapi/openapi.json- \"field{}\": \"value{}\",", + i, i + )); + } + // The actual match line + block_lines.push( + "edgee-cli/openapi/openapi.json: \"operationId\": \"deleteInvitation\"," + .to_string(), + ); + let input = block_lines.join("\n"); + let result = compact_grep(&input).unwrap(); + // The match line must be in the output + assert!( + result.contains("\"operationId\": \"deleteInvitation\","), + "match line must not be truncated, got:\n{}", + result + ); + assert!(result.contains("edgee-cli/openapi/openapi.json")); + } + + #[test] + fn test_grep_c_match_centered() { + // grep -rnC 30: match is in the MIDDLE of the block (30 before, 30 after). + // The match must appear in output and the window must be centered (roughly + // equal context before and after), not just the top context lines. + let file = "edgee-cli/openapi/openapi.json"; + let mut block_lines = Vec::new(); + // 30 context lines before match (line numbers 2396..=2425) + for i in 2396usize..=2425 { + block_lines.push(format!("{file}-{i}- \"field{i}\": \"value\",")); + } + // The actual match line at 2426 + block_lines.push(format!("{file}:2426: \"operationId\": \"getMe\",")); + // 30 context lines after match (line numbers 2427..=2456) + for i in 2427usize..=2456 { + block_lines.push(format!("{file}-{i}- \"field{i}\": \"value\",")); + } + let input = block_lines.join("\n"); + let result = super::compact_grep( + &input, + &GrepCommandInfo { + single_file: None, + recursive_single_target: None, + has_line_numbers: true, + context_lines: 30, + }, + ) + .unwrap(); + + // The match line must be present + assert!( + result.contains("\"operationId\": \"getMe\","), + "match line must not be truncated, got:\n{}", + result + ); + + // The window must be roughly centered: there should be context lines + // both before AND after the match, not just before. 
+ assert!( + result.contains("2427"), + "should show at least one after-context line (2427), got:\n{}", + result + ); + assert!( + result.contains("2425"), + "should show at least one before-context line (2425), got:\n{}", + result + ); + } + + #[test] + fn test_select_lines_prioritizes_matches() { + // 20 context lines then 1 match — match must survive truncation + let mut entries: Vec<(usize, String, bool)> = Vec::new(); + for i in 1..=20 { + entries.push((i, format!("context line {}", i), false)); + } + entries.push((21, "THE MATCH".to_string(), true)); + + let selected = select_lines(&entries, 10); + assert!( + selected.iter().any(|(_, c, m)| *m && c == "THE MATCH"), + "match line must be in selected lines" + ); + assert!(selected.len() <= 10); + } + + #[test] + fn test_single_file_grep_with_line_numbers() { + // Single file grep without -r outputs: linenum:content + // Should prepend filename when compressing + let input = "\ +10:fn main() { +11: let x = 1; +20:fn other() { +21: let y = 2; +30:fn third() { +31: let z = 3; +40:fn fourth() { +41: let w = 4; +50:fn fifth() { +51: let v = 5; +60:fn sixth() { +61: let u = 6; +"; + let result = super::compact_grep(input, &single_file_info("main.rs", true)).unwrap(); + assert!(result.contains("main.rs")); + assert!(result.contains("12 in 1F:")); + assert!(result.contains("fn main()")); + } + + #[test] + fn test_single_file_with_context() { + // Single file grep -A2 outputs: linenum:content, linenum-context, -- + let input = "\ +9-// before +10:fn main() { +11- let x = 1; +-- +19-// before2 +20:fn other() { +21- let y = 2; +-- +29-// before3 +30:fn third() { +31- let z = 3; +-- +39-// before4 +40:fn fourth() { +41- let w = 4; +"; + let result = super::compact_grep(input, &single_file_info("file.rs", true)).unwrap(); + assert!(result.contains("file.rs")); + assert!(result.contains("12 in 1F:")); + assert!(result.contains("10: fn main()")); + assert!(result.contains("11- ") && result.contains("let x")); + } + + // ── 
parse_grep_command ─────────────────────────────────────────────────── + + #[test] + fn test_parse_grep_command_dump() { + let commands = [ + r#"grep -rA 30 '"operationId": "getMe"' edgee-cli/"#, + r#"grep -rnA 30 '"operationId": "getMe"' edgee-cli/"#, + r#"grep -rnA 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json"#, + r#"grep -rA 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json"#, + r#"grep -n 'pattern' src/main.rs"#, + r#"grep -rn 'pattern' src/"#, + r#"git log | grep -n 'fix'"#, + r#"grep -A5 'pattern' file.txt | head -20"#, + ]; + for cmd in &commands { + let info = parse_grep_command(cmd); + println!("command: {:?}", cmd); + println!("parsed: {:?}", info); + println!(); + } + } + + // ── real-world: grep -A 30 '"operationId": "getMe"' edgee-cli/ ───────── + + #[test] + fn test_real_grep_rA_multi_file() { + // grep -rA 30 '"operationId": "getMe"' edgee-cli/ + // Multi-file, no line numbers, dashed filenames, large context block + let input = "\ +edgee-cli/openapi/openapi2.json:\"operationId\": \"getMe\", +-- +edgee-cli/openapi/openapi.json: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json- \"summary\": \"Get my User object\", +edgee-cli/openapi/openapi.json- \"description\": \"Retrieves my current User object.\", +edgee-cli/openapi/openapi.json- \"responses\": { +edgee-cli/openapi/openapi.json- \"200\": { +edgee-cli/openapi/openapi.json- \"description\": \"Your User object\", +edgee-cli/openapi/openapi.json- \"content\": { +edgee-cli/openapi/openapi.json- \"application/json\": { +edgee-cli/openapi/openapi.json- \"schema\": { +edgee-cli/openapi/openapi.json- \"$ref\": \"#/components/schemas/UserWithRoles\" +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- }, +edgee-cli/openapi/openapi.json- \"4XX\": { +edgee-cli/openapi/openapi.json- \"description\": \"unexpected error\", +edgee-cli/openapi/openapi.json- \"content\": { 
+edgee-cli/openapi/openapi.json- \"application/json\": { +edgee-cli/openapi/openapi.json- \"schema\": { +edgee-cli/openapi/openapi.json- \"$ref\": \"#/components/schemas/ErrorResponse\" +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- } +edgee-cli/openapi/openapi.json- }, +edgee-cli/openapi/openapi.json- \"/v1/users/{id}\": { +edgee-cli/openapi/openapi.json- \"get\": { +edgee-cli/openapi/openapi.json- \"operationId\": \"getUser\", +edgee-cli/openapi/openapi.json- \"summary\": \"Get a User\", +"; + let result = compact_grep(input).unwrap(); + assert!( + result.contains("edgee-cli/openapi/openapi2.json"), + "openapi2.json missing:\n{}", + result + ); + assert!( + result.contains("edgee-cli/openapi/openapi.json"), + "openapi.json missing:\n{}", + result + ); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match content missing:\n{}", + result + ); + assert!(!result.contains("--"), "-- separator leaked:\n{}", result); + assert!(!result.contains("json-"), "mangled filename:\n{}", result); + } + + #[test] + fn test_real_grep_rnA_multi_file() { + // grep -rnA 30 '"operationId": "getMe"' edgee-cli/ + // Multi-file, with line numbers, dashed filenames, large context block + let input = "\ +edgee-cli/openapi/openapi2.json:1:\"operationId\": \"getMe\", +-- +edgee-cli/openapi/openapi.json:2426: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get my User object\", +edgee-cli/openapi/openapi.json-2428- \"description\": \"Retrieves my current User object.\", +edgee-cli/openapi/openapi.json-2429- \"responses\": { +edgee-cli/openapi/openapi.json-2430- \"200\": { +edgee-cli/openapi/openapi.json-2431- \"description\": \"Your User object\", +edgee-cli/openapi/openapi.json-2432- \"content\": { +edgee-cli/openapi/openapi.json-2433- \"application/json\": { 
+edgee-cli/openapi/openapi.json-2434- \"schema\": { +edgee-cli/openapi/openapi.json-2435- \"$ref\": \"#/components/schemas/UserWithRoles\" +edgee-cli/openapi/openapi.json-2436- } +edgee-cli/openapi/openapi.json-2437- } +edgee-cli/openapi/openapi.json-2438- } +edgee-cli/openapi/openapi.json-2439- }, +edgee-cli/openapi/openapi.json-2440- \"4XX\": { +edgee-cli/openapi/openapi.json-2441- \"description\": \"unexpected error\", +edgee-cli/openapi/openapi.json-2442- \"content\": { +edgee-cli/openapi/openapi.json-2443- \"application/json\": { +edgee-cli/openapi/openapi.json-2444- \"schema\": { +edgee-cli/openapi/openapi.json-2445- \"$ref\": \"#/components/schemas/ErrorResponse\" +edgee-cli/openapi/openapi.json-2446- } +edgee-cli/openapi/openapi.json-2447- } +edgee-cli/openapi/openapi.json-2448- } +edgee-cli/openapi/openapi.json-2449- } +edgee-cli/openapi/openapi.json-2450- } +edgee-cli/openapi/openapi.json-2451- } +edgee-cli/openapi/openapi.json-2452- }, +edgee-cli/openapi/openapi.json-2453- \"/v1/users/{id}\": { +edgee-cli/openapi/openapi.json-2454- \"get\": { +edgee-cli/openapi/openapi.json-2455- \"operationId\": \"getUser\", +edgee-cli/openapi/openapi.json-2456- \"summary\": \"Get a User\", +"; + let result = compact_grep(input).unwrap(); + assert!( + result.contains("edgee-cli/openapi/openapi2.json"), + "openapi2.json missing:\n{}", + result + ); + assert!( + result.contains("edgee-cli/openapi/openapi.json"), + "openapi.json missing:\n{}", + result + ); + assert!( + result.contains("2426:"), + "match line number missing:\n{}", + result + ); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match content missing:\n{}", + result + ); + assert!(!result.contains("--"), "-- separator leaked:\n{}", result); + assert!(!result.contains("json-"), "mangled filename:\n{}", result); + } + + #[test] + fn test_real_grep_rnA_single_file() { + // grep -rnA 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json + // Single file, with line numbers, no filename prefix 
in output + let input = "\ +2426: \"operationId\": \"getMe\", +2427- \"summary\": \"Get my User object\", +2428- \"description\": \"Retrieves my current User object.\", +2429- \"responses\": { +2430- \"200\": { +2431- \"description\": \"Your User object\", +2432- \"content\": { +2433- \"application/json\": { +2434- \"schema\": { +2435- \"$ref\": \"#/components/schemas/UserWithRoles\" +2436- } +2437- } +2438- } +2439- }, +2440- \"4XX\": { +2441- \"description\": \"unexpected error\", +2442- \"content\": { +2443- \"application/json\": { +2444- \"schema\": { +2445- \"$ref\": \"#/components/schemas/ErrorResponse\" +2446- } +2447- } +2448- } +2449- } +2450- } +2451- } +2452- }, +2453- \"/v1/users/{id}\": { +2454- \"get\": { +2455- \"operationId\": \"getUser\", +2456- \"summary\": \"Get a User\", +"; + let result = super::compact_grep( + input, + &single_file_info("edgee-cli/openapi/openapi.json", true), + ) + .unwrap(); + assert!( + result.contains("edgee-cli/openapi/openapi.json"), + "filename missing:\n{}", + result + ); + assert!( + result.contains("2426:"), + "match line number missing:\n{}", + result + ); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match content missing:\n{}", + result + ); + // Context lines should appear with dash separator, not with line-num embedded in content + assert!( + result.contains("2427-"), + "context line number missing:\n{}", + result + ); + } + + #[test] + fn test_real_grep_rA_single_file_no_linenums() { + // grep -rA 30 '"operationId": "getMe"' edgee-cli/openapi/openapi.json + // Single file, no line numbers, no filename prefix in output (bare content) + let input = "\ + \"operationId\": \"getMe\", + \"summary\": \"Get my User object\", + \"description\": \"Retrieves my current User object.\", + \"responses\": { + \"200\": { + \"description\": \"Your User object\", + \"content\": { + \"application/json\": { + \"schema\": { + \"$ref\": \"#/components/schemas/UserWithRoles\" + } + } + } + }, + \"4XX\": { + 
\"description\": \"unexpected error\", + \"content\": { + \"application/json\": { + \"schema\": { + \"$ref\": \"#/components/schemas/ErrorResponse\" + } + } + } + } + } + } + }, + \"/v1/users/{id}\": { + \"get\": { + \"operationId\": \"getUser\", + \"summary\": \"Get a User\", +"; + let result = super::compact_grep( + input, + &single_file_info("edgee-cli/openapi/openapi.json", false), + ) + .unwrap(); + assert!( + result.contains("edgee-cli/openapi/openapi.json"), + "filename missing:\n{}", + result + ); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match content missing:\n{}", + result + ); + } + + #[test] + fn test_real_grep_nB_single_file_json() { + // Reproduces: grep -nB 30 getMe openapi.json + // Single file, line numbers, -B only (match is LAST line) + let input = "\ +2396- \"type\": \"string\" +2397- } +2398- } +2399- ], +2400- \"responses\": { +2401- \"200\": { +2402- \"description\": \"The deleted Invitation\", +2403- \"content\": { +2404- \"application/json\": { +2405- \"schema\": { +2406- \"$ref\": \"#/components/schemas/DeletedResponse\" +2407- } +2408- } +2409- } +2410- }, +2411- \"4XX\": { +2412- \"description\": \"unexpected error\", +2413- \"content\": { +2414- \"application/json\": { +2415- \"schema\": { +2416- \"$ref\": \"#/components/schemas/ErrorResponse\" +2417- } +2418- } +2419- } +2420- } +2421- } +2422- } +2423- }, +2424- \"/v1/users/me\": { +2425- \"get\": { +2426: \"operationId\": \"getMe\", +"; + let info = GrepCommandInfo { + single_file: Some("openapi.json".to_string()), + recursive_single_target: None, + has_line_numbers: true, + context_lines: 30, + }; + let result = super::compact_grep(input, &info).unwrap(); + eprintln!("RESULT:\n{}", result); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match line must appear in output, got:\n{}", + result + ); + } + + #[test] + fn test_real_grep_nB_single_file_json_no_context_info() { + // Same data but with context_lines=0 (simulates parse failure) + let 
input = "\ +2396- \"type\": \"string\" +2397- } +2398- } +2399- ], +2400- \"responses\": { +2401- \"200\": { +2402- \"description\": \"The deleted Invitation\", +2403- \"content\": { +2404- \"application/json\": { +2405- \"schema\": { +2406- \"$ref\": \"#/components/schemas/DeletedResponse\" +2407- } +2408- } +2409- } +2410- }, +2411- \"4XX\": { +2412- \"description\": \"unexpected error\", +2413- \"content\": { +2414- \"application/json\": { +2415- \"schema\": { +2416- \"$ref\": \"#/components/schemas/ErrorResponse\" +2417- } +2418- } +2419- } +2420- } +2421- } +2422- } +2423- }, +2424- \"/v1/users/me\": { +2425- \"get\": { +2426: \"operationId\": \"getMe\", +"; + let info = GrepCommandInfo { + single_file: Some("openapi.json".to_string()), + recursive_single_target: None, + has_line_numbers: true, + context_lines: 0, + }; + let result = super::compact_grep(input, &info).unwrap(); + eprintln!("RESULT (context_lines=0):\n{}", result); + assert!( + result.contains("\"operationId\": \"getMe\","), + "match line must appear in output even with context_lines=0, got:\n{}", + result + ); + } +} diff --git a/crates/compressor/src/strategy/bash/ls.rs b/crates/compressor/src/strategy/bash/ls.rs new file mode 100644 index 0000000..89108ba --- /dev/null +++ b/crates/compressor/src/strategy/bash/ls.rs @@ -0,0 +1,354 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
/// Reports whether `command` asks `ls` for long-format output through a short `-l` flag.
///
/// The first whitespace-separated token (the program name) is ignored, and scanning
/// stops at a literal `--`. Long options (`--something`) never count, even if their
/// text contains the letter `l`; any single-dash token containing `l` does.
fn is_long_format(command: &str) -> bool {
    command
        .split_whitespace()
        .skip(1)
        // Everything after `--` is an operand, not an option.
        .take_while(|token| *token != "--")
        .any(|token| {
            let is_short_option = token.starts_with('-') && !token.starts_with("--");
            is_short_option && token.contains('l')
        })
}
+fn has_flag(command: &str, flag: char) -> bool { + for arg in command.split_whitespace().skip(1) { + if arg == "--" { + break; + } + if arg == "--all" && flag == 'a' { + return true; + } + if arg.starts_with('-') && !arg.starts_with("--") && arg.contains(flag) { + return true; + } + } + false +} + +fn human_size(bytes: u64) -> String { + if bytes >= 1_048_576 { + format!("{:.1}M", bytes as f64 / 1_048_576.0) + } else if bytes >= 1024 { + format!("{:.1}K", bytes as f64 / 1024.0) + } else { + format!("{}B", bytes) + } +} + +/// Parse `ls -l` output into a compact format: +/// name/ (dirs) +/// name size (files) +/// +/// Returns `None` if the output could not be parsed (e.g. unrecognised format). +fn compact_ls(raw: &str, show_all: bool) -> Option { + let mut dirs: Vec = Vec::new(); + let mut files: Vec<(String, String)> = Vec::new(); + let mut by_ext: HashMap = HashMap::new(); + let mut parseable_lines = 0usize; + + for line in raw.lines() { + if line.starts_with("total ") || line.is_empty() { + continue; + } + + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() < 9 { + continue; + } + + parseable_lines += 1; + + // Filename is everything from column 9 onward (handles spaces) + let name = parts[8..].join(" "); + + if name == "." || name == ".." { + continue; + } + + if !show_all && NOISE_DIRS.iter().any(|noise| name == *noise) { + continue; + } + + let is_dir = parts[0].starts_with('d'); + + if is_dir { + dirs.push(name); + } else if parts[0].starts_with('-') || parts[0].starts_with('l') { + let size: u64 = parts[4].parse().unwrap_or(0); + let ext = if let Some(pos) = name.rfind('.') { + name[pos..].to_string() + } else { + "no ext".to_string() + }; + *by_ext.entry(ext).or_insert(0) += 1; + files.push((name, human_size(size))); + } + } + + // If nothing was parseable and the input had content, the format is + // unrecognised — return None so the raw output passes through unchanged. 
+ if dirs.is_empty() && files.is_empty() { + if parseable_lines == 0 { + return None; + } + return Some("(empty)\n".to_string()); + } + + let mut out = String::new(); + + for d in &dirs { + out.push_str(d); + out.push_str("/\n"); + } + + for (name, size) in &files { + out.push_str(name); + out.push_str(" "); + out.push_str(size); + out.push('\n'); + } + + out.push('\n'); + let mut summary = format!("{} files, {} dirs", files.len(), dirs.len()); + if !by_ext.is_empty() { + let mut ext_counts: Vec<_> = by_ext.iter().collect(); + ext_counts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let ext_parts: Vec = ext_counts + .iter() + .take(5) + .map(|(ext, count)| format!("{} {}", count, ext)) + .collect(); + summary.push_str(" ("); + summary.push_str(&ext_parts.join(", ")); + if ext_counts.len() > 5 { + summary.push_str(&format!(", +{} more", ext_counts.len() - 5)); + } + summary.push(')'); + } + out.push_str(&summary); + out.push('\n'); + + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compact_basic() { + let input = "total 48\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 .\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 ..\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 src\n\ + -rw-r--r-- 1 user staff 1234 Jan 1 12:00 Cargo.toml\n\ + -rw-r--r-- 1 user staff 5678 Jan 1 12:00 README.md\n"; + let output = compact_ls(input, false).expect("should compress"); + assert!(output.contains("src/")); + assert!(output.contains("Cargo.toml")); + assert!(output.contains("README.md")); + assert!(output.contains("1.2K")); + assert!(output.contains("5.5K")); + assert!(!output.contains("drwx")); + assert!(!output.contains("staff")); + assert!(!output.contains("total")); + } + + #[test] + fn test_compact_filters_noise() { + let input = "total 8\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 node_modules\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 .git\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 target\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 src\n\ + 
-rw-r--r-- 1 user staff 100 Jan 1 12:00 main.rs\n"; + let output = compact_ls(input, false).expect("should compress"); + assert!(!output.contains("node_modules")); + assert!(!output.contains(".git")); + assert!(!output.contains("target")); + assert!(output.contains("src/")); + assert!(output.contains("main.rs")); + } + + #[test] + fn test_compact_show_all() { + let input = "total 8\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 .git\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 src\n"; + let output = compact_ls(input, true).expect("should compress"); + assert!(output.contains(".git/")); + assert!(output.contains("src/")); + } + + #[test] + fn test_compact_empty() { + // A truly empty directory (only "total 0") has no parseable lines → None (pass-through) + let input = "total 0\n"; + assert!(compact_ls(input, false).is_none()); + } + + #[test] + fn test_compact_summary() { + let input = "total 48\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 src\n\ + -rw-r--r-- 1 user staff 1234 Jan 1 12:00 main.rs\n\ + -rw-r--r-- 1 user staff 5678 Jan 1 12:00 lib.rs\n\ + -rw-r--r-- 1 user staff 100 Jan 1 12:00 Cargo.toml\n"; + let output = compact_ls(input, false).expect("should compress"); + assert!(output.contains("3 files, 1 dirs")); + assert!(output.contains(".rs")); + assert!(output.contains(".toml")); + } + + #[test] + fn test_compact_symlinks() { + let input = "total 8\n\ + lrwxr-xr-x 1 user staff 10 Jan 1 12:00 link -> target\n"; + let output = compact_ls(input, false).expect("should compress"); + assert!(output.contains("link -> target")); + } + + #[test] + fn test_compact_filenames_with_spaces() { + let input = "total 8\n\ + -rw-r--r-- 1 user staff 1234 Jan 1 12:00 my file.txt\n"; + let output = compact_ls(input, false).expect("should compress"); + assert!(output.contains("my file.txt")); + } + + #[test] + fn test_human_size() { + assert_eq!(human_size(0), "0B"); + assert_eq!(human_size(500), "500B"); + assert_eq!(human_size(1024), "1.0K"); + assert_eq!(human_size(1234), 
"1.2K"); + assert_eq!(human_size(1_048_576), "1.0M"); + assert_eq!(human_size(2_500_000), "2.4M"); + } + + #[test] + fn test_is_long_format() { + assert!(is_long_format("ls -l")); + assert!(is_long_format("ls -la")); + assert!(is_long_format("ls -al")); + assert!(is_long_format("ls -la /tmp")); + assert!(!is_long_format("ls")); + assert!(!is_long_format("ls -a")); + assert!(!is_long_format("ls /tmp")); + } + + #[test] + fn test_has_flag() { + assert!(has_flag("ls -a", 'a')); + assert!(has_flag("ls -la", 'a')); + assert!(has_flag("ls --all", 'a')); + assert!(!has_flag("ls -l", 'a')); + assert!(!has_flag("ls", 'a')); + } + + #[test] + fn test_compressor_skips_non_long_format() { + let compressor = LsCompressor; + assert!(compressor.compress("ls", "file1\nfile2\n").is_none()); + assert!(compressor.compress("ls -a", "file1\nfile2\n").is_none()); + } + + #[test] + fn test_compressor_compresses_long_format() { + let compressor = LsCompressor; + let input = "total 8\n\ + drwxr-xr-x 2 user staff 64 Jan 1 12:00 src\n\ + -rw-r--r-- 1 user staff 1234 Jan 1 12:00 main.rs\n"; + let result = compressor.compress("ls -la", input); + assert!(result.is_some()); + let output = result.unwrap(); + assert!(output.contains("src/")); + assert!(output.contains("main.rs")); + assert!(!output.contains("drwx")); + } + + #[test] + fn test_compressor_passthrough_exa_format() { + // exa/eza output has fewer columns than POSIX ls -l; the compressor + // should return None (pass-through) rather than a misleading "(empty)". 
/// Trait for compressing the output of a specific bash command.
///
/// Implementors receive the command line that was run together with the text it
/// produced and decide whether a shorter representation is available.
pub trait BashCompressor {
    /// Compress the output of a command.
    ///
    /// * `command` — the command line whose output is being compressed.
    /// * `output` — the raw text the command produced.
    ///
    /// Returns `Some(compressed)` if compression was applied, `None` to leave as-is.
    fn compress(&self, command: &str, output: &str) -> Option<String>;
}
+pub fn compressor_for(base_command: &str) -> Option<&'static dyn BashCompressor> { + match base_command { + "ls" => Some(&ls::LsCompressor), + "tree" => Some(&tree::TreeCompressor), + "find" => Some(&find::FindCompressor), + "grep" => Some(&grep::GrepCompressor), + "rg" => Some(&rg::RgCompressor), + "diff" | "git" => Some(&diff::DiffCompressor), + "cargo" => Some(&cargo::CargoCompressor), + "docker" => Some(&docker::DockerCompressor), + "env" | "printenv" => Some(&env::EnvCompressor), + "npm" | "pnpm" | "npx" => Some(&npm::NpmCompressor), + "pytest" | "python" => Some(&pytest::PytestCompressor), + "psql" => Some(&psql::PsqlCompressor), + "tsc" => Some(&tsc::TscCompressor), + "eslint" => Some(&eslint::EslintCompressor), + "go" => Some(&go::GoCompressor), + "curl" => Some(&curl::CurlCompressor), + _ => None, + } +} diff --git a/crates/compressor/src/strategy/bash/npm.rs b/crates/compressor/src/strategy/bash/npm.rs new file mode 100644 index 0000000..9f9a9a8 --- /dev/null +++ b/crates/compressor/src/strategy/bash/npm.rs @@ -0,0 +1,131 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `npm` / `pnpm` command output. +//! +//! Strips boilerplate lifecycle scripts, warnings, progress indicators, +//! and empty lines to produce compact output. 
/// Decides whether a single line of npm/pnpm output is boilerplate to discard.
fn is_npm_noise(line: &str) -> bool {
    let unindented = line.trim_start();

    // Blank (or whitespace-only) lines carry no information.
    if unindented.is_empty() {
        return true;
    }

    // npm lifecycle script header ("> package@version command"); must start at column 0.
    let lifecycle_header = line.starts_with('>') && line.contains('@');

    // npm warnings/notices and pnpm scope/deprecation lines, possibly indented.
    let npm_chatter = unindented.starts_with("npm WARN") || unindented.starts_with("npm notice");
    let pnpm_chatter = unindented.starts_with("Scope:")
        || (unindented.starts_with("WARN") && line.contains("deprecated"));

    // Progress bar brackets and pnpm install progress lines.
    let progress_bar = line.contains('\u{2E29}') || line.contains('\u{2E28}');
    let install_progress =
        line.contains("Progress:") || (line.contains("packages in") && line.contains("reused"));

    lifecycle_header || npm_chatter || pnpm_chatter || progress_bar || install_progress
}

/// Strips boilerplate from npm/pnpm output: lifecycle headers, warnings, notices,
/// progress indicators and blank lines.
///
/// The surviving lines are joined with `\n` (no trailing newline). When nothing
/// survives, the literal string `"ok"` is returned instead.
fn filter_npm_output(output: &str) -> String {
    let kept: Vec<&str> = output.lines().filter(|line| !is_npm_noise(line)).collect();

    if kept.is_empty() {
        return "ok".to_string();
    }
    kept.join("\n")
}
test_filter_npm_output_empty() { + let output = "\n\n\n"; + let result = filter_npm_output(output); + assert_eq!(result, "ok"); + } + + #[test] + fn test_compressor_skips_clean_output() { + let compressor = NpmCompressor; + assert!(compressor.compress("npm run build", "").is_none()); + } + + #[test] + fn test_filter_strips_warnings() { + let output = "npm WARN old\nnpm WARN another\nActual output here\n"; + let result = filter_npm_output(output); + assert!(!result.contains("npm WARN")); + assert!(result.contains("Actual output here")); + } + + #[test] + fn test_pnpm_scope_lines_stripped() { + let output = "Scope: all 6 workspace projects\n WARN deprecated inflight@1.0.6: This module is not supported\nBuild succeeded\n"; + let result = filter_npm_output(output); + assert!(!result.contains("Scope:")); + assert!(!result.contains("deprecated")); + assert!(result.contains("Build succeeded")); + } +} diff --git a/crates/compressor/src/strategy/bash/psql.rs b/crates/compressor/src/strategy/bash/psql.rs new file mode 100644 index 0000000..9e750a9 --- /dev/null +++ b/crates/compressor/src/strategy/bash/psql.rs @@ -0,0 +1,325 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `psql` command output. +//! +//! Detects table and expanded display formats, strips borders/padding, +//! 
/// Reports whether `output` looks like psql's aligned table display.
///
/// The check succeeds as soon as any line, ignoring surrounding whitespace,
/// contains a header/body separator fragment (`-+-` or `---+---`).
fn is_table_format(output: &str) -> bool {
    for row in output.lines() {
        let row = row.trim();
        if row.contains("---+---") || row.contains("-+-") {
            return true;
        }
    }
    false
}
+ .chars() + .take_while(|c| c.is_ascii_digit()) + .count() + > 0 +} + +/// Filter psql table format: +/// - Strip separator lines (----+----) +/// - Strip (N rows) footer +/// - Trim column padding +/// - Output tab-separated +fn filter_table(output: &str) -> String { + let mut result = Vec::new(); + let mut data_rows = 0; + let mut total_rows = 0; + + for line in output.lines() { + let trimmed = line.trim(); + + if is_separator_line(trimmed) { + continue; + } + + if is_row_count_line(trimmed) { + continue; + } + + if trimmed.is_empty() { + continue; + } + + // Data or header row with | delimiters + if trimmed.contains('|') { + total_rows += 1; + // First row is header + if total_rows > 1 { + data_rows += 1; + } + + if data_rows <= MAX_TABLE_ROWS || total_rows == 1 { + let cols: Vec<&str> = trimmed.split('|').map(|c| c.trim()).collect(); + result.push(cols.join("\t")); + } + } else { + // Non-table line (e.g., SET, NOTICE) + result.push(trimmed.to_string()); + } + } + + if data_rows > MAX_TABLE_ROWS { + result.push(format!("... +{} more rows", data_rows - MAX_TABLE_ROWS)); + } + + result.join("\n") +} + +/// Filter psql expanded format: +/// Convert -[ RECORD N ]- blocks to one-liner key=val format. 
+fn filter_expanded(output: &str) -> String { + let mut result = Vec::new(); + let mut current_pairs: Vec = Vec::new(); + let mut current_record: Option = None; + let mut record_count = 0; + + for line in output.lines() { + let trimmed = line.trim(); + + if is_row_count_line(trimmed) { + continue; + } + + if let Some(record_num) = parse_record_header(trimmed) { + // Flush previous record + if let Some(rec) = current_record.take() { + if record_count <= MAX_EXPANDED_RECORDS { + result.push(format!("{} {}", rec, current_pairs.join(" "))); + } + current_pairs.clear(); + } + record_count += 1; + current_record = Some(format!("[{}]", record_num)); + } else if trimmed.contains('|') && current_record.is_some() { + let parts: Vec<&str> = trimmed.splitn(2, '|').collect(); + if parts.len() == 2 { + let key = parts[0].trim(); + let val = parts[1].trim(); + current_pairs.push(format!("{}={}", key, val)); + } + } else if trimmed.is_empty() { + continue; + } else if current_record.is_none() { + result.push(trimmed.to_string()); + } + } + + // Flush last record + if let Some(rec) = current_record.take() + && record_count <= MAX_EXPANDED_RECORDS + { + result.push(format!("{} {}", rec, current_pairs.join(" "))); + } + + if record_count > MAX_EXPANDED_RECORDS { + result.push(format!( + "... +{} more records", + record_count - MAX_EXPANDED_RECORDS + )); + } + + result.join("\n") +} + +/// Parse a record header line like "-[ RECORD 1 ]----" and return the record number. 
/// Parse a record header line like "-[ RECORD 1 ]----" and return the record number.
///
/// Surrounding whitespace is ignored. The line must start with `-[ RECORD `,
/// followed by one or more ASCII digits that are immediately terminated by a
/// space or `]`. Anything else — including a header with no digits at all —
/// yields `None`. The returned slice borrows from `line`.
fn parse_record_header(line: &str) -> Option<&str> {
    let rest = line.trim().strip_prefix("-[ RECORD ")?;

    // Length of the leading digit run; ASCII digits are single bytes, so this is
    // also a valid char boundary for slicing.
    let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return None;
    }

    match rest[digits..].chars().next() {
        Some(' ') | Some(']') => Some(&rest[..digits]),
        _ => None,
    }
}
+10 more rows")); + } + + #[test] + fn test_filter_expanded_overflow() { + let mut lines = Vec::new(); + for i in 1..=25 { + lines.push(format!("-[ RECORD {} ]----", i)); + lines.push(format!("id | {}", i)); + lines.push(format!("name | user{}", i)); + } + let input = lines.join("\n"); + + let result = filter_expanded(&input); + assert!(result.contains("... +5 more records")); + } + + #[test] + fn test_filter_psql_passthrough() { + let input = "COPY 5\n"; + let result = filter_psql_output(input); + assert_eq!(result, "COPY 5\n"); + } + + #[test] + fn test_filter_psql_routes_to_table() { + let input = " id | name\n----+------\n 1 | foo\n(1 row)\n"; + let result = filter_psql_output(input); + assert!(result.contains("id\tname")); + assert!(!result.contains("----")); + } + + #[test] + fn test_filter_psql_routes_to_expanded() { + let input = "-[ RECORD 1 ]----\nid | 1\nname | foo\n"; + let result = filter_psql_output(input); + assert!(result.contains("[1]")); + assert!(result.contains("id=1")); + } + + #[test] + fn test_parse_record_header() { + assert_eq!(parse_record_header("-[ RECORD 1 ]----"), Some("1")); + assert_eq!(parse_record_header("-[ RECORD 42 ]------"), Some("42")); + assert!(parse_record_header("not a record").is_none()); + assert!(parse_record_header("----+----").is_none()); + } + + #[test] + fn test_filter_table_strips_row_count() { + let input = " c\n---\n 1\n(1 row)\n"; + let result = filter_table(input); + assert!(!result.contains("(1 row)")); + } + + #[test] + fn test_filter_expanded_strips_row_count() { + let input = "-[ RECORD 1 ]----\nid | 1\n(1 row)\n"; + let result = filter_expanded(input); + assert!(!result.contains("(1 row)")); + } + + #[test] + fn test_compressor_returns_none_for_empty() { + let compressor = PsqlCompressor; + assert!(compressor.compress("psql", "").is_none()); + } +} diff --git a/crates/compressor/src/strategy/bash/pytest.rs b/crates/compressor/src/strategy/bash/pytest.rs new file mode 100644 index 0000000..532cb82 --- 
/dev/null +++ b/crates/compressor/src/strategy/bash/pytest.rs @@ -0,0 +1,262 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `pytest` command output. +//! +//! Parses pytest output to show only failures and a compact summary, +//! stripping session headers, passing test lines, and verbose output. + +use super::BashCompressor; + +pub struct PytestCompressor; + +impl BashCompressor for PytestCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + Some(filter_pytest_output(output)) + } +} + +#[derive(Debug, PartialEq)] +enum ParseState { + Header, + TestProgress, + Failures, + Summary, +} + +/// Parse pytest output using state machine. 
+fn filter_pytest_output(output: &str) -> String { + let mut state = ParseState::Header; + let mut failures: Vec = Vec::new(); + let mut current_failure: Vec = Vec::new(); + let mut summary_line = String::new(); + + for line in output.lines() { + let trimmed = line.trim(); + + // State transitions + if trimmed.starts_with("===") && trimmed.contains("test session starts") { + state = ParseState::Header; + continue; + } else if trimmed.starts_with("===") && trimmed.contains("FAILURES") { + state = ParseState::Failures; + continue; + } else if trimmed.starts_with("===") && trimmed.contains("short test summary") { + state = ParseState::Summary; + if !current_failure.is_empty() { + failures.push(current_failure.join("\n")); + current_failure.clear(); + } + continue; + } else if trimmed.starts_with("===") + && (trimmed.contains("passed") || trimmed.contains("failed")) + { + summary_line = trimmed.to_string(); + continue; + } + + match state { + ParseState::Header => { + if trimmed.starts_with("collected") { + state = ParseState::TestProgress; + } + } + ParseState::TestProgress => { + // Skip individual test lines like "tests/test_foo.py .... 
[100%]" + } + ParseState::Failures => { + if trimmed.starts_with("___") { + if !current_failure.is_empty() { + failures.push(current_failure.join("\n")); + current_failure.clear(); + } + current_failure.push(trimmed.to_string()); + } else if !trimmed.is_empty() && !trimmed.starts_with("===") { + current_failure.push(trimmed.to_string()); + } + } + ParseState::Summary => { + if trimmed.starts_with("FAILED") || trimmed.starts_with("ERROR") { + failures.push(trimmed.to_string()); + } + } + } + } + + if !current_failure.is_empty() { + failures.push(current_failure.join("\n")); + } + + build_pytest_summary(&summary_line, &failures) +} + +fn build_pytest_summary(summary: &str, failures: &[String]) -> String { + let (passed, failed, skipped) = parse_summary_line(summary); + + if failed == 0 && passed > 0 { + return format!("Pytest: {} passed", passed); + } + + if passed == 0 && failed == 0 { + return "Pytest: No tests collected".to_string(); + } + + let mut result = format!("Pytest: {} passed, {} failed", passed, failed); + if skipped > 0 { + result.push_str(&format!(", {} skipped", skipped)); + } + result.push('\n'); + + if failures.is_empty() { + return result.trim().to_string(); + } + + result.push_str("\nFailures:\n"); + + for (i, failure) in failures.iter().take(5).enumerate() { + let lines: Vec<&str> = failure.lines().collect(); + + if let Some(first_line) = lines.first() { + if first_line.starts_with("___") { + let test_name = first_line.trim_matches('_').trim(); + result.push_str(&format!("{}. {}\n", i + 1, test_name)); + } else if first_line.starts_with("FAILED") { + let parts: Vec<&str> = first_line.split(" - ").collect(); + if let Some(test_path) = parts.first() { + let test_name = test_path.trim_start_matches("FAILED "); + result.push_str(&format!("{}. 
{}\n", i + 1, test_name)); + } + if parts.len() > 1 { + result.push_str(&format!(" {}\n", truncate(parts[1], 100))); + } + continue; + } + } + + // Show relevant error lines (assertions, errors, file locations) + let mut relevant_lines = 0; + for line in lines.iter().skip(1) { + let line_lower = line.to_lowercase(); + let is_relevant = line.trim().starts_with('>') + || line.trim().starts_with('E') + || line_lower.contains("assert") + || line_lower.contains("error") + || line.contains(".py:"); + + if is_relevant && relevant_lines < 3 { + result.push_str(&format!(" {}\n", truncate(line, 100))); + relevant_lines += 1; + } + } + } + + if failures.len() > 5 { + result.push_str(&format!("\n... +{} more failures\n", failures.len() - 5)); + } + + result.trim().to_string() +} + +fn parse_summary_line(summary: &str) -> (usize, usize, usize) { + let mut passed = 0; + let mut failed = 0; + let mut skipped = 0; + + for part in summary.split(',') { + let words: Vec<&str> = part.split_whitespace().collect(); + for (i, word) in words.iter().enumerate() { + if i > 0 { + if word.contains("passed") { + if let Ok(n) = words[i - 1].parse::() { + passed = n; + } + } else if word.contains("failed") { + if let Ok(n) = words[i - 1].parse::() { + failed = n; + } + } else if word.contains("skipped") + && let Ok(n) = words[i - 1].parse::() + { + skipped = n; + } + } + } + } + + (passed, failed, skipped) +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() <= max { + s.to_string() + } else { + format!("{}...", &s[..s.floor_char_boundary(max.saturating_sub(3))]) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filter_pytest_all_pass() { + let output = "=== test session starts ===\nplatform darwin -- Python 3.11.0\ncollected 5 items\n\ntests/test_foo.py ..... 
[100%]\n\n=== 5 passed in 0.50s ==="; + let result = filter_pytest_output(output); + assert!(result.contains("Pytest")); + assert!(result.contains("5 passed")); + } + + #[test] + fn test_filter_pytest_with_failures() { + let output = "=== test session starts ===\ncollected 5 items\n\ntests/test_foo.py ..F.. [100%]\n\n=== FAILURES ===\n___ test_something ___\n\n def test_something():\n> assert False\nE assert False\n\ntests/test_foo.py:10: AssertionError\n\n=== short test summary info ===\nFAILED tests/test_foo.py::test_something - assert False\n=== 4 passed, 1 failed in 0.50s ==="; + let result = filter_pytest_output(output); + assert!(result.contains("4 passed, 1 failed")); + assert!(result.contains("test_something")); + assert!(result.contains("assert False")); + } + + #[test] + fn test_filter_pytest_no_tests() { + let output = + "=== test session starts ===\ncollected 0 items\n\n=== no tests ran in 0.00s ==="; + let result = filter_pytest_output(output); + assert!(result.contains("No tests collected")); + } + + #[test] + fn test_parse_summary_line() { + assert_eq!(parse_summary_line("=== 5 passed in 0.50s ==="), (5, 0, 0)); + assert_eq!( + parse_summary_line("=== 4 passed, 1 failed in 0.50s ==="), + (4, 1, 0) + ); + assert_eq!( + parse_summary_line("=== 3 passed, 1 failed, 2 skipped in 1.0s ==="), + (3, 1, 2) + ); + } + + #[test] + fn test_truncate() { + assert_eq!(truncate("hello", 10), "hello"); + assert_eq!(truncate("hello world!", 8), "hello..."); + } +} diff --git a/crates/compressor/src/strategy/bash/rg.rs b/crates/compressor/src/strategy/bash/rg.rs new file mode 100644 index 0000000..ae76360 --- /dev/null +++ b/crates/compressor/src/strategy/bash/rg.rs @@ -0,0 +1,764 @@ +//! Compressor for `rg` (ripgrep) command output. +//! +//! Handles: +//! - match output: file:line:content or file:line:col:content +//! - context output: file-line-content (and single-file line-content) +//! - --heading output: file headings with line:content entries +//! 
- --files-with-matches / -l output: file lists (compressed like find) + +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::Path; + +use super::BashCompressor; + +const MAX_LINE_LEN: usize = 120; +const MAX_MATCHES_PER_FILE: usize = 10; +const MIN_LINES_FOR_COMPRESSION: usize = 10; + +pub struct RgCompressor; + +impl BashCompressor for RgCompressor { + fn compress(&self, command: &str, output: &str) -> Option { + let trimmed = output.trim(); + if trimmed.is_empty() { + return None; + } + + let info = parse_rg_command(command); + if info.files_only { + return compress_file_list(trimmed); + } + + compact_rg(trimmed) + } +} + +#[derive(Debug)] +struct RgCommandInfo { + files_only: bool, +} + +fn parse_rg_command(command: &str) -> RgCommandInfo { + let tokens = shell_tokenize(command); + let mut files_only = false; + + let mut after_dashdash = false; + for tok in tokens { + if after_dashdash { + continue; + } + if tok == "--" { + after_dashdash = true; + continue; + } + if tok == "-l" || tok == "--files-with-matches" || tok == "--files" { + files_only = true; + } + if tok.starts_with('-') && tok.len() > 2 && tok.starts_with("-l") { + // Combined short flags (e.g., -lS) + files_only = true; + } + } + + RgCommandInfo { files_only } +} + +fn shell_tokenize(s: &str) -> Vec { + let mut tokens: Vec = Vec::new(); + let mut current = String::new(); + let mut in_single = false; + let mut in_double = false; + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '\'' if !in_double => in_single = !in_single, + '"' if !in_single => in_double = !in_double, + '\\' if !in_single => { + if let Some(next) = chars.next() { + current.push(next); + } + } + ' ' | '\t' if !in_single && !in_double => { + if !current.is_empty() { + tokens.push(std::mem::take(&mut current)); + } + } + _ => current.push(c), + } + } + if !current.is_empty() { + tokens.push(current); + } + tokens +} + +fn compress_file_list(output: &str) -> Option { + let 
lines: Vec<&str> = output.lines().filter(|l| !l.trim().is_empty()).collect(); + if lines.len() < MIN_LINES_FOR_COMPRESSION { + return None; + } + + Some(compact_files(&lines)) +} + +fn compact_files(paths: &[&str]) -> String { + let mut by_dir: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut by_ext: HashMap = HashMap::new(); + + for path in paths { + let p = Path::new(path); + let dir = p.parent().map(|d| d.to_str().unwrap_or(".")).unwrap_or("."); + let dir = if dir.is_empty() { "." } else { dir }; + let filename = p + .file_name() + .map(|f| f.to_str().unwrap_or("")) + .unwrap_or(""); + + by_dir.entry(dir).or_default().push(filename); + + let ext = p + .extension() + .map(|e| format!(".{}", e.to_str().unwrap_or(""))) + .unwrap_or_else(|| "no ext".to_string()); + *by_ext.entry(ext).or_default() += 1; + } + + let mut dirs: Vec<_> = by_dir.keys().copied().collect(); + dirs.sort(); + + let total = paths.len(); + let mut out = format!("{}F {}D:\n\n", total, dirs.len()); + + let mut shown = 0; + let max_results = 50; + + for dir in &dirs { + if shown >= max_results { + break; + } + + let files_in_dir = &by_dir[dir]; + let dir_display = compact_path(dir); + + let remaining = max_results - shown; + if files_in_dir.len() <= remaining { + out.push_str(&format!("{}/ {}\n", dir_display, files_in_dir.join(" "))); + shown += files_in_dir.len(); + } else { + let partial: Vec<&str> = files_in_dir.iter().take(remaining).copied().collect(); + out.push_str(&format!("{}/ {}\n", dir_display, partial.join(" "))); + shown += partial.len(); + break; + } + } + + if shown < total { + out.push_str(&format!("+{} more\n", total - shown)); + } + + if by_ext.len() > 1 { + let mut exts: Vec<_> = by_ext.iter().collect(); + exts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let ext_parts: Vec = exts + .iter() + .take(5) + .map(|(e, c)| format!("{}({})", e, c)) + .collect(); + out.push_str(&format!("\next: {}\n", ext_parts.join(" "))); + } + + out +} + +fn compact_path(path: &str) 
-> String { + if path.len() <= 50 { + return path.to_string(); + } + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() <= 3 { + return path.to_string(); + } + format!( + "{}/.../{}/{}", + parts[0], + parts[parts.len() - 2], + parts[parts.len() - 1] + ) +} + +#[derive(Clone)] +struct ParsedLine { + file: String, + line_num: usize, + content: String, + is_match: bool, +} + +fn compact_rg(raw: &str) -> Option { + let mut by_file: BTreeMap> = BTreeMap::new(); + let mut total = 0; + + let mut current_file: Option = None; + let mut known_files: HashSet = HashSet::new(); + + // First pass: collect match lines (with file prefix or heading context). + for line in raw.lines() { + if line == "--" { + continue; + } + + if let Some(heading) = parse_heading_line(line) { + current_file = Some(heading); + continue; + } + + // In heading mode, prefer unprefixed parse so "10:content" lines stay + // associated with the current heading file rather than being treated as + // prefixed matches with a numeric filename. + if let Some(ref file) = current_file + && let Some(parsed) = parse_unprefixed_line(line, file) + { + if parsed.is_match { + known_files.insert(parsed.file.clone()); + total += 1; + by_file.entry(parsed.file).or_default().push(( + parsed.line_num, + truncate_line(parsed.content.trim(), MAX_LINE_LEN), + true, + )); + } + continue; + } + + if let Some(parsed) = parse_prefixed_match_line(line) { + known_files.insert(parsed.file.clone()); + total += 1; + by_file.entry(parsed.file).or_default().push(( + parsed.line_num, + truncate_line(parsed.content.trim(), MAX_LINE_LEN), + true, + )); + } + } + + // Second pass: collect context lines now that we know filenames. + current_file = None; + for line in raw.lines() { + if line == "--" { + continue; + } + + // Check prefixed context lines before heading detection so that lines like + // "src/foo.rs-3-content" are not mistakenly consumed as heading lines. 
+ if let Some(parsed) = parse_prefixed_context_line(line, &known_files) { + total += 1; + by_file.entry(parsed.file).or_default().push(( + parsed.line_num, + truncate_line(parsed.content.trim(), MAX_LINE_LEN), + false, + )); + continue; + } + + if let Some(heading) = parse_heading_line(line) { + current_file = Some(heading); + continue; + } + + if let Some(file) = current_file.clone() + && let Some(parsed) = parse_unprefixed_line(line, &file) + && !parsed.is_match + { + total += 1; + by_file.entry(parsed.file).or_default().push(( + parsed.line_num, + truncate_line(parsed.content.trim(), MAX_LINE_LEN), + false, + )); + } + } + + if total < MIN_LINES_FOR_COMPRESSION { + return None; + } + + let mut out = format!("{} in {}F:\n\n", total, by_file.len()); + + for (file, matches) in &by_file { + let file_display = compact_path(file); + out.push_str(&format!("{} ({}):\n", file_display, matches.len())); + + let selected = select_lines(matches, MAX_MATCHES_PER_FILE); + for (line_num, content, is_match) in &selected { + if *line_num > 0 { + let sep = if *is_match { ':' } else { '-' }; + out.push_str(&format!(" {:>4}{} {}\n", line_num, sep, content)); + } else { + out.push_str(&format!(" {}\n", content)); + } + } + + out.push('\n'); + } + + Some(out) +} + +fn parse_heading_line(line: &str) -> Option { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed == "--" { + return None; + } + let first = trimmed.as_bytes().first().copied(); + let starts_with_digit = first.map(|b| b.is_ascii_digit()).unwrap_or(false); + if starts_with_digit { + return None; + } + if trimmed.contains(':') { + return None; + } + Some(trimmed.to_string()) +} + +fn parse_prefixed_match_line(line: &str) -> Option { + let parts: Vec<&str> = line.splitn(4, ':').collect(); + if parts.len() >= 3 { + if let Ok(ln) = parts[1].trim().parse::() { + if parts.len() == 4 && parts[2].trim().parse::().is_ok() { + return Some(ParsedLine { + file: parts[0].to_string(), + line_num: ln, + content: 
parts[3].to_string(), + is_match: true, + }); + } + if parts.len() == 4 { + return Some(ParsedLine { + file: parts[0].to_string(), + line_num: ln, + content: format!("{}:{}", parts[2], parts[3]), + is_match: true, + }); + } + if parts.len() == 3 { + return Some(ParsedLine { + file: parts[0].to_string(), + line_num: ln, + content: parts[2].to_string(), + is_match: true, + }); + } + } + if parts.len() >= 2 { + return Some(ParsedLine { + file: parts[0].to_string(), + line_num: 0, + content: line[parts[0].len() + 1..].to_string(), + is_match: true, + }); + } + } + if parts.len() == 2 { + return Some(ParsedLine { + file: parts[0].to_string(), + line_num: 0, + content: parts[1].to_string(), + is_match: true, + }); + } + None +} + +fn parse_prefixed_context_line(line: &str, known_files: &HashSet) -> Option { + for file in known_files { + let prefix = format!("{}-", file); + if let Some(rest) = line.strip_prefix(&prefix) { + let mut split = rest.splitn(3, '-'); + let first = split.next().unwrap_or(""); + if let Ok(ln) = first.parse::() { + if let Some(second) = split.next() { + if let Some(third) = split.next() { + if second.parse::().is_ok() { + return Some(ParsedLine { + file: file.clone(), + line_num: ln, + content: third.to_string(), + is_match: false, + }); + } + return Some(ParsedLine { + file: file.clone(), + line_num: ln, + content: format!("{}-{}", second, third), + is_match: false, + }); + } + return Some(ParsedLine { + file: file.clone(), + line_num: ln, + content: second.to_string(), + is_match: false, + }); + } + return Some(ParsedLine { + file: file.clone(), + line_num: ln, + content: String::new(), + is_match: false, + }); + } + return Some(ParsedLine { + file: file.clone(), + line_num: 0, + content: rest.to_string(), + is_match: false, + }); + } + } + None +} + +fn parse_unprefixed_line(line: &str, file: &str) -> Option { + let trimmed = line.trim(); + if trimmed.is_empty() { + return None; + } + let first = trimmed.as_bytes().first().copied(); + let 
starts_with_digit = first.map(|b| b.is_ascii_digit()).unwrap_or(false); + if !starts_with_digit { + return None; + } + + if let Some(colon_pos) = trimmed.find(':') { + let (left, rest) = trimmed.split_at(colon_pos); + if let Ok(ln) = left.parse::() { + let rest = &rest[1..]; + if let Some(next_colon) = rest.find(':') { + let (maybe_col, content) = rest.split_at(next_colon); + if maybe_col.parse::().is_ok() { + return Some(ParsedLine { + file: file.to_string(), + line_num: ln, + content: content[1..].to_string(), + is_match: true, + }); + } + } + return Some(ParsedLine { + file: file.to_string(), + line_num: ln, + content: rest.to_string(), + is_match: true, + }); + } + } + + if let Some(dash_pos) = trimmed.find('-') { + let (left, rest) = trimmed.split_at(dash_pos); + if let Ok(ln) = left.parse::() { + return Some(ParsedLine { + file: file.to_string(), + line_num: ln, + content: rest[1..].to_string(), + is_match: false, + }); + } + } + + None +} + +fn select_lines(matches: &[(usize, String, bool)], max: usize) -> Vec<(usize, String, bool)> { + if matches.len() <= max { + return matches.to_vec(); + } + + let match_lines: Vec<_> = matches + .iter() + .filter(|(_, _, is_match)| *is_match) + .collect(); + if match_lines.len() >= max { + return match_lines.into_iter().take(max).cloned().collect(); + } + + let context_budget = max - match_lines.len(); + let mut selected: Vec<(usize, String, bool)> = Vec::new(); + + for entry in match_lines { + selected.push(entry.clone()); + } + + for (added, entry) in matches + .iter() + .filter(|(_, _, is_match)| !*is_match) + .enumerate() + { + if added >= context_budget { + break; + } + selected.push(entry.clone()); + } + + selected +} + +fn truncate_line(line: &str, max_len: usize) -> String { + if line.len() <= max_len { + line.to_string() + } else { + let end = max_len.saturating_sub(3); + let end = line.floor_char_boundary(end); + format!("{}...", &line[..end]) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_files_with_matches_compressed() { + let input = (0..12) + .map(|i| format!("src/dir/file{}.rs", i)) + .collect::>() + .join("\n"); + let compressor = RgCompressor; + let result = compressor + .compress("rg --files-with-matches foo .", &input) + .unwrap(); + assert!(result.contains("12F")); + assert!(result.contains("src/dir/")); + } + + #[test] + fn test_heading_output() { + let input = "\ +src/main.rs +10:fn main() { +11: println!(\"hi\"); +12: println!(\"hi2\"); +13: println!(\"hi3\"); +14: println!(\"hi4\"); +src/lib.rs +3:pub fn lib() { +4: println!(\"lib\"); +5: println!(\"lib2\"); +6: println!(\"lib3\"); +7: println!(\"lib4\"); +"; + let result = compact_rg(input).unwrap(); + assert!(result.contains("src/main.rs")); + assert!(result.contains("src/lib.rs")); + assert!(result.contains("10: fn main()")); + } + + #[test] + fn test_vimgrep_output() { + let input = "\ +src/main.rs:10:5:fn main() { +src/main.rs:11:2:println!(\"hi\"); +src/lib.rs:3:1:pub fn lib() { +src/lib.rs:4:1:println!(\"lib\"); +src/util.rs:1:1:fn util() { +src/util.rs:2:1:fn util2() { +src/util.rs:3:1:fn util3() { +src/util.rs:4:1:fn util4() { +src/util.rs:5:1:fn util5() { +src/util.rs:6:1:fn util6() { +"; + let result = compact_rg(input).unwrap(); + assert!(result.contains("src/main.rs")); + assert!(result.contains("10: fn main()")); + } + + #[test] + fn test_standard_prefixed_output() { + // Standard rg output: file:line:content (no column numbers) + let input = "\ +core/src/compression/strategy/bash/rg.rs:12:use super::BashCompressor; +core/src/compression/strategy/bash/rg.rs:20:impl BashCompressor for RgCompressor { +core/src/compression/strategy/bash/rg.rs:25: fn compress(&self, command: &str, output: &str) -> Option { +core/src/compression/strategy/bash/rg.rs:30: compact_rg(trimmed) +core/src/compression/strategy/bash/mod.rs:10:pub trait BashCompressor { +core/src/compression/strategy/bash/mod.rs:11: fn compress(&self, command: &str, output: &str) -> Option; 
+core/src/compression/strategy/bash/mod.rs:20:pub struct BashCompressorRegistry; +core/src/compression/strategy/bash/mod.rs:38: fn get(&self, name: &str) -> Option<&dyn BashCompressor> { +core/src/compression/strategy/bash/curl.rs:6:use super::BashCompressor; +core/src/compression/strategy/bash/curl.rs:13:impl BashCompressor for CurlCompressor { +"; + let result = compact_rg(input).unwrap(); + assert!(result.contains("10 in 3F:")); + assert!(result.contains("core/src/compression/strategy/bash/rg.rs")); + assert!(result.contains("core/src/compression/strategy/bash/mod.rs")); + assert!(result.contains("12: use super::BashCompressor;")); + } + + #[test] + fn test_context_output() { + // rg -C 1 output: file-line-content for context, file:line:content for matches + let input = "\ +src/a.rs-1-// preamble +src/a.rs:2:fn foo() { +src/a.rs-3- let x = 1; +-- +src/b.rs-9-// helper +src/b.rs:10:pub fn bar() { +src/b.rs-11- return 1; +-- +src/c.rs-4-// util +src/c.rs:5:fn baz() { +src/c.rs-6- return 42; +-- +src/d.rs:1:fn extra() {} +src/d.rs:2:fn extra2() {} +src/d.rs:3:fn extra3() {} +"; + let result = compact_rg(input).unwrap(); + assert!(result.contains("src/a.rs")); + assert!(result.contains("src/b.rs")); + // context lines use '-' separator in output + assert!(result.contains('-')); + // match lines use ':' separator + assert!(result.contains(':')); + } + + #[test] + fn test_below_threshold_returns_none() { + // Fewer than MIN_LINES_FOR_COMPRESSION (10) lines → no compression + let input = "\ +src/a.rs:1:use foo; +src/b.rs:2:use bar; +src/c.rs:3:use baz; +"; + let result = compact_rg(input); + assert!(result.is_none()); + } + + #[test] + fn test_empty_output_returns_none() { + let compressor = RgCompressor; + assert!(compressor.compress("rg foo .", "").is_none()); + assert!(compressor.compress("rg foo .", " \n ").is_none()); + } + + #[test] + fn test_short_l_flag_files_only() { + // -l short flag should trigger file list compression + let input = (0..12) + .map(|i| 
format!("src/module{}/lib.rs", i)) + .collect::>() + .join("\n"); + let compressor = RgCompressor; + let result = compressor.compress("rg -l somepattern .", &input).unwrap(); + assert!(result.contains("12F")); + } + + #[test] + fn test_files_flag_files_only() { + // --files flag (list all files without searching) triggers file list path + let input = (0..12) + .map(|i| format!("src/dir{}/mod.rs", i)) + .collect::>() + .join("\n"); + let compressor = RgCompressor; + let result = compressor.compress("rg --files .", &input).unwrap(); + assert!(result.contains("12F")); + } + + #[test] + fn test_combined_short_flags_files_only() { + // -lS combined flags should still trigger files-only mode + let input = (0..12) + .map(|i| format!("src/pkg{}/main.rs", i)) + .collect::>() + .join("\n"); + let compressor = RgCompressor; + let result = compressor.compress("rg -lS pattern src/", &input).unwrap(); + assert!(result.contains("12F")); + } + + #[test] + fn test_max_matches_per_file_truncated() { + // More than MAX_MATCHES_PER_FILE (10) matches in one file — count shown but lines capped + let input = (1..=15) + .map(|i| format!("src/big.rs:{}:match line {}", i, i)) + .collect::>() + .join("\n"); + let result = compact_rg(&input).unwrap(); + // header should reflect total matches + assert!(result.contains("15 in 1F:")); + // file section shows true count + assert!(result.contains("src/big.rs (15):")); + // only MAX_MATCHES_PER_FILE (10) lines displayed (indented with leading spaces) + let displayed = result + .lines() + .filter(|l| l.starts_with(" ") && !l.trim().is_empty()) + .count(); + assert_eq!(displayed, 10); + } + + #[test] + fn test_long_line_truncated() { + // Lines longer than MAX_LINE_LEN (120) should be truncated with "..." 
+ let long_content = "x".repeat(200); + let input = (1..=10) + .map(|i| format!("src/long.rs:{}:{}", i, long_content)) + .collect::>() + .join("\n"); + let result = compact_rg(&input).unwrap(); + assert!(result.contains("...")); + // No line in the output should exceed MAX_LINE_LEN significantly + for line in result.lines() { + assert!(line.len() <= MAX_LINE_LEN + 20); // allow for line prefix overhead + } + } + + #[test] + fn test_file_list_many_files_shows_more() { + // More than 50 files in file list should show "+X more" + let input = (0..60) + .map(|i| format!("src/gen/file{}.rs", i)) + .collect::>() + .join("\n"); + let compressor = RgCompressor; + let result = compressor + .compress("rg --files-with-matches foo .", &input) + .unwrap(); + assert!(result.contains("60F")); + assert!(result.contains("+10 more")); + } + + #[test] + fn test_file_list_multiple_extensions() { + // File list with multiple extensions shows extension summary + let mut files: Vec = (0..6).map(|i| format!("src/file{}.rs", i)).collect(); + files.extend((0..4).map(|i| format!("tests/test{}.py", i))); + let input = files.join("\n"); + let compressor = RgCompressor; + let result = compressor.compress("rg -l pattern .", &input).unwrap(); + assert!(result.contains(".rs(6)")); + assert!(result.contains(".py(4)")); + } + + #[test] + fn test_multi_file_summary_header() { + // Verify summary header format: "{N} in {M}F:" + let input = "\ +src/alpha.rs:1:fn alpha() {} +src/alpha.rs:2:fn alpha2() {} +src/alpha.rs:3:fn alpha3() {} +src/alpha.rs:4:fn alpha4() {} +src/alpha.rs:5:fn alpha5() {} +src/beta.rs:10:fn beta() {} +src/beta.rs:11:fn beta2() {} +src/beta.rs:12:fn beta3() {} +src/beta.rs:13:fn beta4() {} +src/beta.rs:14:fn beta5() {} +"; + let result = compact_rg(input).unwrap(); + assert!(result.starts_with("10 in 2F:")); + assert!(result.contains("src/alpha.rs (5):")); + assert!(result.contains("src/beta.rs (5):")); + } +} diff --git a/crates/compressor/src/strategy/bash/tree.rs 
b/crates/compressor/src/strategy/bash/tree.rs new file mode 100644 index 0000000..40290f8 --- /dev/null +++ b/crates/compressor/src/strategy/bash/tree.rs @@ -0,0 +1,285 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `tree` command output. +//! +//! Removes summary lines and trailing empty lines to reduce token usage +//! while preserving the directory structure visualization. + +use super::BashCompressor; + +/// Directories that are noise for LLM context. 
+const NOISE_DIRS: &[&str] = &[ + "node_modules", + ".git", + "target", + "__pycache__", + ".next", + "dist", + "build", + ".cache", + ".turbo", + ".vercel", + ".pytest_cache", + ".mypy_cache", + ".tox", + ".venv", + "venv", + "env", + ".env", + "coverage", + ".nyc_output", + ".DS_Store", + "Thumbs.db", + ".idea", + ".vscode", + ".vs", + "*.egg-info", + ".eggs", +]; + +pub struct TreeCompressor; + +impl BashCompressor for TreeCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + // Always compress tree output (no format detection needed like ls -l) + Some(filter_tree_output(output)) + } +} + +fn filter_tree_output(raw: &str) -> String { + let lines: Vec<&str> = raw.lines().collect(); + + if lines.is_empty() { + return "\n".to_string(); + } + + let mut filtered_lines = Vec::new(); + let mut skip_depth: Option = None; + + for line in lines { + // Skip the final summary line (e.g., "5 directories, 23 files") + if line.contains("director") && line.contains("file") { + continue; + } + + // Skip empty lines at the end + if line.trim().is_empty() && filtered_lines.is_empty() { + continue; + } + + // Calculate indentation depth (number of tree characters before content) + let depth = line + .chars() + .take_while(|c| *c == ' ' || *c == '│' || *c == '├' || *c == '└' || *c == '─') + .count(); + + // If we're skipping a noise directory, skip all nested content + if let Some(skip_d) = skip_depth { + if depth > skip_d { + continue; // Still inside the noise directory + } else { + skip_depth = None; // Exited the noise directory + } + } + + // Extract the actual filename/dirname from tree's formatted output + let trimmed = line + .trim_start_matches(|c: char| { + c.is_whitespace() || c == '│' || c == '├' || c == '└' || c == '─' + }) + .trim(); + + // Check if this line is a noise directory + let is_noise = NOISE_DIRS.iter().any(|noise| { + // Check exact match or wildcard pattern match + if noise.starts_with('*') { + let suffix = 
noise.trim_start_matches('*'); + trimmed.ends_with(suffix) + } else { + trimmed == *noise || trimmed.starts_with(&format!("{}/", noise)) + } + }); + + if is_noise { + skip_depth = Some(depth); // Start skipping this directory and its children + continue; + } + + filtered_lines.push(line); + } + + // Remove trailing empty lines + while filtered_lines.last().is_some_and(|l| l.trim().is_empty()) { + filtered_lines.pop(); + } + + filtered_lines.join("\n") + "\n" +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filter_removes_summary() { + let input = ".\n├── src\n│ └── main.rs\n└── Cargo.toml\n\n2 directories, 3 files\n"; + let output = filter_tree_output(input); + assert!(!output.contains("directories")); + assert!(!output.contains("files")); + assert!(output.contains("main.rs")); + assert!(output.contains("Cargo.toml")); + } + + #[test] + fn test_filter_preserves_structure() { + let input = ".\n├── src\n│ ├── main.rs\n│ └── lib.rs\n└── tests\n └── test.rs\n"; + let output = filter_tree_output(input); + assert!(output.contains("├──")); + assert!(output.contains("│")); + assert!(output.contains("└──")); + assert!(output.contains("main.rs")); + assert!(output.contains("test.rs")); + } + + #[test] + fn test_filter_handles_empty() { + let input = ""; + let output = filter_tree_output(input); + assert_eq!(output, "\n"); + } + + #[test] + fn test_filter_removes_trailing_empty_lines() { + let input = ".\n├── file.txt\n\n\n"; + let output = filter_tree_output(input); + assert_eq!(output.matches('\n').count(), 2); // Root + file.txt + final newline + } + + #[test] + fn test_filter_summary_variations() { + // Test different summary formats + let inputs = vec![ + (".\n└── file.txt\n\n0 directories, 1 file\n", "1 file"), + (".\n└── file.txt\n\n1 directory, 0 files\n", "1 directory"), + (".\n└── file.txt\n\n10 directories, 25 files\n", "25 files"), + ]; + + for (input, summary_fragment) in inputs { + let output = filter_tree_output(input); + assert!( + 
!output.contains(summary_fragment), + "Should remove summary '{}' from output", + summary_fragment + ); + assert!( + output.contains("file.txt"), + "Should preserve file.txt in output" + ); + } + } + + #[test] + fn test_noise_dirs_constant() { + // Verify NOISE_DIRS contains expected patterns + assert!(NOISE_DIRS.contains(&"node_modules")); + assert!(NOISE_DIRS.contains(&".git")); + assert!(NOISE_DIRS.contains(&"target")); + assert!(NOISE_DIRS.contains(&"__pycache__")); + assert!(NOISE_DIRS.contains(&".next")); + assert!(NOISE_DIRS.contains(&"dist")); + assert!(NOISE_DIRS.contains(&"build")); + } + + #[test] + fn test_compressor_compresses_tree_output() { + let compressor = TreeCompressor; + let input = ".\n├── src\n│ └── main.rs\n└── Cargo.toml\n\n2 directories, 3 files\n"; + let result = compressor.compress("tree", input); + assert!(result.is_some()); + let output = result.unwrap(); + assert!(output.contains("src")); + assert!(output.contains("main.rs")); + assert!(output.contains("Cargo.toml")); + assert!(!output.contains("directories")); + assert!(!output.contains("files")); + } + + #[test] + fn test_compressor_handles_empty_output() { + let compressor = TreeCompressor; + let result = compressor.compress("tree", ""); + assert!(result.is_some()); + assert_eq!(result.unwrap(), "\n"); + } + + #[test] + fn test_compressor_preserves_structure_chars() { + let compressor = TreeCompressor; + let input = ".\n├── dir1\n│ ├── file1.txt\n│ └── file2.txt\n└── dir2\n └── file3.txt\n\n2 directories, 3 files\n"; + let result = compressor.compress("tree -L 2", input); + assert!(result.is_some()); + let output = result.unwrap(); + assert!(output.contains("├──")); + assert!(output.contains("│")); + assert!(output.contains("└──")); + } + + #[test] + fn test_filter_removes_noise_dirs() { + let input = ".\n├── src\n│ └── main.rs\n├── node_modules\n│ └── package\n├── target\n│ └── debug\n└── .git\n └── config\n\n5 directories, 3 files\n"; + let output = filter_tree_output(input); + 
assert!(output.contains("src")); + assert!(output.contains("main.rs")); + assert!(!output.contains("node_modules")); + assert!(!output.contains("target")); + assert!(!output.contains(".git")); + } + + #[test] + fn test_filter_removes_noise_dirs_nested() { + let input = ".\n├── src\n│ ├── main.rs\n│ └── lib.rs\n├── dist\n│ ├── bundle.js\n│ └── bundle.css\n├── .next\n│ └── cache\n└── build\n └── output\n"; + let output = filter_tree_output(input); + assert!(output.contains("src")); + assert!(output.contains("main.rs")); + assert!(output.contains("lib.rs")); + assert!(!output.contains("dist")); + assert!(!output.contains("bundle.js")); + assert!(!output.contains(".next")); + assert!(!output.contains("build")); + } + + #[test] + fn test_filter_removes_wildcard_patterns() { + let input = ".\n├── src\n│ └── main.rs\n├── mypackage.egg-info\n│ └── PKG-INFO\n└── .eggs\n └── package\n"; + let output = filter_tree_output(input); + assert!(output.contains("src")); + assert!(output.contains("main.rs")); + assert!(!output.contains(".egg-info")); + assert!(!output.contains(".eggs")); + } + + #[test] + fn test_filter_preserves_similar_names() { + // Ensure we don't accidentally filter legitimate directories + let input = ".\n├── src\n│ └── main.rs\n├── targets\n│ └── file.txt\n└── node_modules_backup\n └── package.json\n"; + let output = filter_tree_output(input); + // These should be preserved because they're not exact matches + assert!(output.contains("targets")); + assert!(output.contains("node_modules_backup")); + } +} diff --git a/crates/compressor/src/strategy/bash/tsc.rs b/crates/compressor/src/strategy/bash/tsc.rs new file mode 100644 index 0000000..9f52e9c --- /dev/null +++ b/crates/compressor/src/strategy/bash/tsc.rs @@ -0,0 +1,246 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for `tsc` (TypeScript compiler) output. +//! +//! Groups TypeScript errors by file, shows error codes and messages, +//! and provides a summary with top error codes. + +use std::collections::HashMap; + +use super::BashCompressor; + +pub struct TscCompressor; + +impl BashCompressor for TscCompressor { + fn compress(&self, _command: &str, output: &str) -> Option { + if output.trim().is_empty() { + return None; + } + + Some(filter_tsc_output(output)) + } +} + +struct TsError { + file: String, + line: usize, + code: String, + message: String, + context_lines: Vec, +} + +/// Filter TypeScript compiler output — group errors by file. 
+fn filter_tsc_output(output: &str) -> String { + let mut errors: Vec = Vec::new(); + let lines: Vec<&str> = output.lines().collect(); + let mut i = 0; + + while i < lines.len() { + let line = lines[i]; + if let Some(err) = parse_tsc_error(line) { + let mut ts_err = err; + + // Capture continuation lines (indented context from tsc) + i += 1; + while i < lines.len() { + let next = lines[i]; + if !next.is_empty() + && (next.starts_with(" ") || next.starts_with('\t')) + && parse_tsc_error(next).is_none() + { + ts_err.context_lines.push(next.trim().to_string()); + i += 1; + } else { + break; + } + } + + errors.push(ts_err); + } else { + i += 1; + } + } + + if errors.is_empty() { + if output.contains("Found 0 errors") { + return "TypeScript: No errors found".to_string(); + } + return output.to_string(); + } + + // Group by file + let mut by_file: HashMap> = HashMap::new(); + for err in &errors { + by_file.entry(err.file.clone()).or_default().push(err); + } + + // Count by error code + let mut by_code: HashMap = HashMap::new(); + for err in &errors { + *by_code.entry(err.code.clone()).or_insert(0) += 1; + } + + let mut result = format!( + "TypeScript: {} errors in {} files\n", + errors.len(), + by_file.len() + ); + + // Top error codes summary + let mut code_counts: Vec<_> = by_code.iter().collect(); + code_counts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + + if code_counts.len() > 1 { + let codes_str: Vec = code_counts + .iter() + .take(5) + .map(|(code, count)| format!("{} ({}x)", code, count)) + .collect(); + result.push_str(&format!("Top codes: {}\n\n", codes_str.join(", "))); + } + + // Files sorted by error count + let mut files_sorted: Vec<_> = by_file.iter().collect(); + files_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + + for (file, file_errors) in &files_sorted { + result.push_str(&format!("{} ({} errors)\n", file, file_errors.len())); + + for err in *file_errors { + result.push_str(&format!( + " L{}: {} {}\n", + err.line, + err.code, + 
truncate(&err.message, 120) + )); + for ctx in &err.context_lines { + result.push_str(&format!(" {}\n", truncate(ctx, 120))); + } + } + result.push('\n'); + } + + result.trim().to_string() +} + +/// Parse a tsc error line like: src/file.ts(12,5): error TS2322: Type 'string' is not assignable. +fn parse_tsc_error(line: &str) -> Option { + // Find the pattern: file(line,col): error TSxxxx: message + let paren_start = line.find('(')?; + let paren_end = line[paren_start..].find(')')? + paren_start; + + let file = &line[..paren_start]; + let coords = &line[paren_start + 1..paren_end]; + + let after_paren = &line[paren_end + 1..]; + if !after_paren.contains("error TS") && !after_paren.contains("warning TS") { + return None; + } + + let line_num: usize = coords.split(',').next()?.parse().ok()?; + + // Extract TS code + let ts_start = after_paren.find("TS")?; + let code_start = ts_start; + let code_end = after_paren[code_start..] + .find(':') + .map(|i| i + code_start) + .unwrap_or(after_paren.len()); + let code = after_paren[code_start..code_end].trim().to_string(); + + let message = if code_end < after_paren.len() { + after_paren[code_end + 1..].trim().to_string() + } else { + String::new() + }; + + Some(TsError { + file: file.trim().to_string(), + line: line_num, + code, + message, + context_lines: Vec::new(), + }) +} + +fn truncate(s: &str, max: usize) -> String { + if s.len() <= max { + s.to_string() + } else { + format!("{}...", &s[..s.floor_char_boundary(max.saturating_sub(3))]) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filter_tsc_output() { + let output = "src/server/api/auth.ts(12,5): error TS2322: Type 'string' is not assignable to type 'number'.\nsrc/server/api/auth.ts(15,10): error TS2345: Argument of type 'number' is not assignable to parameter of type 'string'.\nsrc/components/Button.tsx(8,3): error TS2339: Property 'onClick' does not exist on type 'ButtonProps'.\nsrc/components/Button.tsx(10,5): error TS2322: Type 'string' is 
not assignable to type 'number'.\n\nFound 4 errors in 2 files.\n"; + let result = filter_tsc_output(output); + assert!(result.contains("TypeScript: 4 errors in 2 files")); + assert!(result.contains("auth.ts (2 errors)")); + assert!(result.contains("Button.tsx (2 errors)")); + assert!(result.contains("TS2322")); + } + + #[test] + fn test_every_error_message_shown() { + let output = "src/api.ts(10,5): error TS2322: Type 'string' is not assignable to type 'number'.\nsrc/api.ts(20,5): error TS2322: Type 'boolean' is not assignable to type 'string'.\nsrc/api.ts(30,5): error TS2322: Type 'null' is not assignable to type 'object'.\n"; + let result = filter_tsc_output(output); + assert!(result.contains("Type 'string' is not assignable to type 'number'")); + assert!(result.contains("Type 'boolean' is not assignable to type 'string'")); + assert!(result.contains("Type 'null' is not assignable to type 'object'")); + assert!(result.contains("L10:")); + assert!(result.contains("L20:")); + assert!(result.contains("L30:")); + } + + #[test] + fn test_no_errors() { + let output = "Found 0 errors. 
Watching for file changes."; + let result = filter_tsc_output(output); + assert!(result.contains("No errors found")); + } + + #[test] + fn test_parse_tsc_error() { + let line = "src/file.ts(12,5): error TS2322: Type 'string' is not assignable."; + let err = parse_tsc_error(line).unwrap(); + assert_eq!(err.file, "src/file.ts"); + assert_eq!(err.line, 12); + assert_eq!(err.code, "TS2322"); + assert!(err.message.contains("Type 'string'")); + } + + #[test] + fn test_parse_tsc_error_not_tsc() { + assert!(parse_tsc_error("normal log output").is_none()); + assert!(parse_tsc_error("src/file.ts: some other message").is_none()); + } + + #[test] + fn test_continuation_lines() { + let output = "src/app.tsx(10,3): error TS2322: Type '{ children: Element; }' is not assignable to type 'Props'.\n Property 'children' does not exist on type 'Props'.\nsrc/app.tsx(20,5): error TS2345: Argument of type 'number' is not assignable to parameter of type 'string'.\n"; + let result = filter_tsc_output(output); + assert!(result.contains("Property 'children' does not exist on type 'Props'")); + assert!(result.contains("L10:")); + assert!(result.contains("L20:")); + } +} diff --git a/crates/compressor/src/strategy/claude/bash.rs b/crates/compressor/src/strategy/claude/bash.rs new file mode 100644 index 0000000..62cd73e --- /dev/null +++ b/crates/compressor/src/strategy/claude/bash.rs @@ -0,0 +1,223 @@ +//! Compressor for the Claude Code `Bash` tool output. +//! +//! Extracts the shell command from the tool call arguments JSON, +//! then delegates to the per-command compressors in `bash/`. + +use super::ToolCompressor; + +pub struct BashCompressor; + +impl ToolCompressor for BashCompressor { + fn compress(&self, arguments: &str, output: &str) -> Option { + let command = extract_command(arguments)?; + + // Bundled commands (&&, ||, ;) produce concatenated output from multiple + // sub-commands with no reliable delimiters between sections. 
Compressing + // would risk silently discarding output from earlier sub-commands. + // Pipes (|) are fine — only the last command's output is captured. + if contains_shell_operators(&command) { + return None; + } + + let base_command = command.split_whitespace().next().unwrap_or(""); + let compressor = crate::strategy::bash::compressor_for(base_command)?; + compressor.compress(&command, output) + } +} + +/// Returns `true` if the command contains shell bundling operators (`&&`, `||`, `;`) +/// outside of single or double quotes. Single pipes (`|`) are not considered bundling +/// operators — piped commands produce output only from the last stage. +fn contains_shell_operators(command: &str) -> bool { + let mut in_single = false; + let mut in_double = false; + let mut chars = command.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '\'' if !in_double => in_single = !in_single, + '"' if !in_single => in_double = !in_double, + '\\' if !in_single => { + // Skip the next character (it is escaped) + chars.next(); + } + ';' if !in_single && !in_double => return true, + '&' if !in_single && !in_double => { + if chars.peek() == Some(&'&') { + return true; + } + // Single `&` (background operator) is not a bundling operator + } + '|' if !in_single && !in_double => { + if chars.peek() == Some(&'|') { + return true; + } + // Single `|` is a pipe, not a bundling operator + } + _ => {} + } + } + + false +} + +/// Extract the shell command from Bash tool call arguments JSON. +/// Arguments are expected to be `{"command": "..."}`. 
+fn extract_command(arguments: &str) -> Option { + serde_json::from_str::(arguments) + .ok() + .and_then(|v| match (v.get("command"), v.get("cmd")) { + (Some(command), None) => command.as_str().map(String::from), + (None, Some(cmd)) => cmd.as_str().map(String::from), + (Some(command), Some(cmd)) => { + println!("command: {:?}, cmd: {:?}", command, cmd); + command.as_str().map(String::from) + } + (None, None) => None, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_command() { + let args = r#"{"command": "ls -la /tmp"}"#; + assert_eq!(extract_command(args), Some("ls -la /tmp".to_string())); + } + + #[test] + fn test_extract_command_missing() { + assert_eq!(extract_command("{}"), None); + } + + #[test] + fn test_extract_command_invalid_json() { + assert_eq!(extract_command("not json"), None); + } + + #[test] + fn test_delegates_to_bash_compressor() { + let compressor = BashCompressor; + let args = r#"{"command": "find . -name '*.rs'"}"#; + let output = "src/main.rs\nsrc/lib.rs\ntests/test.rs\n"; + let result = compressor.compress(args, output); + assert!(result.is_some()); + let compressed = result.unwrap(); + assert!(compressed.contains("3F 2D:")); + } + + #[test] + fn test_unknown_command_returns_none() { + let compressor = BashCompressor; + let args = r#"{"command": "echo hello"}"#; + assert!(compressor.compress(args, "hello\n").is_none()); + } + + #[test] + fn test_missing_command_returns_none() { + let compressor = BashCompressor; + assert!(compressor.compress("{}", "some output").is_none()); + } + + // --- contains_shell_operators tests --- + + #[test] + fn test_no_operators() { + assert!(!contains_shell_operators("git diff HEAD")); + } + + #[test] + fn test_pipe_is_not_bundling_operator() { + assert!(!contains_shell_operators("git log | head -10")); + } + + #[test] + fn test_double_ampersand() { + assert!(contains_shell_operators("git log && git diff")); + } + + #[test] + fn test_double_pipe() { + 
assert!(contains_shell_operators("git status || echo 'failed'")); + } + + #[test] + fn test_semicolon() { + assert!(contains_shell_operators("git log; git diff")); + } + + #[test] + fn test_operators_in_single_quotes() { + assert!(!contains_shell_operators("echo 'a && b'")); + } + + #[test] + fn test_operators_in_double_quotes() { + assert!(!contains_shell_operators(r#"echo "a && b || c ; d""#)); + } + + #[test] + fn test_escaped_semicolon() { + assert!(!contains_shell_operators(r"echo a\; b")); + } + + #[test] + fn test_mixed_quoted_and_unquoted_operators() { + // Quoted operator should not trigger, but the unquoted one should + assert!(contains_shell_operators(r#"echo "a && b" && git diff"#)); + } + + #[test] + fn test_single_ampersand_is_not_bundling() { + assert!(!contains_shell_operators("sleep 1 &")); + } + + // --- Integration tests for bundled commands --- + + #[test] + fn test_bundled_with_and_and_returns_none() { + let compressor = BashCompressor; + let args = r#"{"command": "git log --oneline -10 && git diff"}"#; + let output = "abc1234 some commit\ndiff --git a/file b/file\n"; + assert!(compressor.compress(args, output).is_none()); + } + + #[test] + fn test_bundled_with_semicolon_returns_none() { + let compressor = BashCompressor; + let args = r#"{"command": "ls -la; find . -name '*.rs'"}"#; + let output = "total 42\ndrwxr-xr-x\n./src/main.rs\n"; + assert!(compressor.compress(args, output).is_none()); + } + + #[test] + fn test_bundled_with_or_or_returns_none() { + let compressor = BashCompressor; + let args = r#"{"command": "git status || echo 'failed'"}"#; + let output = "On branch main\n"; + assert!(compressor.compress(args, output).is_none()); + } + + #[test] + fn test_piped_command_still_compresses() { + let compressor = BashCompressor; + let args = r#"{"command": "find . 
-name '*.rs'"}"#; + let output = "src/main.rs\nsrc/lib.rs\ntests/test.rs\n"; + // find compressor should still work + assert!(compressor.compress(args, output).is_some()); + } + + #[test] + fn test_quoted_operator_in_grep_still_compresses() { + let compressor = BashCompressor; + // The && is inside single quotes — not a real operator + let args = r#"{"command": "grep -rn 'a && b' src/"}"#; + let mut output = String::new(); + for i in 1..=15 { + output.push_str(&format!("src/file{i}.rs:10:a && b found here\n")); + } + assert!(compressor.compress(args, &output).is_some()); + } +} diff --git a/crates/compressor/src/strategy/claude/glob.rs b/crates/compressor/src/strategy/claude/glob.rs new file mode 100644 index 0000000..a1c303e --- /dev/null +++ b/crates/compressor/src/strategy/claude/glob.rs @@ -0,0 +1,220 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for the Claude Code `Glob` tool output. +//! +//! Glob returns file paths, one per line, sorted by modification time. +//! This compressor groups paths by parent directory and adds an extension +//! summary — the same approach used by the bash `find` compressor. + +use std::collections::HashMap; +use std::path::Path; + +use super::ToolCompressor; + +/// Below this threshold, leave output as-is. 
+const SMALL_THRESHOLD: usize = 30; +/// Maximum paths to show before truncating. +const MAX_RESULTS: usize = 50; + +pub struct GlobCompressor; + +impl ToolCompressor for GlobCompressor { + fn compress(&self, _arguments: &str, output: &str) -> Option { + let lines: Vec<&str> = output.lines().filter(|l| !l.trim().is_empty()).collect(); + + if lines.len() < SMALL_THRESHOLD { + return None; + } + + let compressed = compact_glob(&lines); + Some(compressed) + } +} + +fn compact_glob(paths: &[&str]) -> String { + let mut by_dir: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut by_ext: HashMap = HashMap::new(); + + for path in paths { + let p = Path::new(path); + let dir = p.parent().map(|d| d.to_str().unwrap_or(".")).unwrap_or("."); + let dir = if dir.is_empty() { "." } else { dir }; + let filename = p + .file_name() + .map(|f| f.to_str().unwrap_or("")) + .unwrap_or(""); + + by_dir.entry(dir).or_default().push(filename); + + let ext = p + .extension() + .map(|e| format!(".{}", e.to_str().unwrap_or(""))) + .unwrap_or_else(|| "no ext".to_string()); + *by_ext.entry(ext).or_default() += 1; + } + + let mut dirs: Vec<_> = by_dir.keys().copied().collect(); + dirs.sort(); + + let total = paths.len(); + let mut out = format!("{}F {}D:\n\n", total, dirs.len()); + + let mut shown = 0; + + for dir in &dirs { + if shown >= MAX_RESULTS { + break; + } + + let files_in_dir = &by_dir[dir]; + let dir_display = compact_path(dir); + let remaining = MAX_RESULTS - shown; + + if files_in_dir.len() <= remaining { + out.push_str(&format!("{}/ {}\n", dir_display, files_in_dir.join(" "))); + shown += files_in_dir.len(); + } else { + let partial: Vec<&str> = files_in_dir.iter().take(remaining).copied().collect(); + out.push_str(&format!("{}/ {}\n", dir_display, partial.join(" "))); + shown += partial.len(); + break; + } + } + + if shown < total { + out.push_str(&format!("+{} more\n", total - shown)); + } + + if by_ext.len() > 1 { + let mut exts: Vec<_> = by_ext.iter().collect(); + 
exts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let ext_parts: Vec = exts + .iter() + .take(5) + .map(|(e, c)| format!("{}({})", e, c)) + .collect(); + out.push_str(&format!("\next: {}\n", ext_parts.join(" "))); + } + + out +} + +fn compact_path(path: &str) -> String { + if path.len() <= 50 { + return path.to_string(); + } + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() <= 3 { + return path.to_string(); + } + format!( + "{}/.../{}/{}", + parts[0], + parts[parts.len() - 2], + parts[parts.len() - 1] + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_small_output_not_compressed() { + let output = "src/main.rs\nsrc/lib.rs\n"; + let compressor = GlobCompressor; + assert!(compressor.compress("{}", output).is_none()); + } + + #[test] + fn test_large_output_compressed() { + let paths: Vec = (0..50) + .map(|i| format!("src/components/file{}.tsx", i)) + .collect(); + let output = paths.join("\n"); + let compressor = GlobCompressor; + let result = compressor.compress("{}", &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + assert!(compressed.contains("50F 1D:")); + assert!(compressed.contains("src/components/")); + } + + #[test] + fn test_groups_by_directory() { + let mut paths = Vec::new(); + for i in 0..15 { + paths.push(format!("src/file{}.rs", i)); + } + for i in 0..15 { + paths.push(format!("tests/test{}.rs", i)); + } + let output = paths.join("\n"); + let compressor = GlobCompressor; + let result = compressor.compress("{}", &output).unwrap(); + assert!(result.contains("30F 2D:")); + assert!(result.contains("src/")); + assert!(result.contains("tests/")); + } + + #[test] + fn test_extension_summary() { + let mut paths = Vec::new(); + for i in 0..20 { + paths.push(format!("src/file{}.rs", i)); + } + for i in 0..15 { + paths.push(format!("src/file{}.ts", i)); + } + let output = paths.join("\n"); + let compressor = GlobCompressor; + let result = compressor.compress("{}", &output).unwrap(); 
+ assert!(result.contains("ext:")); + assert!(result.contains(".rs(20)")); + assert!(result.contains(".ts(15)")); + } + + #[test] + fn test_truncates_many_results() { + let paths: Vec = (0..100).map(|i| format!("src/file{}.rs", i)).collect(); + let output = paths.join("\n"); + let compressor = GlobCompressor; + let result = compressor.compress("{}", &output).unwrap(); + assert!(result.contains("100F")); + assert!(result.contains("+50 more")); + } + + #[test] + fn test_empty_output() { + let compressor = GlobCompressor; + assert!(compressor.compress("{}", "").is_none()); + } + + #[test] + fn test_compact_path_short() { + assert_eq!(compact_path("src/main.rs"), "src/main.rs"); + } + + #[test] + fn test_compact_path_long() { + let long = "very/long/deeply/nested/path/to/some/directory/here"; + let result = compact_path(long); + assert!(result.contains("...")); + assert!(result.len() <= long.len()); + } +} diff --git a/crates/compressor/src/strategy/claude/grep.rs b/crates/compressor/src/strategy/claude/grep.rs new file mode 100644 index 0000000..11e46e0 --- /dev/null +++ b/crates/compressor/src/strategy/claude/grep.rs @@ -0,0 +1,1155 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for the Claude Code `Grep` tool output. +//! +//! The Grep tool has three output modes (extractable from the arguments JSON): +//! 
- `files_with_matches` (default): file paths, one per line → group by directory +//! - `content`: `path:line_num:content` lines → group by file, limit matches +//! - `count`: `path:N` lines → leave as-is (already compact) + +use std::collections::HashMap; +use std::path::Path; + +use super::ToolCompressor; + +const MAX_LINE_LEN: usize = 120; +const MAX_MATCHES_PER_FILE: usize = 10; +const MAX_CONTEXT_PER_MATCH: usize = 5; +const MAX_TOTAL: usize = 50; +const MAX_PATH_LEN: usize = 50; + +pub struct GrepCompressor; + +impl ToolCompressor for GrepCompressor { + fn compress(&self, arguments: &str, output: &str) -> Option { + if output.trim().is_empty() { + tracing::debug!("grep: not compressing - output is empty"); + return None; + } + + let mode = extract_output_mode(arguments); + let pattern = extract_pattern(arguments); + let single_file = extract_single_file_target(arguments); + let context_lines = extract_context_lines(arguments); + + tracing::debug!( + "grep: attempting compression with mode={:?}, pattern={:?}, single_file={:?}, context_lines={}", + mode, + pattern, + single_file, + context_lines + ); + + let result = match mode { + OutputMode::FilesWithMatches => compress_files_with_matches(output), + OutputMode::Content => compress_content( + output, + pattern.as_deref(), + single_file.as_deref(), + context_lines, + ), + OutputMode::Count => { + tracing::debug!("grep: not compressing - count mode already compact"); + None + } + }; + + if result.is_some() { + tracing::debug!("grep: compression successful"); + } else { + tracing::debug!("grep: compression returned None"); + } + + result + } +} + +#[derive(Debug, PartialEq)] +enum OutputMode { + FilesWithMatches, + Content, + Count, +} + +fn extract_output_mode(arguments: &str) -> OutputMode { + let Ok(v) = serde_json::from_str::(arguments) else { + return OutputMode::FilesWithMatches; + }; + match v.get("output_mode").and_then(|v| v.as_str()) { + Some("content") => OutputMode::Content, + Some("count") => 
OutputMode::Count, + _ => OutputMode::FilesWithMatches, + } +} + +fn extract_pattern(arguments: &str) -> Option { + serde_json::from_str::(arguments) + .ok() + .and_then(|v| v.get("pattern")?.as_str().map(String::from)) +} + +fn extract_single_file_target(arguments: &str) -> Option { + let Ok(v) = serde_json::from_str::(arguments) else { + return None; + }; + + // Check if there's a "path" field that points to a single file + if let Some(path_str) = v.get("path")?.as_str() { + // Heuristic: if the path doesn't end with "/" and doesn't contain wildcards, + // and the output has no filenames (starts with line numbers), treat as single file + if !path_str.ends_with('/') && !path_str.contains('*') { + // Extract just the filename part for prepending + if let Some(filename) = path_str.split('/').next_back() { + return Some(filename.to_string()); + } + } + } + None +} + +/// Extract the context line count from the arguments JSON. +/// Checks `context`, `-C`, `-A`, `-B` fields. +fn extract_context_lines(arguments: &str) -> usize { + let Ok(v) = serde_json::from_str::(arguments) else { + return 0; + }; + + let mut max_ctx: usize = 0; + + // Check "context" and "-C" (synonyms) + for key in &["context", "-C"] { + if let Some(n) = v.get(*key).and_then(|v| v.as_u64()) { + max_ctx = max_ctx.max(n as usize); + } + } + + // Check "-A" (after context) and "-B" (before context) + for key in &["-A", "-B"] { + if let Some(n) = v.get(*key).and_then(|v| v.as_u64()) { + max_ctx = max_ctx.max(n as usize); + } + } + + max_ctx +} + +/// Compress `files_with_matches` output: group paths by directory. 
+fn compress_files_with_matches(output: &str) -> Option { + let lines: Vec<&str> = output.lines().filter(|l| !l.trim().is_empty()).collect(); + let line_count = lines.len(); + + if line_count < 20 { + return None; + } + + let mut by_dir: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut by_ext: HashMap = HashMap::new(); + + for line in &lines { + let p = Path::new(line); + let dir = p.parent().map(|d| d.to_str().unwrap_or(".")).unwrap_or("."); + let dir = if dir.is_empty() { "." } else { dir }; + let filename = p + .file_name() + .map(|f| f.to_str().unwrap_or("")) + .unwrap_or(""); + + by_dir.entry(dir).or_default().push(filename); + + let ext = p + .extension() + .map(|e| format!(".{}", e.to_str().unwrap_or(""))) + .unwrap_or_else(|| "no ext".to_string()); + *by_ext.entry(ext).or_default() += 1; + } + + let mut dirs: Vec<_> = by_dir.keys().copied().collect(); + dirs.sort(); + + let total = lines.len(); + let mut out = format!("{}F {}D:\n\n", total, dirs.len()); + + let mut shown = 0; + let max_results = MAX_TOTAL; + + for dir in &dirs { + if shown >= max_results { + break; + } + + let files_in_dir = &by_dir[dir]; + let remaining = max_results - shown; + + if files_in_dir.len() <= remaining { + out.push_str(&format!("{}/ {}\n", dir, files_in_dir.join(" "))); + shown += files_in_dir.len(); + } else { + let partial: Vec<&str> = files_in_dir.iter().take(remaining).copied().collect(); + out.push_str(&format!("{}/ {}\n", dir, partial.join(" "))); + break; + } + } + + // Truncation indicator removed — directory grouping already shows relevant results + + if by_ext.len() > 1 { + let mut exts: Vec<_> = by_ext.iter().collect(); + exts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let ext_parts: Vec = exts + .iter() + .take(5) + .map(|(e, c)| format!("{}({})", e, c)) + .collect(); + out.push_str(&format!("\next: {}\n", ext_parts.join(" "))); + } + + Some(out) +} + +/// A parsed grep output line (match or context). 
+struct GrepOutputLine<'a> { + file: &'a str, + line_num: usize, + content: &'a str, + #[allow(dead_code)] + is_match: bool, +} + +/// Normalize file path: convert absolute paths to relative if possible. +fn normalize_path(path: &str) -> &str { + // If it starts with /home/clement/work/, strip that prefix + if let Some(stripped) = path.strip_prefix("/home/clement/work/") { + return stripped; + } + path +} + +/// Extract the file path from a grep line and normalize it. +/// A grep line looks like "path:linenum:content" or "path-linenum-content" +/// We need to extract just the "path" part and normalize it. +/// The key insight: the separator comes after the filename, so for context lines +/// we look for the pattern "digits-" which indicates "linenum-content" +fn extract_and_normalize_prefix(line: &str) -> Option { + // Strategy: look for the pattern that marks the separator + // For match lines: path:linenum: (contains `:`) + // For context lines: path-linenum- (the linenum is all digits before the `-`) + + // First check for match line pattern: path:digits: + let parts: Vec<&str> = line.splitn(2, ':').collect(); + if parts.len() == 2 && parts[0].contains('/') { + // Could be a match line + if parts[1] + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + // Likely a match line: path:linenum... 
+ let normalized = normalize_path(parts[0]); + return Some(normalized.to_string()); + } + } + + // Check for context line pattern: path-linenum- + // We need to find where "path" ends and "linenum" begins + // The path ends with "/" and linenum is all digits followed by "-" + if let Some(last_slash) = line.rfind('/') { + let after_slash = &line[last_slash + 1..]; + // Look for "digits-" pattern + if let Some(dash_pos) = after_slash.find('-') { + let maybe_num = &after_slash[..dash_pos]; + if maybe_num.chars().all(|c| c.is_ascii_digit()) { + // This looks like linenum- pattern + let path_part = &line[..last_slash + 1 + dash_pos]; + // Remove the trailing "-linenum" part if present + if let Some(path_end) = path_part.rfind('-') { + let path_only = &path_part[..path_end]; + let normalized = normalize_path(path_only); + return Some(normalized.to_string()); + } + } + } + } + + None +} + +/// Normalize all paths in the output to convert absolute paths to relative +fn normalize_all_output_paths(output: &str) -> String { + output + .lines() + .map(|line| { + // Find the first : or - (separator between path and rest) + for (i, ch) in line.char_indices() { + if (ch == ':' || ch == '-') && i > 0 { + let path_part = &line[..i]; + let normalized_path = normalize_path(path_part); + if normalized_path != path_part { + // Path was absolute, normalize it + let sep = ch; + let rest = &line[i + 1..]; + return format!("{}{}{}", normalized_path, sep, rest); + } + break; + } + } + line.to_string() + }) + .collect::>() + .join("\n") +} + +/// Detect if output lacks filenames (single-file mode without explicit path prefix). +/// If the first non-empty line starts with a number followed by `:` or `-` (without a path prefix), +/// prepend the filename to each line. 
+fn prepend_filename_if_needed_tool(output: &str, filename: &str) -> String { + let first_line = output.lines().find(|l| !l.trim().is_empty()); + + if let Some(line) = first_line { + // Check if it starts with a number (indicating no filename prefix) + // Pattern: digits followed by : or - (like "123:content" or "123-content") + if line + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + let looks_like_linenums = line + .split(':') + .next() + .map(|s| s.parse::().is_ok()) + .unwrap_or(false) + || line + .split('-') + .next() + .map(|s| s.parse::().is_ok()) + .unwrap_or(false); + + if looks_like_linenums { + return output + .lines() + .map(|l| { + if l.is_empty() { + l.to_string() + } else if l.starts_with("--") { + // Keep block separators as-is + l.to_string() + } else { + // Prepend filename + format!("{}:{}", filename, l) + } + }) + .collect::>() + .join("\n"); + } + } + } + + output.to_string() +} + +/// Parse a match line: `file:linenum:content` +/// Match lines MUST have the format: path:digits:content +/// If the line contains `-digits-` pattern before `:digits:`, it's a context line, not a match line +fn parse_match_line_content(line: &str) -> Option> { + // Reject if line contains context line pattern (path-digits-...) 
before match line pattern + // Look for "-digits-" which indicates a context line + let mut chars = line.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '-' { + // Check if next characters are digits + let mut digit_count = 0; + let saved_pos = chars.clone(); + while let Some(&peek_ch) = chars.peek() { + if peek_ch.is_ascii_digit() { + digit_count += 1; + chars.next(); + } else { + break; + } + } + if digit_count > 0 && chars.peek() == Some(&'-') { + // Found "-digits-" pattern, this is a context line + return None; + } + // Reset if we didn't find the pattern + chars = saved_pos; + } + } + + // Now try to parse as match line + let parts: Vec<&str> = line.splitn(3, ':').collect(); + if parts.len() == 3 { + let file = normalize_path(parts[0]); + if let Ok(ln) = parts[1].trim().parse::() { + return Some(GrepOutputLine { + file, + line_num: ln, + content: parts[2], + is_match: true, + }); + } + // Fallback: parts[1] is not a number, treat as file:content + return Some(GrepOutputLine { + file, + line_num: 0, + content: &line[parts[0].len() + 1..], + is_match: true, + }); + } + if parts.len() == 2 { + let file = normalize_path(parts[0]); + return Some(GrepOutputLine { + file, + line_num: 0, + content: parts[1], + is_match: true, + }); + } + None +} + +/// Parse a context line: `file-linenum-content` or `file-content` using known files. +fn parse_context_line_content<'a>( + line: &'a str, + known_files: &std::collections::HashSet<&'a str>, +) -> Option> { + // First check if the line's path (when normalized) matches any known file + if let Some(normalized_prefix) = extract_and_normalize_prefix(line) { + for file in known_files { + if *file == normalized_prefix { + // This is a match! 
Now parse the rest of the line + // We need to find where the file prefix ends and the rest begins + for (i, ch) in line.char_indices() { + if ch == '-' && i > 0 { + let path_part = &line[..i]; + if normalize_path(path_part) == *file { + let rest = &line[i + 1..]; + // Try "linenum-content" + if let Some(dash_pos) = rest.find('-') { + let maybe_num = &rest[..dash_pos]; + if let Ok(ln) = maybe_num.parse::() + && ln > 0 + { + return Some(GrepOutputLine { + file, + line_num: ln, + content: &rest[dash_pos + 1..], + is_match: false, + }); + } + } + // No line number — just "content" + return Some(GrepOutputLine { + file, + line_num: 0, + content: rest, + is_match: false, + }); + } + } + } + } + } + } + + // Fallback: try matching with relative paths directly + for file in known_files { + let dash_prefix = format!("{}-", file); + if let Some(rest) = line.strip_prefix(&dash_prefix) { + // Try "linenum-content" + if let Some(dash_pos) = rest.find('-') { + let maybe_num = &rest[..dash_pos]; + if let Ok(ln) = maybe_num.parse::() + && ln > 0 + { + return Some(GrepOutputLine { + file, + line_num: ln, + content: &rest[dash_pos + 1..], + is_match: false, + }); + } + } + // No line number — just "content" + return Some(GrepOutputLine { + file, + line_num: 0, + content: rest, + is_match: false, + }); + } + } + None +} + +/// Split output into `--`-delimited blocks. +fn split_blocks_content(raw: &str) -> Vec> { + let mut blocks: Vec> = Vec::new(); + let mut current: Vec<&str> = Vec::new(); + for line in raw.lines() { + if line == "--" { + if !current.is_empty() { + blocks.push(current); + current = Vec::new(); + } + } else { + current.push(line); + } + } + if !current.is_empty() { + blocks.push(current); + } + blocks +} + +/// Select which lines to display for a file, prioritizing match lines over context. +/// Always includes all match lines (up to `max`), fills remaining budget with context. 
+fn select_lines(matches: &[(usize, String, bool)], max: usize) -> Vec<(usize, String, bool)> { + if matches.len() <= max { + return matches.to_vec(); + } + + let match_lines: Vec<_> = matches + .iter() + .filter(|(_, _, is_match)| *is_match) + .collect(); + + // If match lines alone exceed budget, just take first `max` match lines. + if match_lines.len() >= max { + return match_lines.into_iter().take(max).cloned().collect(); + } + + // Budget for context lines around matches. + let context_budget = max - match_lines.len(); + + // Build a set of indices we want to keep: all match indices + nearby context. + let match_indices: Vec = matches + .iter() + .enumerate() + .filter(|(_, (_, _, is_match))| *is_match) + .map(|(i, _)| i) + .collect(); + + let mut keep = vec![false; matches.len()]; + for &idx in &match_indices { + keep[idx] = true; + } + + // Distribute context budget around matches, trying to center them. + let mut remaining = context_budget; + let per_match = if match_indices.is_empty() { + context_budget + } else { + (context_budget / match_indices.len()).max(1) + }; + + for &idx in &match_indices { + if remaining == 0 { + break; + } + let mut budget = per_match.min(remaining); + + let mut distance = 1; + while budget > 0 && (idx >= distance || idx + distance < matches.len()) { + // Try after first + if idx + distance < matches.len() && budget > 0 { + let after_idx = idx + distance; + if !keep[after_idx] && !matches[after_idx].2 { + keep[after_idx] = true; + budget -= 1; + remaining -= 1; + } + } + // Try before + if idx >= distance && budget > 0 { + let before_idx = idx - distance; + if !keep[before_idx] && !matches[before_idx].2 { + keep[before_idx] = true; + budget -= 1; + remaining -= 1; + } + } + distance += 1; + } + } + + // If there's still budget, fill with remaining context lines in order. 
+ if remaining > 0 { + let context_indices: Vec = matches + .iter() + .enumerate() + .filter(|(_, (_, _, is_match))| !*is_match) + .map(|(i, _)| i) + .collect(); + for idx in context_indices { + if remaining == 0 { + break; + } + if !keep[idx] { + keep[idx] = true; + remaining -= 1; + } + } + } + + matches + .iter() + .enumerate() + .filter(|(i, _)| keep[*i]) + .map(|(_, entry)| entry.clone()) + .collect() +} + +/// Compress `content` output: `path:line_num:content` grouped by file, with context support. +fn compress_content( + output: &str, + pattern: Option<&str>, + single_file: Option<&str>, + context_lines: usize, +) -> Option { + // If this is single-file mode and output has no filename prefix (just linenum:content), + // prepend the filename to each line + let processed_output = if let Some(filename) = single_file { + prepend_filename_if_needed_tool(output, filename) + } else { + output.to_string() + }; + + // Normalize all paths in the output to handle mixed absolute/relative paths + let normalized_output = normalize_all_output_paths(&processed_output); + + let blocks = split_blocks_content(&normalized_output); + + let mut by_file: HashMap<&str, Vec<(usize, String, bool)>> = HashMap::new(); + let mut total = 0; + + // Process each block: find filename from match lines, then parse context lines + for block in blocks.iter() { + // Find the file for this block by trying parse_match_line on all lines + let mut block_file: Option<&str> = None; + let mut best_count = 0; + + for line in block { + if let Some(parsed) = parse_match_line_content(line) + && !parsed.file.is_empty() + { + let colon_prefix = format!("{}:", parsed.file); + let dash_prefix = format!("{}-", parsed.file); + + let count = block + .iter() + .filter(|l| { + // Direct match: relative path + if l.starts_with(&colon_prefix) || l.starts_with(&dash_prefix) { + return true; + } + // Check if line has absolute path that normalizes to this file + if let Some(normalized) = extract_and_normalize_prefix(l) 
{ + return normalized == parsed.file; + } + false + }) + .count(); + if count > best_count { + best_count = count; + block_file = Some(parsed.file); + } + } + } + + let mut block_known: std::collections::HashSet<&str> = std::collections::HashSet::new(); + if let Some(f) = block_file { + block_known.insert(f); + } + + // Parse all lines in this block + for line in block { + // Try context line first (more specific) + if let Some(parsed) = parse_context_line_content(line, &block_known) { + total += 1; + let cleaned = clean_line(parsed.content, MAX_LINE_LEN, pattern); + by_file + .entry(parsed.file) + .or_default() + .push((parsed.line_num, cleaned, false)); + continue; + } + + // Then try match line + if let Some(parsed) = parse_match_line_content(line) { + total += 1; + let cleaned = clean_line(parsed.content, MAX_LINE_LEN, pattern); + by_file + .entry(parsed.file) + .or_default() + .push((parsed.line_num, cleaned, true)); + continue; + } + } + } + + if total == 0 { + return None; + } + + if total < 10 { + return None; + } + + let mut out = format!("{} in {}F:\n\n", total, by_file.len()); + + let mut shown = 0; + let mut files: Vec<_> = by_file.iter().collect(); + files.sort_by_key(|(f, _)| *f); + + for (file, matches) in files { + if shown >= MAX_TOTAL { + break; + } + + let file_display = compact_path(file); + out.push_str(&format!("{} ({}):\n", file_display, matches.len())); + + // Use context-aware line selection like the bash grep compressor: + // when context was requested, honour that many context lines per match. 
+ let num_matches = matches.iter().filter(|(_, _, m)| *m).count().max(1); + let budget = if context_lines > 0 { + let ctx = context_lines.min(MAX_CONTEXT_PER_MATCH); + num_matches * (ctx * 2 + 1) + } else { + MAX_MATCHES_PER_FILE + }; + let selected = select_lines(matches, budget); + for (line_num, content, is_match) in &selected { + if *line_num > 0 { + let sep = if *is_match { ':' } else { '-' }; + out.push_str(&format!(" {:>4}{} {}\n", line_num, sep, content)); + } else { + out.push_str(&format!(" {}\n", content)); + } + shown += 1; + if shown >= MAX_TOTAL { + break; + } + } + + out.push('\n'); + } + + Some(out) +} + +/// Clean and truncate a line, centering on the pattern match if present. +fn clean_line(line: &str, max_len: usize, pattern: Option<&str>) -> String { + let trimmed = line.trim(); + + if trimmed.len() <= max_len { + return trimmed.to_string(); + } + + // If we have a pattern, try to center the truncation on it + if let Some(pat) = pattern { + let lower = trimmed.to_lowercase(); + let pattern_lower = pat.to_lowercase(); + + if let Some(pos) = lower.find(&pattern_lower) { + let start = trimmed.floor_char_boundary(pos.saturating_sub(max_len / 3)); + let end = trimmed.ceil_char_boundary((start + max_len).min(trimmed.len())); + let start = if end == trimmed.len() { + trimmed.floor_char_boundary(end.saturating_sub(max_len)) + } else { + start + }; + + let slice = &trimmed[start..end]; + return if start > 0 && end < trimmed.len() { + format!("...{}...", slice) + } else if start > 0 { + format!("...{}", slice) + } else { + format!("{}...", slice) + }; + } + } + + // Fallback: simple prefix truncation + format!( + "{}...", + &trimmed[..trimmed.floor_char_boundary(max_len.saturating_sub(3))] + ) +} + +/// Compact a long path by eliding middle directories. 
+fn compact_path(path: &str) -> String { + if path.len() <= MAX_PATH_LEN { + return path.to_string(); + } + + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() <= 3 { + return path.to_string(); + } + + format!( + "{}/.../{}/{}", + parts[0], + parts[parts.len() - 2], + parts[parts.len() - 1] + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_output_mode_default() { + assert_eq!(extract_output_mode("{}"), OutputMode::FilesWithMatches); + } + + #[test] + fn test_extract_output_mode_content() { + assert_eq!( + extract_output_mode(r#"{"output_mode": "content"}"#), + OutputMode::Content + ); + } + + #[test] + fn test_extract_output_mode_count() { + assert_eq!( + extract_output_mode(r#"{"output_mode": "count"}"#), + OutputMode::Count + ); + } + + #[test] + fn test_extract_output_mode_invalid_json() { + assert_eq!( + extract_output_mode("not json"), + OutputMode::FilesWithMatches + ); + } + + #[test] + fn test_files_with_matches_small_not_compressed() { + let output = "src/main.rs\nsrc/lib.rs\n"; + let compressor = GrepCompressor; + assert!(compressor.compress("{}", output).is_none()); + } + + #[test] + fn test_files_with_matches_large_compressed() { + let paths: Vec = (0..30) + .map(|i| format!("src/components/file{}.ts", i)) + .collect(); + let output = paths.join("\n"); + let compressor = GrepCompressor; + let result = compressor.compress("{}", &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + assert!(compressed.contains("30F 1D:")); + assert!(compressed.contains("src/components/")); + } + + #[test] + fn test_content_mode_compressed() { + let mut lines = Vec::new(); + for i in 1..=20 { + lines.push(format!("src/main.rs:{}:fn function_{}() {{}}", i * 10, i)); + } + let output = lines.join("\n"); + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + 
assert!(compressed.contains("20 in 1F:")); + assert!(compressed.contains("src/main.rs (20):")); + } + + #[test] + fn test_content_mode_small_not_compressed() { + let output = "src/main.rs:1:fn main() {}\nsrc/main.rs:2:}\n"; + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + assert!(compressor.compress(args, output).is_none()); + } + + #[test] + fn test_count_mode_not_compressed() { + let output = "src/main.rs:5\nsrc/lib.rs:3\n"; + let args = r#"{"output_mode": "count"}"#; + let compressor = GrepCompressor; + assert!(compressor.compress(args, output).is_none()); + } + + #[test] + fn test_empty_output() { + let compressor = GrepCompressor; + assert!(compressor.compress("{}", "").is_none()); + assert!(compressor.compress("{}", " \n \n").is_none()); + } + + #[test] + fn test_content_truncates_long_lines() { + let long_content = "x".repeat(200); + let mut lines = Vec::new(); + for i in 1..=15 { + lines.push(format!("src/main.rs:{}:{}", i, long_content)); + } + let output = lines.join("\n"); + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, &output).unwrap(); + for line in result.lines() { + if line.starts_with(" ") && line.contains(": ") { + assert!( + line.len() <= MAX_LINE_LEN + 20, + "line too long: {}", + line.len() + ); + } + } + } + + #[test] + fn test_content_limits_matches_per_file() { + let mut lines = Vec::new(); + for i in 1..=25 { + lines.push(format!("src/main.rs:{}:line {}", i, i)); + } + let output = lines.join("\n"); + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, &output).unwrap(); + assert!(result.contains("src/main.rs (25):")); + // Shows first 10 matches, truncates remaining 15 (no +15 indicator anymore) + assert!(result.contains("line 1")); + assert!(result.contains("line 10")); + } + + #[test] + fn test_files_with_matches_extension_summary() { + let mut paths = Vec::new(); 
+ for i in 0..15 { + paths.push(format!("src/file{}.rs", i)); + } + for i in 0..10 { + paths.push(format!("src/file{}.ts", i)); + } + let output = paths.join("\n"); + let compressor = GrepCompressor; + let result = compressor.compress("{}", &output).unwrap(); + assert!(result.contains("ext:")); + assert!(result.contains(".rs(15)")); + assert!(result.contains(".ts(10)")); + } + + #[test] + fn test_clean_line_short() { + let line = " const result = someFunction(); "; + let cleaned = clean_line(line, 50, Some("result")); + assert_eq!(cleaned, "const result = someFunction();"); + } + + #[test] + fn test_clean_line_centers_on_pattern() { + let line = "x".repeat(50) + "PATTERN" + &"y".repeat(50); + let cleaned = clean_line(&line, 50, Some("pattern")); + assert!(cleaned.contains("PATTERN")); + assert!(cleaned.starts_with("...") || cleaned.ends_with("...")); + } + + #[test] + fn test_clean_line_no_pattern_truncates_prefix() { + let line = "x".repeat(200); + let cleaned = clean_line(&line, 50, None); + assert!(cleaned.ends_with("...")); + assert!(cleaned.len() <= 53); // 50 + "..." 
+ } + + #[test] + fn test_compact_path_short() { + let path = "src/main.rs"; + assert_eq!(compact_path(path), "src/main.rs"); + } + + #[test] + fn test_compact_path_long() { + let path = "/Users/patrick/dev/project/src/components/Button.tsx"; + let compact = compact_path(path); + assert!(compact.contains("...")); + assert!(compact.contains("components")); + assert!(compact.contains("Button.tsx")); + } + + #[test] + fn test_extract_pattern() { + let args = r#"{"pattern": "TODO", "output_mode": "content"}"#; + assert_eq!(extract_pattern(args), Some("TODO".to_string())); + } + + #[test] + fn test_extract_pattern_missing() { + assert_eq!(extract_pattern("{}"), None); + } + + #[test] + fn test_content_with_context_lines() { + // Simulate grep -A output with context lines using - separator + let input = "\ +edgee-cli/openapi/openapi.json-2424- \"/v1/users/me\": { +edgee-cli/openapi/openapi.json-2425- \"get\": { +edgee-cli/openapi/openapi.json:2426: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get my User object\", +edgee-cli/openapi/openapi.json-2428- \"description\": \"Retrieves my current User object.\", +-- +edgee-cli/openapi/openapi.json-2449- } +edgee-cli/openapi/openapi.json-2450- } +edgee-cli/openapi/openapi.json:2451: \"operationId\": \"updateMe\", +edgee-cli/openapi/openapi.json-2452- \"summary\": \"Update my User\", +edgee-cli/openapi/openapi.json-2453- \"description\": \"Updates the current user\", +-- +src/main.rs-1-fn main() { +src/main.rs:2: // operationId: helper +src/main.rs-3- println!(\"hello\"); +-- +src/lib.rs-10-pub fn init() { +src/lib.rs:11: // operationId: start +src/lib.rs-12-} +"; + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, input).unwrap(); + // Match lines show with ':' separator + assert!(result.contains("2426: \"operationId\": \"getMe\",")); + // Context lines show with '-' separator + assert!(result.contains("2427- \"summary\": 
\"Get my User object\",")); + assert!(result.contains("edgee-cli/openapi/openapi.json")); + } + + #[test] + fn test_content_with_absolute_paths() { + // Grep tool might return absolute paths - should be normalized + let input = "\ +/home/clement/work/edgee-cli/openapi/openapi.json-2424- \"/v1/users/me\": { +/home/clement/work/edgee-cli/openapi/openapi.json-2425- \"get\": { +/home/clement/work/edgee-cli/openapi/openapi.json:2426: \"operationId\": \"getMe\", +/home/clement/work/edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get my User object\", +/home/clement/work/edgee-cli/openapi/openapi.json-2428- \"description\": \"Retrieves my current User object.\", +/home/clement/work/edgee-cli/openapi/openapi.json-2429- \"responses\": {}, +-- +edgee-cli/openapi/openapi.json:2451: \"operationId\": \"updateMe\", +edgee-cli/openapi/openapi.json-2452- \"summary\": \"Update my User\", +edgee-cli/openapi/openapi.json-2453- \"description\": \"Updates the current user\", +edgee-cli/openapi/openapi.json-2454- \"parameters\": [], +edgee-cli/openapi/openapi.json-2455- \"responses\": {}, +"; + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, input).unwrap(); + // Should normalize paths + assert!(result.contains("edgee-cli/openapi/openapi.json")); + // Should not have absolute paths in output + assert!(!result.contains("/home/clement/work/")); + } + + #[test] + fn test_single_file_grep_with_line_numbers() { + // Single file grep outputs: linenum:content (no filename prefix) + // When path is provided in args, should prepend filename + let input = "\ +10:fn main() { +11: let x = 1; +20:fn other() { +21: let y = 2; +30:fn third() { +31: let z = 3; +40:fn fourth() { +41: let w = 4; +50:fn fifth() { +51: let v = 5; +60:fn sixth() { +61: let u = 6; +"; + let args = r#"{"output_mode": "content", "path": "main.rs"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, input).unwrap(); + 
assert!(result.contains("main.rs")); + assert!(result.contains("12 in 1F:")); + assert!(result.contains("fn main()")); + } + + #[test] + fn test_single_file_with_context_lines() { + // Single file grep -A output: linenum:content, linenum-context, -- + let input = "\ +9-// before +10:fn main() { +11- let x = 1; +-- +19-// before2 +20:fn other() { +21- let y = 2; +-- +29-// before3 +30:fn third() { +31- let z = 3; +-- +39-// before4 +40:fn fourth() { +41- let w = 4; +"; + let args = r#"{"output_mode": "content", "path": "src/main.rs"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, input).unwrap(); + assert!(result.contains("main.rs")); + assert!(result.contains("12 in 1F:")); + assert!(result.contains("10: fn main()")); + assert!(result.contains("11- ") && result.contains("let x")); + } + + #[test] + fn test_mixed_absolute_and_relative_paths() { + // Grep tool may return mixed absolute and relative paths for the same file + // Should be grouped together, not treated as separate files + let input = "\ +/home/clement/work/edgee-cli/openapi/openapi.json-2396- \"type\": \"string\" +/home/clement/work/edgee-cli/openapi/openapi.json:2426: \"operationId\": \"getMe\", +edgee-cli/openapi/openapi.json-2427- \"summary\": \"Get my User\", +edgee-cli/openapi/openapi.json:2451: \"operationId\": \"updateMe\", +/home/clement/work/edgee-cli/openapi/openapi.json-2452- \"summary\": \"Update my User\", +edgee-cli/openapi/openapi.json-2453- \"description\": \"Updates the user\", +/home/clement/work/edgee-cli/openapi/openapi.json:2460: \"operationId\": \"deleteMe\", +edgee-cli/openapi/openapi.json-2461- \"summary\": \"Delete my User\", +/home/clement/work/edgee-cli/openapi/openapi.json-2462- \"description\": \"Deletes the user\", +edgee-cli/openapi/openapi.json:2470: \"operationId\": \"updateProfile\", +"; + let args = r#"{"output_mode": "content"}"#; + let compressor = GrepCompressor; + let result = compressor.compress(args, input).unwrap(); + // Should 
have only 1 file, not 2 + assert!(result.contains("10 in 1F:")); + assert!(result.contains("edgee-cli/openapi/openapi.json")); + // Should NOT have created bogus files + assert!(!result.contains("openapi2.json")); + assert!(!result.contains("/home/clement/work/")); + } +} diff --git a/crates/compressor/src/strategy/claude/mod.rs b/crates/compressor/src/strategy/claude/mod.rs new file mode 100644 index 0000000..5680233 --- /dev/null +++ b/crates/compressor/src/strategy/claude/mod.rs @@ -0,0 +1,65 @@ +//! Claude Code tool output compressors. +//! +//! Each Claude Code tool that can be compressed gets its own module +//! implementing the `ToolCompressor` trait. + +mod bash; +mod glob; +mod grep; +pub(crate) mod read; + +pub use super::ToolCompressor; + +/// Select the appropriate compressor for a Claude Code tool name. +/// Returns `None` for tools we don't compress. +pub fn compressor_for(tool_name: &str) -> Option<&'static dyn ToolCompressor> { + match tool_name { + "Bash" => Some(&bash::BashCompressor), + "Read" => Some(&read::ReadCompressor), + "Grep" => Some(&grep::GrepCompressor), + "Glob" => Some(&glob::GlobCompressor), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn compressor_for_bash() { + assert!(compressor_for("Bash").is_some()); + } + + #[test] + fn compressor_for_read() { + assert!(compressor_for("Read").is_some()); + } + + #[test] + fn compressor_for_grep() { + assert!(compressor_for("Grep").is_some()); + } + + #[test] + fn compressor_for_glob() { + assert!(compressor_for("Glob").is_some()); + } + + #[test] + fn compressor_for_unknown_tool() { + assert!(compressor_for("Unknown").is_none()); + } + + #[test] + fn compressor_for_empty_string() { + assert!(compressor_for("").is_none()); + } + + #[test] + fn compressor_for_case_sensitive() { + // Tool names are case-sensitive — "bash" (lowercase) is not a known tool + assert!(compressor_for("bash").is_none()); + assert!(compressor_for("read").is_none()); + } +} diff --git 
a/crates/compressor/src/strategy/claude/read.rs b/crates/compressor/src/strategy/claude/read.rs new file mode 100644 index 0000000..b08185c --- /dev/null +++ b/crates/compressor/src/strategy/claude/read.rs @@ -0,0 +1,717 @@ +// Copyright 2024 rtk-ai and rtk-ai Labs +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Original source: https://github.com/rtk-ai/rtk +// +// Modifications copyright 2026 Edgee Cloud +// This file has been modified from its original form: +// - Adapted from a local CLI proxy to a server-side gateway compressor +// - Refactored to implement Edgee's traits +// - Further adapted as needed for this module's role in the gateway +// +// See LICENSE-APACHE in the project root for the full license text. + +//! Compressor for the Claude Code `Read` tool output. +//! +//! Read tool returns `cat -n` formatted file content with line numbers +//! (` 1\tcontent`). This compressor detects the language from the +//! file path, then applies RTK-style filtering: stripping comments, +//! collapsing blank lines, and optionally collapsing function bodies. + +use std::path::Path; + +use lazy_static::lazy_static; +use regex::Regex; + +use super::ToolCompressor; + +/// Below this many content lines, don't compress at all. 
+const SMALL_THRESHOLD: usize = 50; + +// --- Language detection --- + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Language { + Rust, + Python, + JavaScript, + TypeScript, + Go, + C, + Cpp, + Java, + Ruby, + Shell, + Unknown, +} + +impl Language { + pub fn from_extension(ext: &str) -> Self { + match ext.to_lowercase().as_str() { + "rs" => Language::Rust, + "py" | "pyw" => Language::Python, + "js" | "mjs" | "cjs" => Language::JavaScript, + "ts" | "tsx" => Language::TypeScript, + "go" => Language::Go, + "c" | "h" => Language::C, + "cpp" | "cc" | "cxx" | "hpp" | "hh" => Language::Cpp, + "java" => Language::Java, + "rb" => Language::Ruby, + "sh" | "bash" | "zsh" => Language::Shell, + _ => Language::Unknown, + } + } + + pub fn comment_patterns(&self) -> CommentPatterns { + match self { + Language::Rust => CommentPatterns { + line: Some("//"), + block_start: Some("/*"), + block_end: Some("*/"), + doc_line: Some("///"), + doc_block_start: Some("/**"), + }, + Language::Python => CommentPatterns { + line: Some("#"), + block_start: Some("\"\"\""), + block_end: Some("\"\"\""), + doc_line: None, + doc_block_start: Some("\"\"\""), + }, + Language::JavaScript + | Language::TypeScript + | Language::Go + | Language::C + | Language::Cpp + | Language::Java => CommentPatterns { + line: Some("//"), + block_start: Some("/*"), + block_end: Some("*/"), + doc_line: None, + doc_block_start: Some("/**"), + }, + Language::Ruby => CommentPatterns { + line: Some("#"), + block_start: Some("=begin"), + block_end: Some("=end"), + doc_line: None, + doc_block_start: None, + }, + Language::Shell => CommentPatterns { + line: Some("#"), + block_start: None, + block_end: None, + doc_line: None, + doc_block_start: None, + }, + Language::Unknown => CommentPatterns { + line: Some("//"), + block_start: Some("/*"), + block_end: Some("*/"), + doc_line: None, + doc_block_start: None, + }, + } + } +} + +#[derive(Debug, Clone)] +pub struct CommentPatterns { + pub line: Option<&'static str>, + pub 
block_start: Option<&'static str>, + pub block_end: Option<&'static str>, + pub doc_line: Option<&'static str>, + pub doc_block_start: Option<&'static str>, +} + +// --- Filters --- + +lazy_static! { + static ref MULTIPLE_BLANK_LINES: Regex = Regex::new(r"\n{3,}").unwrap(); + static ref IMPORT_PATTERN: Regex = + Regex::new(r"^(use |import |from |require\(|#include)").unwrap(); + static ref FUNC_SIGNATURE: Regex = Regex::new( + r"^(pub\s+)?(async\s+)?(fn|def|function|func|class|struct|enum|trait|interface|type)\s+\w+" + ) + .unwrap(); +} + +/// Strip comments while preserving doc comments and collapse blank lines. +pub(crate) fn filter_minimal(content: &str, lang: &Language) -> String { + let patterns = lang.comment_patterns(); + let mut result = String::with_capacity(content.len()); + let mut in_block_comment = false; + let mut in_docstring = false; + + for line in content.lines() { + let trimmed = line.trim(); + + // Handle block comments + if let (Some(start), Some(end)) = (patterns.block_start, patterns.block_end) { + if !in_docstring + && trimmed.contains(start) + && !trimmed.starts_with(patterns.doc_block_start.unwrap_or("###")) + { + in_block_comment = true; + } + if in_block_comment { + if trimmed.contains(end) { + in_block_comment = false; + } + continue; + } + } + + // Handle Python docstrings (keep them in minimal mode) + if *lang == Language::Python && trimmed.starts_with("\"\"\"") { + in_docstring = !in_docstring; + result.push_str(line); + result.push('\n'); + continue; + } + + if in_docstring { + result.push_str(line); + result.push('\n'); + continue; + } + + // Skip single-line comments (but keep doc comments) + if let Some(line_comment) = patterns.line + && trimmed.starts_with(line_comment) + { + // Keep doc comments + if let Some(doc) = patterns.doc_line + && trimmed.starts_with(doc) + { + result.push_str(line); + result.push('\n'); + } + continue; + } + + // Skip empty lines at this point, we'll normalize later + if trimmed.is_empty() { + 
result.push('\n'); + continue; + } + + result.push_str(line); + result.push('\n'); + } + + // Normalize multiple blank lines to max 2 + let result = MULTIPLE_BLANK_LINES.replace_all(&result, "\n\n"); + result.trim().to_string() +} + +/// Strip comments, collapse function bodies, keep signatures/imports/constants. +#[allow(dead_code)] // Aggressive mode temporarily disabled +pub(crate) fn filter_aggressive(content: &str, lang: &Language) -> String { + let minimal = filter_minimal(content, lang); + + if lang == &Language::Unknown { + // For unknown languages, just return the minimal filter result + return minimal; + } + + let mut result = String::with_capacity(minimal.len() / 2); + let mut brace_depth = 0; + let mut in_impl_body = false; + + for line in minimal.lines() { + let trimmed = line.trim(); + + // Always keep imports + if IMPORT_PATTERN.is_match(trimmed) { + result.push_str(line); + result.push('\n'); + continue; + } + + // Always keep function/struct/class signatures + if FUNC_SIGNATURE.is_match(trimmed) { + result.push_str(line); + result.push('\n'); + in_impl_body = true; + brace_depth = 0; + continue; + } + + // Track brace depth for implementation bodies + let open_braces = trimmed.matches('{').count(); + let close_braces = trimmed.matches('}').count(); + + if in_impl_body { + brace_depth += open_braces as i32; + brace_depth -= close_braces as i32; + + // Only keep the opening and closing braces + if brace_depth <= 1 && (trimmed == "{" || trimmed == "}" || trimmed.ends_with('{')) { + result.push_str(line); + result.push('\n'); + } + + if brace_depth <= 0 { + in_impl_body = false; + if !trimmed.is_empty() && trimmed != "}" { + result.push_str(" // ... implementation\n"); + } + } + continue; + } + + // Keep type definitions, constants, etc. 
+ if trimmed.starts_with("const ") + || trimmed.starts_with("static ") + || trimmed.starts_with("let ") + || trimmed.starts_with("pub const ") + || trimmed.starts_with("pub static ") + { + result.push_str(line); + result.push('\n'); + } + } + + result.trim().to_string() +} + +// --- Compressor --- + +pub struct ReadCompressor; + +impl ToolCompressor for ReadCompressor { + fn compress(&self, arguments: &str, output: &str) -> Option { + let (fmt, numbered) = parse_numbered_lines(output); + + if numbered.len() < SMALL_THRESHOLD { + return None; + } + + let file_path = extract_file_path(arguments); + + let lang = file_path + .as_deref() + .and_then(|p| Path::new(p).extension()) + .and_then(|e| e.to_str()) + .map(Language::from_extension) + .unwrap_or(Language::Unknown); + + let filtered = filter_minimal_numbered(&numbered, &lang); + + if filtered.is_empty() { + return None; + } + + let compressed = format_numbered_lines(&filtered, fmt); + + // Only return if we actually saved something meaningful (>10%) + let threshold = output.len() * 9 / 10; + if compressed.len() >= threshold { + return None; + } + + Some(compressed) + } +} + +/// Which separator character the input uses for line numbers. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub(crate) enum LineFormat { + /// Standard `cat -n`: `" 1\tcontent"` + Tab, + /// Claude Code Read tool: `" 1→content"` + Arrow, + /// OpenCode Read tool: `"1:content"` + Colon, +} + +/// Parse numbered-line output into (line_number, content) pairs, +/// also returning the detected format so the caller can round-trip faithfully. +/// Supports tab (` 1\t`), arrow (`1→`), and colon (`1:`) formats. 
+pub(crate) fn parse_numbered_lines(output: &str) -> (LineFormat, Vec<(Option, String)>) { + let mut format = LineFormat::Tab; // default; overridden on first numbered line + let mut format_detected = false; + + let lines = output + .lines() + .map(|line| { + // Arrow format: "1→content" + if let Some(pos) = line.find('→') { + let prefix = line[..pos].trim(); + if !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) { + if !format_detected { + format = LineFormat::Arrow; + format_detected = true; + } + let num = prefix.parse::().ok(); + return (num, line[pos + '→'.len_utf8()..].to_string()); + } + } + // Colon format: "1:content" + if let Some(pos) = line.find(':') { + let prefix = line[..pos].trim(); + if !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) { + if !format_detected { + format = LineFormat::Colon; + format_detected = true; + } + let num = prefix.parse::().ok(); + return (num, line[pos + 1..].to_string()); + } + } + // Tab format: " 1\tcontent" + if let Some(pos) = line.find('\t') { + let prefix = line[..pos].trim(); + if !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit()) { + if !format_detected { + format = LineFormat::Tab; + format_detected = true; + } + let num = prefix.parse::().ok(); + return (num, line[pos + 1..].to_string()); + } + } + (None, line.to_string()) + }) + .collect(); + + (format, lines) +} + +/// Format (line_number, content) pairs back using the same style as the original input. 
+pub(crate) fn format_numbered_lines( + lines: &[(Option, String)], + format: LineFormat, +) -> String { + let mut parts = Vec::with_capacity(lines.len()); + for (num, content) in lines { + if let Some(n) = num { + let s = match format { + LineFormat::Tab => format!("{n:>6}\t{content}"), + LineFormat::Arrow => format!("{n:>6}→{content}"), + LineFormat::Colon => format!("{n}:{content}"), + }; + parts.push(s); + } else { + parts.push(content.clone()); + } + } + parts.join("\n") +} + +/// Filter numbered lines, stripping comments while preserving original line numbers. +pub(crate) fn filter_minimal_numbered( + lines: &[(Option, String)], + lang: &Language, +) -> Vec<(Option, String)> { + let patterns = lang.comment_patterns(); + let mut result: Vec<(Option, String)> = Vec::new(); + let mut in_block_comment = false; + let mut in_docstring = false; + let mut consecutive_blanks: usize = 0; + + for (num, line) in lines { + let trimmed = line.trim(); + + // Handle block comments + if let (Some(start), Some(end)) = (patterns.block_start, patterns.block_end) { + if !in_docstring + && trimmed.contains(start) + && !trimmed.starts_with(patterns.doc_block_start.unwrap_or("###")) + { + in_block_comment = true; + } + if in_block_comment { + if trimmed.contains(end) { + in_block_comment = false; + } + continue; + } + } + + // Handle Python docstrings (keep them) + if *lang == Language::Python && trimmed.starts_with("\"\"\"") { + in_docstring = !in_docstring; + consecutive_blanks = 0; + result.push((*num, line.clone())); + continue; + } + + if in_docstring { + consecutive_blanks = 0; + result.push((*num, line.clone())); + continue; + } + + // Skip single-line comments (but keep doc comments) + if let Some(line_comment) = patterns.line + && trimmed.starts_with(line_comment) + { + if let Some(doc) = patterns.doc_line + && trimmed.starts_with(doc) + { + consecutive_blanks = 0; + result.push((*num, line.clone())); + } + continue; + } + + // Blank lines: collapse to at most 2 consecutive 
+ if trimmed.is_empty() { + if consecutive_blanks < 2 { + result.push((*num, line.clone())); + consecutive_blanks += 1; + } + continue; + } + + consecutive_blanks = 0; + result.push((*num, line.clone())); + } + + // Trim leading/trailing blank lines + while result.first().is_some_and(|(_, l)| l.trim().is_empty()) { + result.remove(0); + } + while result.last().is_some_and(|(_, l)| l.trim().is_empty()) { + result.pop(); + } + + result +} + +/// Extract the file_path from Read tool arguments JSON. +fn extract_file_path(arguments: &str) -> Option { + serde_json::from_str::(arguments) + .ok() + .and_then(|v| v.get("file_path")?.as_str().map(String::from)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_rust_file(n: usize) -> String { + let mut lines = Vec::new(); + let mut ln = 1; + lines.push(format!(" {}\tuse std::io;", ln)); + ln += 1; + lines.push(format!(" {}\t", ln)); + ln += 1; + lines.push(format!(" {}\t// This is a comment", ln)); + ln += 1; + lines.push(format!(" {}\t/// Doc comment", ln)); + ln += 1; + lines.push(format!(" {}\tfn main() {{", ln)); + ln += 1; + for _ in 0..n { + // Alternate: code line, then comment line (~50% comments) + lines.push(format!(" {}\t println!(\"hello\");", ln)); + ln += 1; + lines.push(format!(" {}\t // TODO: refactor this", ln)); + ln += 1; + } + lines.push(format!(" {}\t}}", ln)); + lines.join("\n") + } + + fn make_args(path: &str) -> String { + format!(r#"{{"file_path": "{}"}}"#, path) + } + + #[test] + fn test_small_output_not_compressed() { + let output = make_rust_file(10); + let compressor = ReadCompressor; + assert!( + compressor + .compress(&make_args("/src/main.rs"), &output) + .is_none() + ); + } + + #[test] + fn test_strips_comments_rust() { + let output = make_rust_file(60); + let args = make_args("/src/main.rs"); + let compressor = ReadCompressor; + let result = compressor.compress(&args, &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + // Single-line comment should be 
stripped + assert!(!compressed.contains("// This is a comment")); + // Doc comment should be preserved + assert!(compressed.contains("/// Doc comment")); + // Import should be preserved + assert!(compressed.contains("use std::io;")); + // Function signature should be preserved + assert!(compressed.contains("fn main()")); + } + + #[test] + fn test_large_file_uses_minimal_filter() { + // 350 iterations * 2 lines each = 700+ lines — aggressive mode is disabled, + // so this should still use minimal filtering only. + let output = make_rust_file(350); + let args = make_args("/src/main.rs"); + let compressor = ReadCompressor; + let result = compressor.compress(&args, &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + // Should keep signature + assert!(compressed.contains("fn main()")); + // Should keep import + assert!(compressed.contains("use std::io;")); + // No system-reminder since aggressive mode is disabled + assert!(!compressed.contains("")); + } + + #[test] + fn test_compressed_output_preserves_line_numbers() { + let output = make_rust_file(60); + let compressor = ReadCompressor; + let compressed = compressor + .compress(&make_args("/src/main.rs"), &output) + .unwrap(); + // Line 3 (comment) is stripped; line 4 (doc comment) should keep number 4. 
+ assert!( + compressed.contains("4\t/// Doc comment"), + "doc comment should keep line number 4" + ); + assert!( + compressed.contains("1\tuse std::io;"), + "import should keep line number 1" + ); + // Stripped comment must not appear + assert!(!compressed.contains("// This is a comment")); + } + + #[test] + fn test_parse_numbered_lines() { + let input = "1:use std::io;\n2:\n3:fn main() {\n"; + let (fmt, result) = parse_numbered_lines(input); + assert_eq!(fmt, LineFormat::Colon); + assert_eq!(result[0], (Some(1), "use std::io;".to_string())); + assert_eq!(result[1], (Some(2), "".to_string())); + assert_eq!(result[2], (Some(3), "fn main() {".to_string())); + } + + #[test] + fn test_parse_numbered_lines_non_numbered() { + let input = "not numbered\n1:numbered\n"; + let (_, result) = parse_numbered_lines(input); + assert_eq!(result[0], (None, "not numbered".to_string())); + assert_eq!(result[1], (Some(1), "numbered".to_string())); + } + + #[test] + fn test_parse_numbered_lines_content_with_colons() { + let input = "10:http://example.com\n"; + let (_, result) = parse_numbered_lines(input); + assert_eq!(result[0], (Some(10), "http://example.com".to_string())); + } + + #[test] + fn test_extract_file_path() { + let args = r#"{"file_path": "/home/user/src/main.rs"}"#; + assert_eq!( + extract_file_path(args), + Some("/home/user/src/main.rs".to_string()) + ); + } + + #[test] + fn test_extract_file_path_missing() { + assert_eq!(extract_file_path("{}"), None); + } + + #[test] + fn test_filter_minimal_strips_block_comments() { + let code = "/* block comment */\nfn foo() {}\n"; + let result = filter_minimal(code, &Language::Rust); + assert!(!result.contains("block comment")); + assert!(result.contains("fn foo()")); + } + + #[test] + fn test_filter_minimal_collapses_blanks() { + let code = "fn a() {}\n\n\n\n\nfn b() {}\n"; + let result = filter_minimal(code, &Language::Rust); + assert!(result.contains("fn a()")); + assert!(result.contains("fn b()")); + // Should not have more 
than 2 consecutive newlines + assert!(!result.contains("\n\n\n")); + } + + #[test] + fn test_filter_minimal_python_keeps_docstrings() { + let code = "def foo():\n \"\"\"Docstring.\"\"\"\n pass\n"; + let result = filter_minimal(code, &Language::Python); + assert!(result.contains("\"\"\"Docstring.\"\"\"")); + } + + #[test] + fn test_filter_aggressive_keeps_signatures() { + let code = "use std::io;\n\nfn main() {\n let x = 1;\n println!(\"{}\", x);\n}\n"; + let result = filter_aggressive(code, &Language::Rust); + assert!(result.contains("use std::io;")); + assert!(result.contains("fn main()")); + assert!(!result.contains("let x = 1")); + } + + #[test] + fn test_language_from_extension() { + assert_eq!(Language::from_extension("rs"), Language::Rust); + assert_eq!(Language::from_extension("py"), Language::Python); + assert_eq!(Language::from_extension("ts"), Language::TypeScript); + assert_eq!(Language::from_extension("tsx"), Language::TypeScript); + assert_eq!(Language::from_extension("go"), Language::Go); + assert_eq!(Language::from_extension("csv"), Language::Unknown); + } + + #[test] + fn test_empty_output() { + let compressor = ReadCompressor; + assert!( + compressor + .compress(&make_args("/src/main.rs"), "") + .is_none() + ); + } + + #[test] + fn test_javascript_comments_stripped() { + let mut lines = Vec::new(); + let mut ln = 1; + lines.push(format!(" {}\timport React from 'react';", ln)); + ln += 1; + lines.push(format!(" {}\t", ln)); + ln += 1; + lines.push(format!(" {}\t// TODO: remove this later", ln)); + ln += 1; + lines.push(format!(" {}\tfunction App() {{", ln)); + ln += 1; + for _ in 0..30 { + lines.push(format!(" {}\t return
hello
;", ln)); + ln += 1; + lines.push(format!(" {}\t // comment line", ln)); + ln += 1; + } + lines.push(format!(" {}\t}}", ln)); + let output = lines.join("\n"); + let args = make_args("/src/App.jsx"); + let compressor = ReadCompressor; + let result = compressor.compress(&args, &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + assert!(!compressed.contains("// TODO: remove this later")); + assert!(!compressed.contains("// comment line")); + assert!(compressed.contains("import React from 'react';")); + assert!(compressed.contains("function App()")); + } +} diff --git a/crates/compressor/src/strategy/codex/mod.rs b/crates/compressor/src/strategy/codex/mod.rs new file mode 100644 index 0000000..cf4174f --- /dev/null +++ b/crates/compressor/src/strategy/codex/mod.rs @@ -0,0 +1,129 @@ +//! Codex CLI tool output compressors. +//! +//! Codex CLI uses the same compression logic as Claude Code, but with +//! different tool names. Tool name mapping: +//! - `shell_command` → reuses the Claude `Bash` compressor +//! - `read_file` → reuses the Claude `Read` compressor +//! - `grep` → reuses the Claude `Grep` compressor +//! - `list_directory` → reuses the Claude `Glob` compressor +//! +//! Codex tool outputs are prefixed with a header block: +//! Exit code: N\nWall time: N seconds\nOutput:\n +//! This is stripped before compression so compressors see only the raw output. + +use super::ToolCompressor; + +/// Compress a Codex tool output, stripping the Codex header before delegating +/// to the appropriate compressor. +pub fn compress(tool_name: &str, arguments: &str, output: &str) -> Option { + let compressor = compressor_for(tool_name)?; + let stripped = strip_header(output); + + crate::util::compress_claude_tool_with_segment_protection(compressor, arguments, stripped) +} + +/// Strip the Codex shell output header, which always ends with "\nOutput:\n". 
+/// +/// Everything up to and including the first "\nOutput:\n" is treated as +/// header metadata and discarded. If the marker is not present the output +/// is returned unchanged. +fn strip_header(output: &str) -> &str { + if let Some(pos) = output.find("\nOutput:\n") { + &output[pos + "\nOutput:\n".len()..] + } else { + output + } +} + +/// Select the appropriate compressor for a Codex CLI tool name. +/// Returns `None` for tools we don't compress. +pub fn compressor_for(tool_name: &str) -> Option<&'static dyn ToolCompressor> { + match tool_name { + "exec_command" => super::claude::compressor_for("Bash"), + "shell_command" => super::claude::compressor_for("Bash"), + "read_file" => super::claude::compressor_for("Read"), + "grep" => super::claude::compressor_for("Grep"), + "list_directory" => super::claude::compressor_for("Glob"), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn compressor_for_shell_command() { + assert!(compressor_for("shell_command").is_some()); + } + + #[test] + fn compressor_for_read_file() { + assert!(compressor_for("read_file").is_some()); + } + + #[test] + fn compressor_for_grep() { + assert!(compressor_for("grep").is_some()); + } + + #[test] + fn compressor_for_list_directory() { + assert!(compressor_for("list_directory").is_some()); + } + + #[test] + fn compressor_for_unknown_tool() { + assert!(compressor_for("unknown").is_none()); + } + + #[test] + fn compressor_for_empty_string() { + assert!(compressor_for("").is_none()); + } + + #[test] + fn compressor_for_case_sensitive() { + assert!(compressor_for("Shell_Command").is_none()); + assert!(compressor_for("shell").is_none()); + } + + #[test] + fn strip_header_new_format() { + let output = "Command: zsh -lc 'ls'\nChunk ID: abc123\nWall time: 0.0000 seconds\nProcess exited with code 0\nOriginal token count: 42\nOutput:\nhello\nworld\n"; + assert_eq!(strip_header(output), "hello\nworld\n"); + } + + #[test] + fn strip_header_full() { + let output = "Exit 
code: 0\nWall time: 0 seconds\nOutput:\nhello\nworld\n"; + assert_eq!(strip_header(output), "hello\nworld\n"); + } + + #[test] + fn strip_header_partial() { + let output = "Exit code: 1\nOutput:\nerror text\n"; + assert_eq!(strip_header(output), "error text\n"); + } + + #[test] + fn strip_header_none() { + let output = "plain output\n"; + assert_eq!(strip_header(output), "plain output\n"); + } + + #[test] + fn strip_header_output_at_start_of_content() { + // Starts with "Output:\n" but no preceding "\n" — not a header boundary + let output = "Output:\nsome content\n"; + assert_eq!(strip_header(output), "Output:\nsome content\n"); + } + + #[test] + fn compress_strips_header_before_compressing() { + // ls -la output wrapped in Codex header — should compress successfully + let args = r#"{"command":"ls -la","workdir":"/tmp"}"#; + let output = "Exit code: 0\nWall time: 0 seconds\nOutput:\ntotal 8\ndrwxr-xr-x 2 user staff 64 Jan 1 12:00 .\ndrwxr-xr-x 2 user staff 64 Jan 1 12:00 ..\n-rw-r--r-- 1 user staff 10 Jan 1 12:00 file.txt\n"; + assert!(compress("shell_command", args, output).is_some()); + } +} diff --git a/crates/compressor/src/strategy/mod.rs b/crates/compressor/src/strategy/mod.rs new file mode 100644 index 0000000..f6e7c1f --- /dev/null +++ b/crates/compressor/src/strategy/mod.rs @@ -0,0 +1,12 @@ +pub mod bash; +pub mod claude; +pub mod codex; +pub mod opencode; +pub mod util; + +/// Trait for compressing the output of a specific tool. +/// `arguments` is the raw JSON string from tool_call.function.arguments. +/// Returns `Some(compressed)` if compression was applied, `None` to leave as-is. +pub trait ToolCompressor { + fn compress(&self, arguments: &str, output: &str) -> Option; +} diff --git a/crates/compressor/src/strategy/opencode/mod.rs b/crates/compressor/src/strategy/opencode/mod.rs new file mode 100644 index 0000000..3171602 --- /dev/null +++ b/crates/compressor/src/strategy/opencode/mod.rs @@ -0,0 +1,63 @@ +//! OpenCode tool output compressors. +//! 
+//! OpenCode uses the same compression logic as Claude Code, but with lowercase +//! tool names. The `read` tool has a different output format (XML-wrapped with +//! `N:` line number prefixes) and uses its own compressor. + +mod read; + +use super::ToolCompressor; + +/// Select the appropriate compressor for an OpenCode tool name. +/// Returns `None` for tools we don't compress. +pub fn compressor_for(tool_name: &str) -> Option<&'static dyn ToolCompressor> { + match tool_name { + "bash" => super::claude::compressor_for("Bash"), + "read" => Some(&read::ReadCompressor), + "grep" => super::claude::compressor_for("Grep"), + "glob" => super::claude::compressor_for("Glob"), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn compressor_for_bash() { + assert!(compressor_for("bash").is_some()); + } + + #[test] + fn compressor_for_read() { + assert!(compressor_for("read").is_some()); + } + + #[test] + fn compressor_for_grep() { + assert!(compressor_for("grep").is_some()); + } + + #[test] + fn compressor_for_glob() { + assert!(compressor_for("glob").is_some()); + } + + #[test] + fn compressor_for_unknown_tool() { + assert!(compressor_for("unknown").is_none()); + } + + #[test] + fn compressor_for_empty_string() { + assert!(compressor_for("").is_none()); + } + + #[test] + fn compressor_for_case_sensitive() { + // Tool names are case-sensitive — "Bash" (PascalCase) is not an OpenCode tool + assert!(compressor_for("Bash").is_none()); + assert!(compressor_for("Read").is_none()); + } +} diff --git a/crates/compressor/src/strategy/opencode/read.rs b/crates/compressor/src/strategy/opencode/read.rs new file mode 100644 index 0000000..0805c25 --- /dev/null +++ b/crates/compressor/src/strategy/opencode/read.rs @@ -0,0 +1,200 @@ +//! Compressor for the OpenCode `read` tool output. +//! +//! OpenCode's read tool outputs content in a different format than Claude Code: +//! +//! ```text +//! filepath +//! file +//! 1:line content +//! 2:more content +//! +//! 
``` +//! +//! This compressor extracts the file path and content, strips the `N:` line +//! number prefixes, then applies the same language-aware filtering as the +//! Claude Code Read compressor. + +use std::path::Path; + +use crate::strategy::ToolCompressor; +use crate::strategy::claude::read::{ + Language, filter_minimal_numbered, format_numbered_lines, parse_numbered_lines, +}; + +/// Below this many content lines, don't compress at all. +const SMALL_THRESHOLD: usize = 50; + +pub struct ReadCompressor; + +impl ToolCompressor for ReadCompressor { + fn compress(&self, _arguments: &str, output: &str) -> Option { + let file_path = extract_path(output); + let raw_content = extract_content(output)?; + let (fmt, numbered) = parse_numbered_lines(&raw_content); + + if numbered.len() < SMALL_THRESHOLD { + return None; + } + + let lang = file_path + .as_deref() + .and_then(|p| Path::new(p).extension()) + .and_then(|e| e.to_str()) + .map(Language::from_extension) + .unwrap_or(Language::Unknown); + + let filtered = filter_minimal_numbered(&numbered, &lang); + + if filtered.is_empty() { + return None; + } + + let compressed = format_numbered_lines(&filtered, fmt); + + // Only return if we actually saved something meaningful (>10%) + let threshold = raw_content.len() * 9 / 10; + if compressed.len() >= threshold { + return None; + } + + Some(compressed) + } +} + +/// Extract the file path from `...` in the output. +fn extract_path(output: &str) -> Option { + let start = output.find("")? + "".len(); + let end = output[start..].find("")? + start; + Some(output[start..end].trim().to_string()) +} + +/// Extract the content from `...` in the output. +fn extract_content(output: &str) -> Option { + let start = output.find("")? + "".len(); + let end = output[start..].find("")? 
+ start; + Some(output[start..end].to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::strategy::claude::read::LineFormat; + + fn make_output(path: &str, lines: usize) -> String { + let mut content = String::new(); + content.push_str(&format!("{}\n", path)); + content.push_str("file\n"); + content.push_str(""); + // 4 header lines + body + content.push_str("1:use std::io;\n"); + content.push_str("2:\n"); + content.push_str("3:// This is a comment\n"); + content.push_str("4:/// Doc comment\n"); + content.push_str("5:fn main() {\n"); + let mut ln = 6; + for _ in 0..lines { + content.push_str(&format!("{}: println!(\"hello\");\n", ln)); + ln += 1; + content.push_str(&format!("{}: // TODO: refactor\n", ln)); + ln += 1; + } + content.push_str(&format!("{}:{{}}\n", ln)); + content.push_str(""); + content + } + + #[test] + fn test_extract_path() { + let output = "/src/main.rs\nfile\n1:hello"; + assert_eq!(extract_path(output), Some("/src/main.rs".to_string())); + } + + #[test] + fn test_extract_path_missing() { + assert_eq!(extract_path("1:hello"), None); + } + + #[test] + fn test_extract_content() { + let output = "/src/main.rs\n1:hello\n2:world\n"; + assert_eq!( + extract_content(output), + Some("1:hello\n2:world\n".to_string()) + ); + } + + #[test] + fn test_extract_content_missing() { + assert_eq!(extract_content("/src/main.rs"), None); + } + + #[test] + fn test_parse_numbered_lines() { + let input = "1:use std::io;\n2:\n3:fn main() {\n"; + let (fmt, result) = parse_numbered_lines(input); + assert_eq!(fmt, LineFormat::Colon); + assert_eq!(result[0], (Some(1), "use std::io;".to_string())); + assert_eq!(result[1], (Some(2), "".to_string())); + assert_eq!(result[2], (Some(3), "fn main() {".to_string())); + } + + #[test] + fn test_parse_numbered_lines_non_numbered() { + let input = "not numbered\n1:numbered\n"; + let (_, result) = parse_numbered_lines(input); + assert_eq!(result[0], (None, "not numbered".to_string())); + assert_eq!(result[1], 
(Some(1), "numbered".to_string())); + } + + #[test] + fn test_parse_numbered_lines_content_with_colons() { + let input = "10:http://example.com\n"; + let (_, result) = parse_numbered_lines(input); + assert_eq!(result[0], (Some(10), "http://example.com".to_string())); + } + + #[test] + fn test_compressed_output_preserves_line_numbers() { + let output = make_output("/src/main.rs", 60); + let compressor = ReadCompressor; + let compressed = compressor.compress("{}", &output).unwrap(); + // Line 3 (comment) is stripped; line 4 (doc comment) should keep number 4. + assert!( + compressed.contains("4:/// Doc comment"), + "doc comment should keep line number 4" + ); + assert!( + compressed.contains("1:use std::io;"), + "import should keep line number 1" + ); + // Stripped comment must not appear + assert!(!compressed.contains("// This is a comment")); + } + + #[test] + fn test_small_output_not_compressed() { + let output = make_output("/src/main.rs", 10); + let compressor = ReadCompressor; + assert!(compressor.compress("{}", &output).is_none()); + } + + #[test] + fn test_strips_comments_rust() { + let output = make_output("/src/main.rs", 60); + let compressor = ReadCompressor; + let result = compressor.compress("{}", &output); + assert!(result.is_some()); + let compressed = result.unwrap(); + assert!(!compressed.contains("// This is a comment")); + assert!(compressed.contains("/// Doc comment")); + assert!(compressed.contains("use std::io;")); + assert!(compressed.contains("fn main()")); + } + + #[test] + fn test_no_content_tag_returns_none() { + let output = "/src/main.rs\nfile\nsome raw content"; + let compressor = ReadCompressor; + assert!(compressor.compress("{}", output).is_none()); + } +} diff --git a/crates/compressor/src/strategy/util.rs b/crates/compressor/src/strategy/util.rs new file mode 100644 index 0000000..fb329ec --- /dev/null +++ b/crates/compressor/src/strategy/util.rs @@ -0,0 +1,119 @@ +//! Shared utilities for compression strategies. 
+ +use lazy_static::lazy_static; +use regex::Regex; + +lazy_static! { + /// Matches `` blocks, including newlines. + pub static ref SYSTEM_REMINDER_RE: Regex = + Regex::new(r"(?s).*?").unwrap(); +} + +/// A segment of text that is either eligible for compression or must be passed through verbatim. +#[derive(Debug, PartialEq)] +pub enum TextSegment { + Compressible(String), + /// Content that must be preserved exactly (e.g. `` blocks). + Protected(String), +} + +/// Split `text` into alternating compressible / protected segments. +/// +/// The result always starts and ends with a `Compressible` segment (possibly empty). +pub fn split_into_segments(text: &str) -> Vec { + let mut segments = Vec::new(); + let mut last_end = 0usize; + for m in SYSTEM_REMINDER_RE.find_iter(text) { + segments.push(TextSegment::Compressible( + text[last_end..m.start()].to_string(), + )); + segments.push(TextSegment::Protected(m.as_str().to_string())); + last_end = m.end(); + } + segments.push(TextSegment::Compressible(text[last_end..].to_string())); + segments +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── split_into_segments ─────────────────────────────────────────────── + + #[test] + fn split_no_protected_returns_single_compressible() { + let text = "plain text with no tags"; + let segments = split_into_segments(text); + assert_eq!(segments, vec![TextSegment::Compressible(text.to_string())]); + } + + #[test] + fn split_block_in_middle() { + let text = "beforetag contentafter"; + let segments = split_into_segments(text); + assert_eq!( + segments, + vec![ + TextSegment::Compressible("before".to_string()), + TextSegment::Protected( + "tag content".to_string() + ), + TextSegment::Compressible("after".to_string()), + ] + ); + } + + #[test] + fn split_block_at_start() { + let text = "tagafter"; + let segments = split_into_segments(text); + assert_eq!( + segments, + vec![ + TextSegment::Compressible(String::new()), + TextSegment::Protected("tag".to_string()), + 
TextSegment::Compressible("after".to_string()), + ] + ); + } + + #[test] + fn split_block_at_end() { + let text = "beforetag"; + let segments = split_into_segments(text); + assert_eq!( + segments, + vec![ + TextSegment::Compressible("before".to_string()), + TextSegment::Protected("tag".to_string()), + TextSegment::Compressible(String::new()), + ] + ); + } + + #[test] + fn split_multiple_blocks() { + let text = "axbyc"; + let segments = split_into_segments(text); + assert_eq!( + segments, + vec![ + TextSegment::Compressible("a".to_string()), + TextSegment::Protected("x".to_string()), + TextSegment::Compressible("b".to_string()), + TextSegment::Protected("y".to_string()), + TextSegment::Compressible("c".to_string()), + ] + ); + } + + #[test] + fn split_multiline_block() { + let text = "before\n\nmultiline\ncontent\n\nafter"; + let segments = split_into_segments(text); + assert_eq!(segments.len(), 3); + assert!( + matches!(&segments[1], TextSegment::Protected(s) if s.contains("multiline\ncontent")) + ); + } +} diff --git a/crates/compressor/src/util.rs b/crates/compressor/src/util.rs new file mode 100644 index 0000000..25bdc01 --- /dev/null +++ b/crates/compressor/src/util.rs @@ -0,0 +1,172 @@ +//! Shared utilities for compression. + +use crate::strategy::ToolCompressor; +use crate::strategy::util::{TextSegment, split_into_segments}; + +/// Wrap a call to `compressor.compress()`, preserving any `` blocks verbatim. +/// +/// Strategy: +/// 1. Split `output` into compressible / protected segments (using shared util). +/// 2. If no protected segments → delegate to compressor directly (fast path). +/// 3. Concatenate all compressible text → pass to compressor as one unit. +/// 4. If compressor returns `None` → return `None` (caller keeps original). +/// 5. 
Rebuild in segment order: the first non-empty compressible slot receives the +/// compressed output; remaining compressible slots are skipped (their content was +/// included in the combined input); protected slots are emitted verbatim. +pub fn compress_claude_tool_with_segment_protection( + compressor: &dyn ToolCompressor, + arguments: &str, + output: &str, +) -> Option { + // Never compress tool outputs containing error or persisted-output tags — + // these carry important context that must be preserved verbatim. + if output.contains("") || output.contains("") { + return None; + } + + let segments = split_into_segments(output); + + if !segments + .iter() + .any(|s| matches!(s, TextSegment::Protected(_))) + { + return compressor.compress(arguments, output); + } + + let compressible_combined: String = segments + .iter() + .filter_map(|s| match s { + TextSegment::Compressible(t) => Some(t.as_str()), + _ => None, + }) + .collect(); + + if compressible_combined.trim().is_empty() { + // All content is protected — nothing to compress; tell caller to keep original. + return None; + } + + // Compress all compressible segments as one unit: tool compressors rely on full context + // (line-count thresholds, file-type detection) that would be broken by per-segment calls. + // The result is placed at the first non-empty compressible slot; later slots are dropped + // because their content was already included in the combined input. 
+ let compressed = compressor.compress(arguments, &compressible_combined)?; + + let mut result = String::new(); + let mut compressed_inserted = false; + for segment in &segments { + match segment { + TextSegment::Protected(text) => result.push_str(text), + TextSegment::Compressible(text) => { + if !compressed_inserted && !text.trim().is_empty() { + result.push_str(&compressed); + compressed_inserted = true; + } + } + } + } + if !compressed_inserted { + result.push_str(&compressed); + } + + Some(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A minimal compressor that returns the first half of any sufficiently long input. + struct HalfCompressor; + impl crate::strategy::ToolCompressor for HalfCompressor { + fn compress(&self, _args: &str, output: &str) -> Option { + if output.len() < 10 { + return None; + } + Some(output[..output.len() / 2].to_string()) + } + } + + #[test] + fn protect_system_reminder_at_start() { + let reminder = "secret injection"; + let body = " this is long enough compressible content for the test"; + let output = format!("{reminder}{body}"); + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", &output); + + let result = result.expect("should return Some"); + assert!( + result.contains(reminder), + "reminder must be preserved verbatim; got: {result:?}" + ); + // The compressed portion is `body` halved; reminder comes before it. 
+ assert!(result.starts_with(reminder), "reminder should be at start"); + } + + #[test] + fn protect_system_reminder_at_end() { + let reminder = "secret injection"; + let body = "this is long enough compressible content for the test "; + let output = format!("{body}{reminder}"); + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", &output); + + let result = result.expect("should return Some"); + assert!( + result.contains(reminder), + "reminder must be preserved verbatim; got: {result:?}" + ); + assert!(result.ends_with(reminder), "reminder should be at end"); + } + + #[test] + fn protect_no_system_reminder_delegates_directly() { + let output = "plain compressible output long enough to compress"; + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", output); + + // HalfCompressor returns first half; no reminder → direct delegation + let result = result.expect("should compress plain text"); + assert_eq!(result, &output[..output.len() / 2]); + } + + #[test] + fn protect_all_system_reminder_returns_none() { + // All content is protected — compressor should not be invoked; returns None + let output = "only protected"; + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", output); + + assert!( + result.is_none(), + "all-protected input must return None; got: {result:?}" + ); + } + + #[test] + fn skip_compression_when_tool_use_error_present() { + let output = + "some error message plus long compressible content"; + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", output); + + assert!( + result.is_none(), + "tool_use_error output must not be compressed; got: {result:?}" + ); + } + + #[test] + fn skip_compression_when_persisted_output_present() { + let output = + "important data plus long compressible content"; + + let result = compress_claude_tool_with_segment_protection(&HalfCompressor, "{}", output); + + assert!( + result.is_none(), + 
"persisted-output must not be compressed; got: {result:?}" + ); + } +}