Skip to content

Commit

Permalink
Merge pull request #28 from seapagan/exclude-only
Browse files Browse the repository at this point in the history
  • Loading branch information
seapagan authored Nov 27, 2024
2 parents 1370569 + 8c880e5 commit 1515598
Show file tree
Hide file tree
Showing 8 changed files with 302 additions and 66 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Rust

on:
push:
branches: ["main"]
pull_request:
branches: ["main"]

env:
CARGO_TERM_COLOR: always

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
9 changes: 7 additions & 2 deletions Makefile.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
[tasks.cov]
[tasks.test]
# this needs the crates 'cargo-nextest' and 'cargo-llvm-cov' installed locally
command = "cargo"
args = ["tarpaulin", "--out", "html"]
args = ["llvm-cov", "nextest"]

[tasks.test-html]
command = "cargo"
args = ["llvm-cov", "nextest", "--html"]
39 changes: 25 additions & 14 deletions README-cratesio.md
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ Options:
-l, --lnumbers Add line numbers to each code file in the output.
-t, --token <TOKEN> GitHub personal access token (required for private repos and to pass rate limits)
-e, --extend-exclude <PATTERN> Additional file pattern to exclude (can be specified multiple times)
-x, --exclude <PATTERN> File pattern to exclude, replacing the default ignore list (can be specified multiple times)
-V, --version Print version information and exit
-h, --help Print help
```
Expand Down Expand Up @@ -351,31 +352,41 @@ Available configuration options:
- `line_numbers`: Whether to add line numbers by default (default: false)
- `token`: Your GitHub personal access token (default: none)
- `extend_exclude`: Additional file patterns to exclude (default: none)
- `exclude`: File patterns to exclude, replacing the default ignore list
(default: none)
The `extend_exclude` option can be specified either by using multiple `-e` flags
on the command line:
The `extend_exclude` and `exclude` options can be specified either by using
multiple `-e` or `-x` flags on the command line:
```bash
bundlerepo user/repo -e "*.md" -e "*.txt" -e "docs/*"
bundlerepo user/repo -x "*.exe" -x "*.dll" -x "node_modules/*"
```
Or as an array in the TOML configuration file:
Or as arrays in the TOML configuration file:
```toml
extend_exclude = ["*.md", "*.txt", "docs/*"]
exclude = ["*.exe", "*.dll", "node_modules/*"]
```
These patterns will be **added** to the default ignore list.
> The `extend_exclude` option is useful for excluding additional files that
> aren't in the default ignore list but that you don't want to include in your
> XML output. This can help reduce token usage and remove irrelevant files from
> the LLM context.
>
> Storing your GitHub token in the configuration file can be more convenient
> than passing it via command line, especially if you frequently work with
> private repositories. Just be sure to keep your configuration file secure -
> use the Global TOML file rather than the Local (in repo) TOML file.
The `extend_exclude` patterns will be **added** to the default ignore list,
while the `exclude` patterns will **replace** the default ignore list entirely.
**Important**: When the `exclude` option is used (either via command line or
config file), both the default ignore list and any `extend_exclude` patterns are
completely ignored. The `exclude` patterns become the only ignore rules in
effect.
**Note**: The `extend_exclude` option is useful for excluding additional files
that aren't in the default ignore list but that you don't want to include in
your XML output. The `exclude` option gives you complete control over what files
are ignored, replacing the built-in ignore list. Both options can help reduce
token usage and remove irrelevant files from the LLM context.
Storing your GitHub token in the configuration file can be more convenient than
passing it via command line, especially if you frequently work with private
repositories. Just be sure to keep your configuration file secure.
## Ignored Files
Expand Down
26 changes: 20 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ Options:
-l, --lnumbers Add line numbers to each code file in the output.
-t, --token <TOKEN> GitHub personal access token (required for private repos and to pass rate limits)
-e, --extend-exclude <PATTERN> Additional file pattern to exclude (can be specified multiple times)
-x, --exclude <PATTERN> File pattern to exclude, replacing the default ignore list (can be specified multiple times)
-V, --version Print version information and exit
-h, --help Print help
```
Expand All @@ -345,6 +346,7 @@ clipboard = false
line_numbers = true
token = "your-github-token"
extend_exclude = ["*.md", "*.txt", "docs/*"] # Additional patterns to exclude
exclude = ["*.exe", "*.dll", "node_modules/*"] # Patterns that replace the default ignore list (extend_exclude is then ignored)
```

All settings are optional. Settings are applied in the following order of
Expand All @@ -364,28 +366,40 @@ Available configuration options:
- `line_numbers`: Whether to add line numbers by default (default: false)
- `token`: Your GitHub personal access token (default: none)
- `extend_exclude`: Additional file patterns to exclude (default: none)
- `exclude`: File patterns to exclude, replacing the default ignore list
(default: none)

The `extend_exclude` option can be specified either by using multiple `-e` flags
on the command line:
The `extend_exclude` and `exclude` options can be specified either by using
multiple `-e` or `-x` flags on the command line:

```bash
bundlerepo user/repo -e "*.md" -e "*.txt" -e "docs/*"
bundlerepo user/repo -x "*.exe" -x "*.dll" -x "node_modules/*"
```

Or as an array in the TOML configuration file:
Or as arrays in the TOML configuration file:

```toml
extend_exclude = ["*.md", "*.txt", "docs/*"]
exclude = ["*.exe", "*.dll", "node_modules/*"]
```

These patterns will be **added** to the default ignore list.
The `extend_exclude` patterns will be **added** to the default ignore list,
while the `exclude` patterns will **replace** the default ignore list entirely.

> [!IMPORTANT]
>
> When the `exclude` option is used (either via command line or config file),
> both the default ignore list and any `extend_exclude` patterns are completely
> ignored. The `exclude` patterns become the only ignore rules in effect.

> [!TIP]
>
> The `extend_exclude` option is useful for excluding additional files that
> aren't in the default ignore list but that you don't want to include in your
> XML output. This can help reduce token usage and remove irrelevant files from
> the LLM context.
> XML output. The `exclude` option gives you complete control over what files
> are ignored, replacing the built-in ignore list. Both options can help reduce
> token usage and remove irrelevant files from the LLM context.
>
> Storing your GitHub token in the configuration file can be more convenient
> than passing it via command line, especially if you frequently work with
Expand Down
9 changes: 9 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ pub struct Flags {
action = ArgAction::Append
)]
pub extend_exclude: Option<Vec<String>>,

#[arg(
long = "exclude",
short = 'x',
value_name = "PATTERN",
help = "Replace the existing exclude patterns with the specified pattern(s). Can be specified multiple times.",
action = ArgAction::Append
)]
pub exclude: Option<Vec<String>>,
}

pub fn version_info() -> String {
Expand Down
59 changes: 36 additions & 23 deletions src/filelist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,45 @@ pub struct FileTree {
pub fn list_files_in_repo(
repo_path: &PathBuf,
extend_exclude: Option<&[String]>,
exclude: Option<&[String]>,
) -> Vec<String> {
let mut file_list = Vec::new();

// Base ignore patterns as String
let mut ignore_patterns: Vec<String> = vec![
r"(?i)\.gitignore",
r"(?i)renovate\.json",
r"(?i)requirement.*\.txt",
r"(?i)\.lock$",
r"(?i)license(\..*)?",
// r"(?i)todo\..*",
r"(?i)\.github",
r"(?i)\.git",
r"(?i)\.vscode",
]
.into_iter()
.map(String::from)
.collect();

// Add additional patterns if provided
if let Some(patterns) = extend_exclude {
ignore_patterns.extend(patterns.iter().map(|p| {
let escaped = regex::escape(p);
format!(r"(?i){}", escaped)
}));
}
// Initialize ignore patterns based on whether exclude is set
let ignore_patterns: Vec<String> = if let Some(patterns) = exclude {
// If exclude is set, use only those patterns
patterns
.iter()
.map(|p| {
let escaped = regex::escape(p);
format!(r"(?i){}", escaped)
})
.collect()
} else {
// Otherwise use default patterns
let mut patterns: Vec<String> = vec![
r"(?i)\.gitignore",
r"(?i)renovate\.json",
r"(?i)requirement.*\.txt",
r"(?i)\.lock$",
r"(?i)license(\..*)?",
r"(?i)\.github",
r"(?i)\.git",
r"(?i)\.vscode",
]
.into_iter()
.map(String::from)
.collect();

// Add additional patterns if provided
if let Some(extend_patterns) = extend_exclude {
patterns.extend(extend_patterns.iter().map(|p| {
let escaped = regex::escape(p);
format!(r"(?i){}", escaped)
}));
}
patterns
};

let regex_list: Vec<Regex> = ignore_patterns
.iter()
Expand Down
Loading

0 comments on commit 1515598

Please sign in to comment.