From 440301e470006843d5e203f66bc97718b007ea56 Mon Sep 17 00:00:00 2001 From: Lukasz Date: Fri, 12 Dec 2025 08:57:47 +0100 Subject: [PATCH] Add configurable work dir to optimize the repo clone --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 9 ++ README.md | 50 +++++++ autopr/__init__.py | 11 +- autopr/config.py | 18 ++- autopr/workdir.py | 41 +++++- test/test_config.py | 144 +++++++++++++++++++++ test/test_workdir_custom_repos.py | 208 ++++++++++++++++++++++++++++++ 8 files changed, 474 insertions(+), 9 deletions(-) create mode 100644 test/test_workdir_custom_repos.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b32fc98..06749d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: - id: mypy additional_dependencies: ['types-PyYAML==6.0.1'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/CHANGELOG.md b/CHANGELOG.md index 1eb7a24..ed5f079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## Unreleased + +- Add custom repositories directory support + - Add `custom_repos_dir` configuration option to use existing cloned repositories + - Add `--repos-dir` CLI flag to override repository directory location + - Supports environment variable expansion with `${VAR_NAME}` syntax + - Useful for organizations with >1k repositories to avoid duplicating disk space + - Priority order: CLI flag > config file > default `repos/` subdirectory + ## 1.2.0 - Add environment variable expansion support in `config.yaml` for credentials diff --git a/README.md b/README.md index e311f66..5adb78b 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,56 @@ If a referenced environment variable is not set, auto-pr will display a clear er Alternatively, if you wish to keep your API Key outside of `config.yaml` without modifying the config file, you can set the env var `APR_API_KEY` with your GitHub Token +#### 
Using a Custom Repositories Directory + +By default, auto-pr clones repositories into a `repos/` subdirectory within your working directory. If you already have repositories cloned locally (e.g., more than 1,000 repositories), you can configure auto-pr to use your existing directory instead: + +**Option 1: Config file** + +Add `custom_repos_dir` to your `config.yaml`: + +```yaml +credentials: + api_key: ${GITHUB_API_KEY} + ssh_key_file: ${HOME}/.ssh/id_rsa +pr: + title: 'My awesome change' + branch: auto-pr + message: Update dependencies + body: Automated update + draft: false +repositories: + - mode: add + match_owner: myorg +update_command: + - echo + - "Hello" +custom_repos_dir: /path/to/existing/repos # Point to your existing cloned repos +``` + +The `custom_repos_dir` field supports environment variable expansion: + +```yaml +custom_repos_dir: ${HOME}/all-my-repos +``` + +**Option 2: CLI flag** + +Use the `--repos-dir` option when running commands: + +```bash +auto-pr --repos-dir=/path/to/existing/repos pull +auto-pr --repos-dir=/path/to/existing/repos test +auto-pr --repos-dir=/path/to/existing/repos run +``` + +The CLI option takes precedence over the config file setting; if neither is set, the default `repos/` subdirectory is used. + +**Important notes:** +- The custom repos directory must already exist before running `auto-pr init`; auto-pr reports an error instead of creating it +- auto-pr will use the existing cloned repositories and apply its cleanup operations as normal +- This is useful for organizations with many repositories to avoid duplicating disk space + ### Repositories You can define the list of repositories to pull and build into the database to update using a list of rules. 
diff --git a/autopr/__init__.py b/autopr/__init__.py index 8efcbc7..0abce37 100644 --- a/autopr/__init__.py +++ b/autopr/__init__.py @@ -52,6 +52,12 @@ def _ensure_set_up(cfg: config.Config, db: database.Database): ), help="Working directory to store configuration and repositories", ) +@click.option( + "--repos-dir", + "repos_dir_path", + type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), + help="Custom directory containing cloned repositories", +) @click.option( "--debug/--no-debug", envvar="APR_DEBUG", @@ -60,9 +66,10 @@ def _ensure_set_up(cfg: config.Config, db: database.Database): help="Whether to enable debug mode or not", ) @click.version_option(__version__, message="%(prog)s: %(version)s") -def cli(wd_path: str, debug: bool): +def cli(wd_path: str, repos_dir_path: Optional[str], debug: bool): global WORKDIR - WORKDIR = workdir.get(wd_path) + custom_repos = Path(repos_dir_path) if repos_dir_path else None + WORKDIR = workdir.get(wd_path, custom_repos_dir=custom_repos) set_debug(debug) diff --git a/autopr/config.py b/autopr/config.py index 122d53b..6224373 100644 --- a/autopr/config.py +++ b/autopr/config.py @@ -103,6 +103,22 @@ class Config: pr: PrTemplate repositories: List[Filter] = field(default_factory=list) # is equal to assigning [] update_command: List[str] = field(default_factory=list) + custom_repos_dir: Optional[str] = None -CONFIG_SCHEMA = marshmallow_dataclass.class_schema(Config)() +class ConfigSchema(Schema): + credentials = fields.Nested(CredentialsSchema, required=True) + pr = fields.Nested(PR_TEMPLATE_SCHEMA, required=True) + repositories = fields.List(fields.Nested(FILTERS_SCHEMA), load_default=list) + update_command = fields.List(fields.Str(), load_default=list) + custom_repos_dir = fields.Str(required=False, allow_none=True) + + @post_load + def expand_config_env_vars(self, data: Dict[str, Any], **kwargs: Any) -> Config: + """Expand environment variables in custom_repos_dir after loading.""" + if 
"custom_repos_dir" in data and data["custom_repos_dir"] is not None: + data["custom_repos_dir"] = expand_env_vars(data["custom_repos_dir"]) + return Config(**data) + + +CONFIG_SCHEMA = ConfigSchema() diff --git a/autopr/workdir.py b/autopr/workdir.py index 84e20e0..51c520c 100644 --- a/autopr/workdir.py +++ b/autopr/workdir.py @@ -1,5 +1,6 @@ import json from pathlib import Path +from typing import Optional import yaml from marshmallow import ValidationError @@ -14,9 +15,11 @@ class WorkDir: location: Path + _custom_repos_dir: Optional[Path] - def __init__(self, location: Path): + def __init__(self, location: Path, custom_repos_dir: Optional[Path] = None): self.location = location + self._custom_repos_dir = custom_repos_dir @property def config_file(self) -> Path: @@ -28,12 +31,40 @@ def database_file(self) -> Path: @property def repos_dir(self) -> Path: + # Priority: 1. CLI option, 2. Config file, 3. Default + if self._custom_repos_dir: + return self._custom_repos_dir + + # Try reading from config + if self.config_file.exists(): + try: + cfg = read_config(self) + if cfg.custom_repos_dir: + return Path(cfg.custom_repos_dir) + except Exception: + # If config reading fails, fall back to default + pass + + # Default behavior return self.location / REPOS_DIR_NAME def init(wd: WorkDir, credentials: config.Credentials): - # create work dir and repos dir - wd.repos_dir.mkdir(parents=True, exist_ok=True) + # Determine repos dir and validate/create + repos_dir_to_use = wd.repos_dir + is_custom_repos_dir = wd._custom_repos_dir is not None or ( + wd.config_file.exists() and read_config(wd).custom_repos_dir is not None + ) + + if is_custom_repos_dir: + # Custom repos dir - must exist + if not repos_dir_to_use.exists(): + raise CliException( + f"Custom repos directory does not exist: {repos_dir_to_use}" + ) + else: + # Default repos dir - create it + repos_dir_to_use.mkdir(parents=True, exist_ok=True) # create default config if not wd.config_file.exists(): @@ -109,10 +140,10 @@ 
def read_database(wd: WorkDir) -> database.Database: raise CliException(f"Failed to deserialize database: {err.messages}") -def get(wd_path: str) -> WorkDir: +def get(wd_path: str, custom_repos_dir: Optional[Path] = None) -> WorkDir: if wd_path: workdir_path = Path(wd_path) else: workdir_path = Path.cwd() - return WorkDir(workdir_path) + return WorkDir(workdir_path, custom_repos_dir=custom_repos_dir) diff --git a/test/test_config.py b/test/test_config.py index 9b8c55a..380634c 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -279,5 +279,149 @@ def test_credentials_roundtrip_with_env_vars(self): del os.environ["TEST_SSH_KEY"] +class TestConfigSchema(unittest.TestCase): + def test_config_with_custom_repos_dir(self): + """Test that Config correctly parses custom_repos_dir field""" + from autopr.config import CONFIG_SCHEMA + + data = { + "credentials": {"api_key": "test_key", "ssh_key_file": "/test/key"}, + "pr": { + "title": "Test PR", + "message": "Test message", + "branch": "test-branch", + "body": "Test body", + "draft": False, + }, + "repositories": [], + "update_command": ["echo", "test"], + "custom_repos_dir": "/custom/repos/path", + } + + cfg = CONFIG_SCHEMA.load(data) + + self.assertEqual(cfg.custom_repos_dir, "/custom/repos/path") + + def test_config_without_custom_repos_dir(self): + """Test that Config works when custom_repos_dir is not specified""" + from autopr.config import CONFIG_SCHEMA + + data = { + "credentials": {"api_key": "test_key", "ssh_key_file": "/test/key"}, + "pr": { + "title": "Test PR", + "message": "Test message", + "branch": "test-branch", + "body": "Test body", + "draft": False, + }, + "repositories": [], + "update_command": ["echo", "test"], + } + + cfg = CONFIG_SCHEMA.load(data) + + self.assertIsNone(cfg.custom_repos_dir) + + def test_config_custom_repos_dir_with_env_var(self): + """Test that custom_repos_dir correctly expands environment variables""" + from autopr.config import CONFIG_SCHEMA + + 
os.environ["TEST_REPOS_BASE"] = "/home/user/repos" + + data = { + "credentials": {"api_key": "test_key", "ssh_key_file": "/test/key"}, + "pr": { + "title": "Test PR", + "message": "Test message", + "branch": "test-branch", + "body": "Test body", + "draft": False, + }, + "repositories": [], + "update_command": ["echo", "test"], + "custom_repos_dir": "${TEST_REPOS_BASE}/projects", + } + + cfg = CONFIG_SCHEMA.load(data) + + self.assertEqual(cfg.custom_repos_dir, "/home/user/repos/projects") + + del os.environ["TEST_REPOS_BASE"] + + def test_config_custom_repos_dir_with_missing_env_var(self): + """Test that missing env var in custom_repos_dir raises error""" + from autopr.config import CONFIG_SCHEMA + + data = { + "credentials": {"api_key": "test_key", "ssh_key_file": "/test/key"}, + "pr": { + "title": "Test PR", + "message": "Test message", + "branch": "test-branch", + "body": "Test body", + "draft": False, + }, + "repositories": [], + "update_command": ["echo", "test"], + "custom_repos_dir": "${MISSING_REPOS_VAR}", + } + + with self.assertRaises(ValueError) as context: + CONFIG_SCHEMA.load(data) + self.assertIn("MISSING_REPOS_VAR", str(context.exception)) + self.assertIn("not set", str(context.exception)) + + def test_config_custom_repos_dir_serialization(self): + """Test that Config correctly serializes custom_repos_dir""" + from autopr.config import CONFIG_SCHEMA, Config, Credentials, PrTemplate + + credentials = Credentials(api_key="test_key", ssh_key_file="/test/key") + pr = PrTemplate( + title="Test PR", + message="Test message", + branch="test-branch", + body="Test body", + draft=False, + ) + cfg = Config( + credentials=credentials, + pr=pr, + repositories=[], + update_command=["echo", "test"], + custom_repos_dir="/custom/repos", + ) + + data = CONFIG_SCHEMA.dump(cfg) + + self.assertIn("custom_repos_dir", data) + self.assertEqual(data["custom_repos_dir"], "/custom/repos") + + def test_config_roundtrip_with_custom_repos_dir(self): + """Test serialize -> 
deserialize roundtrip with custom_repos_dir""" + from autopr.config import CONFIG_SCHEMA, Config, Credentials, PrTemplate + + os.environ["TEST_REPOS_DIR"] = "/home/user/all-repos" + + credentials = Credentials(api_key="test_key", ssh_key_file="/test/key") + pr = PrTemplate() + cfg = Config( + credentials=credentials, + pr=pr, + custom_repos_dir="${TEST_REPOS_DIR}", + ) + + # Serialize + data = CONFIG_SCHEMA.dump(cfg) + + # Deserialize + restored = CONFIG_SCHEMA.load(data) + + # custom_repos_dir should be expanded now + self.assertEqual(restored.custom_repos_dir, "/home/user/all-repos") + + del os.environ["TEST_REPOS_DIR"] + + if __name__ == "__main__": unittest.main() diff --git a/test/test_workdir_custom_repos.py b/test/test_workdir_custom_repos.py new file mode 100644 index 0000000..166e950 --- /dev/null +++ b/test/test_workdir_custom_repos.py @@ -0,0 +1,208 @@ +import os +from pathlib import Path + +import pytest + +from autopr import config, workdir +from autopr.util import CliException + + +def test_workdir_custom_repos_dir_via_constructor(tmp_path): + """Test WorkDir with custom_repos_dir set via constructor""" + custom_dir = Path(tmp_path) / "custom-repos" + custom_dir.mkdir() + + wd = workdir.WorkDir(Path(tmp_path), custom_repos_dir=custom_dir) + assert wd.repos_dir == custom_dir + + +def test_workdir_custom_repos_dir_via_config(tmp_path): + """Test WorkDir with custom_repos_dir set via config file""" + custom_dir = Path(tmp_path) / "custom-repos" + custom_dir.mkdir() + + # Create a config with custom_repos_dir + wd = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file="test_key") + pr = config.PrTemplate() + cfg = config.Config( + credentials=credentials, pr=pr, custom_repos_dir=str(custom_dir) + ) + workdir.write_config(wd, cfg) + + # Read it back and verify repos_dir + assert wd.repos_dir == custom_dir + + +def test_workdir_priority_cli_over_config(tmp_path): + """Test that CLI option takes precedence over 
config file""" + cli_custom_dir = Path(tmp_path) / "cli-repos" + cli_custom_dir.mkdir() + config_custom_dir = Path(tmp_path) / "config-repos" + config_custom_dir.mkdir() + + # Create a config with custom_repos_dir + wd_setup = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file="test_key") + pr = config.PrTemplate() + cfg = config.Config( + credentials=credentials, pr=pr, custom_repos_dir=str(config_custom_dir) + ) + workdir.write_config(wd_setup, cfg) + + # Create WorkDir with CLI custom_repos_dir + wd = workdir.WorkDir(Path(tmp_path), custom_repos_dir=cli_custom_dir) + assert wd.repos_dir == cli_custom_dir + assert wd.repos_dir != config_custom_dir + + +def test_workdir_default_behavior(tmp_path): + """Test default behavior when no custom_repos_dir is specified""" + wd = workdir.WorkDir(Path(tmp_path)) + expected_default = Path(tmp_path) / "repos" + assert wd.repos_dir == expected_default + + +def test_init_validates_custom_repos_dir_exists(tmp_path): + """Test init fails with clear error when custom repos dir doesn't exist""" + nonexistent_dir = Path(tmp_path) / "nonexistent-repos" + + wd = workdir.WorkDir(Path(tmp_path), custom_repos_dir=nonexistent_dir) + credentials = config.Credentials(api_key="test", ssh_key_file="test_key") + + with pytest.raises(CliException) as exc_info: + workdir.init(wd, credentials) + + assert str(nonexistent_dir) in str(exc_info.value) + assert "does not exist" in str(exc_info.value) + + +def test_init_success_with_existing_custom_repos_dir(tmp_path): + """Test init succeeds when custom repos dir exists""" + custom_dir = Path(tmp_path) / "existing-repos" + custom_dir.mkdir() + + test_key = Path(tmp_path) / "test_key" + test_key.touch() + + wd = workdir.WorkDir(Path(tmp_path), custom_repos_dir=custom_dir) + credentials = config.Credentials(api_key="test", ssh_key_file=str(test_key)) + + # Should not raise + workdir.init(wd, credentials) + + # Verify custom dir still exists and was not 
modified + assert custom_dir.exists() + assert custom_dir.is_dir() + + +def test_init_creates_default_repos_dir(tmp_path): + """Test init creates default repos dir when no custom dir specified""" + test_key = Path(tmp_path) / "test_key" + test_key.touch() + + wd = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file=str(test_key)) + + default_repos_dir = Path(tmp_path) / "repos" + assert not default_repos_dir.exists() + + workdir.init(wd, credentials) + + # Verify default repos dir was created + assert default_repos_dir.exists() + assert default_repos_dir.is_dir() + + +def test_config_custom_repos_dir_env_var_expansion(tmp_path): + """Test environment variable expansion in custom_repos_dir""" + # Set environment variable + custom_repos_base = Path(tmp_path) / "my-repos" + custom_repos_base.mkdir() + os.environ["TEST_CUSTOM_REPOS_DIR"] = str(custom_repos_base) + + try: + # Create config with env var + wd = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file="test_key") + pr = config.PrTemplate() + cfg = config.Config( + credentials=credentials, + pr=pr, + custom_repos_dir="${TEST_CUSTOM_REPOS_DIR}", + ) + workdir.write_config(wd, cfg) + + # Read it back and verify expansion happened + loaded_cfg = workdir.read_config(wd) + assert loaded_cfg.custom_repos_dir == str(custom_repos_base) + finally: + del os.environ["TEST_CUSTOM_REPOS_DIR"] + + +def test_get_function_with_custom_repos_dir(tmp_path): + """Test workdir.get() function with custom_repos_dir parameter""" + custom_dir = Path(tmp_path) / "custom-repos" + custom_dir.mkdir() + + wd = workdir.get(str(tmp_path), custom_repos_dir=custom_dir) + assert wd.repos_dir == custom_dir + + +def test_get_function_without_custom_repos_dir(tmp_path): + """Test workdir.get() function without custom_repos_dir parameter""" + wd = workdir.get(str(tmp_path)) + expected_default = Path(tmp_path) / "repos" + assert wd.repos_dir == expected_default + + 
+def test_init_with_custom_repos_dir_in_config(tmp_path): + """Test init with custom_repos_dir specified in config file""" + custom_dir = Path(tmp_path) / "custom-repos" + custom_dir.mkdir() + + test_key = Path(tmp_path) / "test_key" + test_key.touch() + + # First, create config with custom_repos_dir + wd_setup = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file=str(test_key)) + pr = config.PrTemplate() + cfg = config.Config( + credentials=credentials, pr=pr, custom_repos_dir=str(custom_dir) + ) + workdir.write_config(wd_setup, cfg) + + # Now create a fresh WorkDir and init (simulating re-init) + wd = workdir.WorkDir(Path(tmp_path)) + + # Should not raise since custom_dir exists + workdir.init(wd, credentials) + + # Verify repos_dir points to custom dir + assert wd.repos_dir == custom_dir + + +def test_init_fails_with_nonexistent_custom_repos_dir_in_config(tmp_path): + """Test init fails when custom_repos_dir in config doesn't exist""" + nonexistent_dir = Path(tmp_path) / "nonexistent-repos" + + test_key = Path(tmp_path) / "test_key" + test_key.touch() + + # Create config with nonexistent custom_repos_dir + wd_setup = workdir.WorkDir(Path(tmp_path)) + credentials = config.Credentials(api_key="test", ssh_key_file=str(test_key)) + pr = config.PrTemplate() + cfg = config.Config( + credentials=credentials, pr=pr, custom_repos_dir=str(nonexistent_dir) + ) + workdir.write_config(wd_setup, cfg) + + # Try to init - should fail + wd = workdir.WorkDir(Path(tmp_path)) + with pytest.raises(CliException) as exc_info: + workdir.init(wd, credentials) + + assert str(nonexistent_dir) in str(exc_info.value) + assert "does not exist" in str(exc_info.value)