Skip to content

Commit f38294e

Browse files
committed
Add github workflow
1 parent ad88e07 commit f38294e

File tree

8 files changed

+1359
-0
lines changed

8 files changed

+1359
-0
lines changed

.github/workflows/index.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Indexes the repository on pushes to the listed branches, or on manual dispatch.
# Index state is kept per branch in the Actions cache and also uploaded as an artifact.
name: Index Repository

on:
  push:
    branches:
      - main
      - develop
      - 'feature/**' # Index feature branches
      - 'release/**' # Index release branches
  workflow_dispatch:
    # NOTE(review): neither input below is referenced anywhere in this workflow
    # (no `inputs.*` / `github.event.inputs.*` expression). Confirm that
    # src/main.py picks them up some other way, or forward them via `env`.
    inputs:
      branch:
        description: 'Branch to index (leave empty for current branch)'
        required: false
        type: string
      force_full_reindex:
        description: 'Force full re-index'
        required: false
        type: boolean
        default: false

jobs:
  index:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Full history for comparison

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Restore index state
        uses: actions/cache@v4
        with:
          path: .augment-index-state
          # Use branch-specific cache key. The SHA suffix makes every run save a
          # fresh entry; restore-keys falls back to the newest entry for the branch.
          key: augment-index-${{ github.ref_name }}-${{ github.sha }}
          restore-keys: |
            augment-index-${{ github.ref_name }}-

      - name: Index repository
        id: index
        run: python src/main.py
        env:
          AUGMENT_API_TOKEN: ${{ secrets.AUGMENT_API_TOKEN }}
          AUGMENT_API_URL: ${{ secrets.AUGMENT_API_URL }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          STORAGE_TYPE: file
          # Branch-specific state path (automatically determined from GITHUB_REF)
          # STATE_PATH is optional - defaults to .augment-index-state/{branch}/state.json
          MAX_COMMITS: 100
          MAX_FILES: 500

      - name: Print results
        if: always()
        run: |
          echo "Success: ${{ steps.index.outputs.success }}"
          echo "Type: ${{ steps.index.outputs.type }}"
          echo "Files Indexed: ${{ steps.index.outputs.files_indexed }}"
          echo "Files Deleted: ${{ steps.index.outputs.files_deleted }}"
          echo "Checkpoint ID: ${{ steps.index.outputs.checkpoint_id }}"
          echo "Commit SHA: ${{ steps.index.outputs.commit_sha }}"

      - name: Upload state artifact
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: index-state
          path: .augment-index-state/
          # The state directory is dot-prefixed. upload-artifact v4 skips hidden
          # files and directories unless this is enabled, which would leave the
          # artifact empty.
          include-hidden-files: true
          retention-days: 30
80+

src/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
GitHub Action Repository Indexer
3+
4+
A Python example showing how to index a GitHub repository using the Augment SDK
5+
Direct Mode with incremental updates.
6+
7+
See README.md for usage instructions.
8+
"""
9+
10+
from .models import FileChange, IndexConfig, IndexResult, IndexState
11+
from .file_filter import should_filter_file
12+
from .github_client import GitHubClient
13+
from .index_manager import IndexManager
14+
15+
__all__ = [
16+
"FileChange",
17+
"IndexConfig",
18+
"IndexResult",
19+
"IndexState",
20+
"should_filter_file",
21+
"GitHubClient",
22+
"IndexManager",
23+
]
24+

src/file_filter.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
"""
2+
File filtering logic for GitHub repository indexing.
3+
"""
4+
5+
import re
6+
from pathlib import Path
7+
from typing import Optional
8+
9+
# File names that usually hold secrets or key material: the ".git" entry,
# certificate/keystore extensions, and the conventional SSH private-key names.
# The pattern is anchored, so it is meant to be matched against a bare file name.
KEYISH_PATTERN = re.compile(
    r'^(\.git|.*\.pem|.*\.key|.*\.pfx|.*\.p12|.*\.jks|.*\.keystore|.*\.pkcs12|.*\.crt|.*\.cer|id_rsa|id_ed25519|id_ecdsa|id_dsa)$'
)

# Upper bound on the size of a file accepted for upload: 1 MB (2**20 bytes).
DEFAULT_MAX_FILE_SIZE = 1 << 20
16+
17+
18+
def always_ignore_path(path: str) -> bool:
    """
    Report whether a path must be rejected outright as a safety precaution.

    The test is a plain substring search, so any occurrence of two
    consecutive dots triggers it, wherever it appears in the path.

    Args:
        path: The file path to check.

    Returns:
        True if ".." occurs anywhere in the path, False otherwise.
    """
    has_dotdot = ".." in path
    return has_dotdot
29+
30+
31+
def is_keyish_path(path: str) -> bool:
    """
    Report whether a path names a file that likely holds secrets or keys.

    Only the final path component is tested against KEYISH_PATTERN;
    directory names earlier in the path are not considered.

    Args:
        path: The file path to check.

    Returns:
        True if the file name matches KEYISH_PATTERN, False otherwise.
    """
    # Reduce the path to its last component before matching.
    basename = Path(path).name
    return KEYISH_PATTERN.match(basename) is not None
44+
45+
46+
def is_valid_file_size(size_bytes: int, max_file_size: int = DEFAULT_MAX_FILE_SIZE) -> bool:
    """
    Report whether a file is small enough to be uploaded.

    The limit is inclusive: a file of exactly max_file_size bytes is accepted.

    Args:
        size_bytes: The size of the file in bytes.
        max_file_size: Maximum allowed file size in bytes. Defaults to
            DEFAULT_MAX_FILE_SIZE.

    Returns:
        True if size_bytes does not exceed max_file_size.
    """
    within_limit = max_file_size >= size_bytes
    return within_limit
58+
59+
60+
def is_valid_utf8(content: bytes) -> bool:
    """
    Report whether raw file content decodes cleanly as UTF-8.

    Used as the binary-file check: content that fails to decode is
    treated as binary.

    Args:
        content: The file content as bytes.

    Returns:
        True if the content decodes as UTF-8, False if decoding fails.
    """
    try:
        content.decode("utf-8")
    except UnicodeDecodeError:
        return False
    return True
75+
76+
77+
def should_filter_file(
    path: str,
    content: bytes,
    max_file_size: Optional[int] = None,
) -> dict:
    """
    Decide whether a file must be excluded from indexing.

    The checks run in a fixed order and the first one that fails
    determines the reported reason:

      1. Path validation (path contains "..")
      2. File size check
      3. Keyish patterns (secret/key file names)
      4. UTF-8 validation (binary detection)

    The .augmentignore and .gitignore rules belong to the same priority
    list (after the size check and after the keyish check respectively)
    but are not evaluated here; the caller is expected to apply them.

    Args:
        path: The file path to check.
        content: The file content as bytes.
        max_file_size: Maximum allowed file size in bytes. None selects
            DEFAULT_MAX_FILE_SIZE.

    Returns:
        {"filtered": True, "reason": "..."} if the file should be skipped,
        or {"filtered": False} if the file should be included.
    """
    size_limit = DEFAULT_MAX_FILE_SIZE if max_file_size is None else max_file_size

    if always_ignore_path(path):
        # Security: never accept a path containing "..".
        reason = "path_contains_dotdot"
    elif not is_valid_file_size(len(content), size_limit):
        reason = f"file_too_large ({len(content)} bytes)"
    elif is_keyish_path(path):
        # File name looks like a secret or key.
        reason = "keyish_pattern"
    elif not is_valid_utf8(content):
        # Content that is not valid UTF-8 is treated as binary.
        reason = "binary_file"
    else:
        return {"filtered": False}

    return {"filtered": True, "reason": reason}
123+

0 commit comments

Comments
 (0)