Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
types:
- opened

permissions:
issues: write

jobs:
auto-assign:
runs-on: ubuntu-latest
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/auto-label.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Auto Label
on:
pull_request:
types: [opened, reopened, synchronized]
permissions:
pull-requests: write
issues: write
jobs:
label:
runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/issue-triage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: Issue Triage
on:
issues:
types: [opened]
permissions:
issues: write
jobs:
triage:
runs-on: ubuntu-latest
Expand Down
21 changes: 15 additions & 6 deletions .github/workflows/pr-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
types: [opened, reopened, synchronize, edited]
permissions:
issues: write
pull-requests: write
jobs:
validate:
runs-on: ubuntu-latest
Expand All @@ -18,22 +19,28 @@ jobs:
issues.push('❌ PR title too short (minimum 10 characters)');
}
if (!/^(feat|fix|docs|style|refactor|test|chore|perf|ci|build|revert)(\(.+\))?:/.test(pr.title)) {
issues.push('⚠️ PR title should follow conventional commits format');
issues.push(' PR title should follow conventional commits format');
}

if (!pr.body || pr.body.length < 20) {
issues.push('❌ PR description is required (minimum 20 characters)');
}

const totalChanges = (pr.additions || 0) + (pr.deletions || 0);

// For warnings (like large PR), we don't fail the PR. We only fail for errors (like title format or description missing).
const errors = issues.filter(issue => issue.startsWith('❌'));
const warnings = issues.filter(issue => issue.startsWith('⚠️'));

if (totalChanges > 500) {
issues.push(`⚠️ Large PR detected (${totalChanges} lines changed)`);
warnings.push(`⚠️ Large PR detected (${totalChanges} lines changed)`);
}

if (issues.length > 0) {
if (errors.length > 0 || warnings.length > 0) {
const allIssues = [...errors, ...warnings];
// Fork PRs get a read-only GITHUB_TOKEN; skip commenting to avoid errors
if (pr.head.repo.full_name === pr.base.repo.full_name) {
const comment = `## 🔍 PR Validation\n\n${issues.join('\n')}`;
const comment = `## 🔍 PR Validation\n\n${allIssues.join('\n')}`;
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
Expand All @@ -42,7 +49,9 @@ jobs:
});
} else {
core.warning('Skipping PR comment for fork PR (read-only token)');
issues.forEach(issue => core.warning(issue));
allIssues.forEach(issue => core.warning(issue));
}
if (errors.length > 0) {
core.setFailed('PR validation failed due to errors');
}
core.setFailed('PR validation failed');
}
166 changes: 90 additions & 76 deletions src/youtube_extension/backend/deployment_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pathlib import Path
from typing import Any, Optional

import requests
import aiohttp

from youtube_extension.backend.deploy import deploy_project as _adapter_deploy

Expand Down Expand Up @@ -493,43 +493,45 @@ async def _create_github_repository(self, repo_name: str, project_config: dict[s
"Accept": "application/vnd.github.v3+json"
}

# Get user info
user_response = requests.get("https://api.github.com/user", headers=headers)
if user_response.status_code != 200:
raise Exception(f"Failed to get GitHub user info: {user_response.text}")

user_data = user_response.json()
username = user_data["login"]

# Create repository
repo_data = {
"name": repo_name,
"description": f"Generated by UVAI from YouTube tutorial - {project_config.get('title', 'Unknown')}",
"private": False,
"auto_init": True,
"has_issues": True,
"has_projects": True,
"has_wiki": False
}

response = requests.post(
"https://api.github.com/user/repos",
headers=headers,
json=repo_data
)
async with aiohttp.ClientSession() as session:
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new aiohttp requests don’t set any client or per-request timeout. If GitHub stalls, these awaits can hang indefinitely and block the whole deployment workflow. Consider configuring aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=...)) (or passing timeout= to individual calls) with a sensible total timeout for GitHub API operations.

Suggested change
async with aiohttp.ClientSession() as session:
timeout = aiohttp.ClientTimeout(total=30)
async with aiohttp.ClientSession(timeout=timeout) as session:

Copilot uses AI. Check for mistakes.
# Get user info
async with session.get("https://api.github.com/user", headers=headers) as user_response:
if user_response.status != 200:
error_text = await user_response.text()
raise Exception(f"Failed to get GitHub user info: {error_text}")
user_data = await user_response.json()
username = user_data["login"]

if response.status_code not in [201, 422]: # 422 if repo already exists
raise Exception(f"Failed to create GitHub repository: {response.text}")
# Create repository
repo_data = {
"name": repo_name,
"description": f"Generated by UVAI from YouTube tutorial - {project_config.get('title', 'Unknown')}",
"private": False,
"auto_init": True,
"has_issues": True,
"has_projects": True,
"has_wiki": False
}

if response.status_code == 422:
# Repository already exists, get its info
repo_response = requests.get(f"https://api.github.com/repos/{username}/{repo_name}", headers=headers)
if repo_response.status_code == 200:
repo_info = repo_response.json()
else:
raise Exception(f"Repository exists but can't access it: {repo_response.text}")
else:
repo_info = response.json()
async with session.post(
"https://api.github.com/user/repos",
headers=headers,
json=repo_data
) as response:
if response.status not in [201, 422]: # 422 if repo already exists
error_text = await response.text()
raise Exception(f"Failed to create GitHub repository: {error_text}")

if response.status == 422:
# Repository already exists, get its info
async with session.get(f"https://api.github.com/repos/{username}/{repo_name}", headers=headers) as repo_response:
if repo_response.status == 200:
repo_info = await repo_response.json()
else:
error_text = await repo_response.text()
raise Exception(f"Repository exists but can't access it: {error_text}")
else:
repo_info = await response.json()

return {
"repo_name": repo_name,
Expand All @@ -549,53 +551,65 @@ async def _upload_to_github(self, project_path: str, repo_name: str) -> dict[str
"Accept": "application/vnd.github.v3+json"
}

# Get user info
user_response = requests.get("https://api.github.com/user", headers=headers)
user_data = user_response.json()
username = user_data["login"]

uploaded_files = []
project_path_obj = Path(project_path)
async with aiohttp.ClientSession() as session:
# Get user info
async with session.get("https://api.github.com/user", headers=headers) as user_response:
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In _upload_to_github, the /user call doesn’t check user_response.status before parsing JSON and reading login. If the token is invalid/expired (401/403) this will likely raise a KeyError or JSON decode error and obscure the real failure. Mirror _create_github_repository by validating the status code and raising an exception with the response text when it’s not 200.

Suggested change
async with session.get("https://api.github.com/user", headers=headers) as user_response:
async with session.get("https://api.github.com/user", headers=headers) as user_response:
if user_response.status != 200:
error_text = await user_response.text()
raise Exception(f"GitHub API error fetching user info: {user_response.status} {error_text}")

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_upload_to_github does not check the HTTP status code before parsing the JSON response, so failed API calls produce unclear error messages.

Fix on Vercel

user_data = await user_response.json()
username = user_data["login"]
Comment on lines +556 to +558
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The request to get user info is missing a status check. If the request to https://api.github.com/user fails, await user_response.json() might raise an exception, or accessing user_data["login"] will fail with a KeyError. This could lead to an unhandled exception. You should check user_response.status before attempting to parse the JSON, similar to how it's done in _create_github_repository.

            async with session.get("https://api.github.com/user", headers=headers) as user_response:
                if user_response.status != 200:
                    error_text = await user_response.text()
                    raise Exception(f"Failed to get GitHub user info: {error_text}")
                user_data = await user_response.json()
                username = user_data["login"]

Comment on lines +556 to +558
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The _upload_to_github function is missing an HTTP status check for the GitHub API response, which will cause a KeyError when the API returns an error (e.g., 401).
Severity: MEDIUM

Suggested Fix

Before calling await user_response.json(), check if user_response.status is 200. If it is not, read the response text and raise an exception with a descriptive error message, similar to the error handling implemented in the _create_github_repository function.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/youtube_extension/backend/deployment_manager.py#L556-L558

Potential issue: In the `_upload_to_github` function, the code makes a request to the
GitHub `/user` endpoint but does not check the HTTP status of the response before
attempting to parse it as JSON and access its keys. If the request fails due to an
invalid token, the GitHub API returns a non-200 status with a JSON body containing an
error message. The code will then attempt to access the `"login"` key on this error
object, which will raise a `KeyError` and crash the process with an unclear error
message instead of failing gracefully.

Did we get this right? 👍 / 👎 to inform future reviews.


# Directories to exclude from GitHub upload (standard .gitignore patterns)
EXCLUDED_DIRS = {'node_modules', '.next', '.git', '__pycache__', '.vercel', 'dist', '.turbo'}
uploaded_files = []
project_path_obj = Path(project_path)

def should_skip_path(path: Path) -> bool:
"""Check if any parent directory is in the exclusion list"""
return any(part in EXCLUDED_DIRS for part in path.parts)
# Directories to exclude from GitHub upload (standard .gitignore patterns)
EXCLUDED_DIRS = {'node_modules', '.next', '.git', '__pycache__', '.vercel', 'dist', '.turbo'}

# Upload each file
for file_path in project_path_obj.rglob("*"):
# Skip excluded directories and dotfiles
if should_skip_path(file_path.relative_to(project_path_obj)):
continue
if file_path.is_file() and not file_path.name.startswith('.'):
try:
relative_path = file_path.relative_to(project_path_obj)

# Read file content
with open(file_path, 'rb') as f:
content = f.read()

# Encode content
encoded_content = base64.b64encode(content).decode('utf-8')
def should_skip_path(path: Path) -> bool:
"""Check if any parent directory is in the exclusion list"""
return any(part in EXCLUDED_DIRS for part in path.parts)

# Upload file
file_data = {
"message": f"Add {relative_path}",
"content": encoded_content
}
# Prepare files for concurrent upload
upload_tasks = []
semaphore = asyncio.Semaphore(10) # Limit concurrent uploads to avoid rate limits
Comment on lines +570 to +572
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

upload_tasks accumulates one coroutine per file before any uploads start. For very large generated projects this can create a large in-memory list and delay the start of uploads until the directory walk completes. Consider scheduling uploads incrementally (e.g., create tasks as you iterate and await in bounded batches / use asyncio.TaskGroup with a semaphore) so memory stays bounded and uploads can begin earlier.

Copilot uses AI. Check for mistakes.

upload_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{relative_path}"
response = requests.put(upload_url, headers=headers, json=file_data)
async def upload_single_file(file_path: Path, relative_path: Path):
async with semaphore:
try:
# Read file content
with open(file_path, 'rb') as f:
content = f.read()

# Encode content
encoded_content = base64.b64encode(content).decode('utf-8')
Comment on lines +577 to +582
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

upload_single_file performs synchronous disk I/O (open(...).read()) inside an async coroutine. With concurrent uploads this can still block the event loop and reduce the benefit of switching to aiohttp (especially on slow disks / large files). Consider using aiofiles or asyncio.to_thread() for the file read (and base64 encoding if needed) to keep the upload path non-blocking.

Suggested change
# Read file content
with open(file_path, 'rb') as f:
content = f.read()
# Encode content
encoded_content = base64.b64encode(content).decode('utf-8')
# Read and encode file content in a background thread to avoid blocking the event loop
def _read_and_encode(p: Path) -> str:
with open(p, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
encoded_content = await asyncio.to_thread(_read_and_encode, file_path)

Copilot uses AI. Check for mistakes.
Comment on lines +578 to +582
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The upload_single_file function is vulnerable to Denial of Service via Memory Exhaustion. It reads entire files into memory using f.read(), which can lead to crashes with large or concurrent uploads. Additionally, the use of synchronous open() and f.read() blocks the asyncio event loop, negating aiohttp benefits. Implement file size limits, use streaming for content, and switch to aiofiles for asynchronous I/O to mitigate these issues.

                        async with aiofiles.open(file_path, 'rb') as f:
                            content = await f.read()


# Upload file
file_data = {
"message": f"Add {relative_path}",
"content": encoded_content
}

upload_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{relative_path}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The GitHub API upload URL does not percent-encode the file path, so paths containing spaces, special characters, or non-ASCII characters cause the API requests to fail.

Fix on Vercel

async with session.put(upload_url, headers=headers, json=file_data) as response:
if response.status in [201, 200]:
uploaded_files.append(str(relative_path))
else:
error_text = await response.text()
logger.warning(f"Failed to upload {relative_path}: {error_text}")

if response.status_code in [201, 200]:
uploaded_files.append(str(relative_path))
else:
logger.warning(f"Failed to upload {relative_path}: {response.text}")
except Exception as e:
logger.warning(f"Error uploading {file_path}: {e}")

# Collect tasks
for file_path in project_path_obj.rglob("*"):
Comment on lines +561 to +602
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The _upload_to_github function is potentially vulnerable to path traversal and file exfiltration. The project_path parameter is used without validation, which could allow sensitive files from arbitrary locations to be read and uploaded. It is critical to validate project_path (e.g., by resolving it with Path.resolve() and checking that it lies under an expected base path) to prevent this. Additionally, the nested upload_single_file function is missing a -> None return type hint, violating style guide requirements.

# Skip excluded directories and dotfiles
if should_skip_path(file_path.relative_to(project_path_obj)):
continue
if file_path.is_file() and not file_path.name.startswith('.'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

Arbitrary File Exfiltration via Symlink Following

The _upload_to_github function iterates through all files in the project directory using rglob("*") and uploads them to GitHub. It uses file_path.is_file() to identify files to upload. However, is_file() returns True for symbolic links that point to files. Since the project directory contains code generated by an AI and is subjected to a build process (npm run build) before the upload, a malicious build script (potentially generated via prompt injection) could create a symlink to a sensitive system file (e.g., /etc/passwd, server configuration files, or .env files). The _upload_to_github function will then read the content of the linked file and upload it to the user's GitHub repository, leading to arbitrary file exfiltration.

Remediation: Ensure that symbolic links are not followed during the file collection process. Use file_path.is_file() and not file_path.is_symlink() to filter files.

                if file_path.is_file() and not file_path.is_symlink() and not file_path.name.startswith('.'):

relative_path = file_path.relative_to(project_path_obj)
upload_tasks.append(upload_single_file(file_path, relative_path))

except Exception as e:
logger.warning(f"Error uploading {file_path}: {e}")
# Execute all uploads concurrently (limited by semaphore)
if upload_tasks:
await asyncio.gather(*upload_tasks)

return {
"files_uploaded": len(uploaded_files),
Expand Down
Loading