diff --git a/.vscode/launch.json b/.vscode/launch.json
index 33f659a..e219b0d 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -17,5 +17,19 @@
             ],
             "console": "integratedTerminal"
         },
+        {
+            "name": "Debug cocode CLI",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/.venv/bin/pipelex",
+            "args": [
+                "validate",
+                "all",
+                "-c",
+                "cocode/pipelex_libraries",
+            ],
+            "console": "integratedTerminal",
+            "justMyCode": false
+        }
     ]
 }
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 993a025..558d433 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -118,6 +118,8 @@ Always fix any issues reported by these tools before proceeding.
 - **Pipelines**: `cocode/pipelex_libraries/pipelines/`
 - **Tests**: `tests/` directory
 - **Documentation**: `docs/` directory
+
+
 # Pipeline Guide
 
 - Always first write your "plan" in natural language, then transcribe it in Pipelex.
diff --git a/analyze_hackathon_repos.sh b/analyze_hackathon_repos.sh
new file mode 100755
index 0000000..86807e9
--- /dev/null
+++ b/analyze_hackathon_repos.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+# Hackathon Repository Analysis Script using GNU Parallel
+# Usage: ./analyze_hackathon_repos.sh [repos_file] [parallel_jobs] [output_dir]
+
+set -e  # Exit on any error
+
+# Default values
+REPOS_FILE="${1:-repos.txt}"
+PARALLEL_JOBS="${2:-4}"
+OUTPUT_DIR="${3:-results}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_PATH="$SCRIPT_DIR/.venv"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'  # No Color
+
+echo_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+echo_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+echo_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+echo_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Function to check if we're in the right directory
+check_environment() {
+    if [[ ! -f "pyproject.toml" ]] || [[ ! -d ".venv" ]]; then
+        echo_error "This script must be run from the project root directory with a .venv folder"
+        exit 1
+    fi
+
+    if [[ ! -f "$REPOS_FILE" ]]; then
+        echo_error "Repository list file '$REPOS_FILE' not found!"
+        echo_info "Create a text file with one repository path/URL per line"
+        echo_info "Example content:"
+        echo "  https://github.com/user/repo1"
+        echo "  /path/to/local/repo2"
+        echo "  user/repo3"
+        exit 1
+    fi
+
+    # Check if virtual environment exists and has cocode
+    if [[ ! -f "$VENV_PATH/bin/python" ]]; then
+        echo_error "Virtual environment not found at $VENV_PATH"
+        exit 1
+    fi
+
+    # Test if cocode is available in venv
+    if ! "$VENV_PATH/bin/cocode" --help > /dev/null 2>&1; then
+        echo_error "cocode command not available in virtual environment"
+        echo_info "Make sure you've installed the project: pip install -e ."
+        exit 1
+    fi
+}
+
+# Function to analyze a single repository
+analyze_repo() {
+    local repo="$1"
+    local output_dir="$2"
+    local venv_path="$3"
+
+    echo_info "Analyzing: $repo"
+
+    # Use the virtual environment's cocode binary
+    if "$venv_path/bin/cocode" hackathon analyze "$repo" -o "$output_dir"; then
+        echo_success "Completed: $repo"
+        return 0
+    else
+        echo_error "Failed: $repo"
+        return 1
+    fi
+}
+
+# Export the function so parallel can use it
+export -f analyze_repo
+export -f echo_info
+export -f echo_success
+export -f echo_error
+
+# Main execution
+main() {
+    echo_info "Starting Hackathon Repository Analysis"
+    echo_info "Repository list: $REPOS_FILE"
+    echo_info "Parallel jobs: $PARALLEL_JOBS"
+    echo_info "Output directory: $OUTPUT_DIR"
+    echo ""
+
+    # Check environment
+    check_environment
+
+    # Count repositories
+    REPO_COUNT=$(wc -l < "$REPOS_FILE" | tr -d ' ')
+    echo_info "Found $REPO_COUNT repositories to analyze"
+
+    # Create output directory
+    mkdir -p "$OUTPUT_DIR"
+
+    # Create log directory
+    LOG_DIR="$OUTPUT_DIR/logs"
+    mkdir -p "$LOG_DIR"
+
+    echo_info "Starting parallel analysis..."
+    echo_warning "This may take a while depending on repository sizes and complexity"
+    echo ""
+
+    # Use GNU Parallel to process repositories
+    # --progress: Show progress bar
+    # --joblog: Log job execution details
+    # --results: Store stdout/stderr for each job
+    # --halt: Continue on errors but report them
+    # -j: Number of parallel jobs
+    if parallel \
+        --progress \
+        --joblog "$LOG_DIR/parallel_jobs.log" \
+        --results "$LOG_DIR/job_outputs" \
+        --halt never \
+        -j "$PARALLEL_JOBS" \
+        analyze_repo {} "$OUTPUT_DIR" "$VENV_PATH" :::: "$REPOS_FILE"; then
+
+        echo ""
+        echo_success "Parallel analysis completed!"
+    else
+        echo ""
+        echo_warning "Parallel analysis completed with some failures"
+    fi
+
+    # Summary
+    echo ""
+    echo_info "=== ANALYSIS SUMMARY ==="
+
+    # Count successful vs failed jobs from the joblog (column 7 is Exitval)
+    if [[ -f "$LOG_DIR/parallel_jobs.log" ]]; then
+        SUCCESSFUL=$(awk 'NR>1 && $7==0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")
+        FAILED=$(awk 'NR>1 && $7!=0 {count++} END {print count+0}' "$LOG_DIR/parallel_jobs.log")
+
+        echo_info "Total repositories: $REPO_COUNT"
+        echo_success "Successful analyses: $SUCCESSFUL"
+        if [[ $FAILED -gt 0 ]]; then
+            echo_error "Failed analyses: $FAILED"
+
+            # Extract and display failed repository names; in the logged command
+            # "analyze_repo <repo> <output_dir> <venv_path>", the repo is field 10
+            FAILED_REPOS=$(awk 'NR>1 && $7!=0 {print $10}' "$LOG_DIR/parallel_jobs.log")
+            if [[ -n "$FAILED_REPOS" ]]; then
+                echo_error "Failed repositories:"
+                while IFS= read -r repo; do
+                    echo_error "  - $repo"
+                done <<< "$FAILED_REPOS"
+            fi
+
+            echo_info "Check detailed logs: awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log'"
+        fi
+    fi
+
+    echo_info "Results saved to: $OUTPUT_DIR"
+    echo_info "Job logs saved to: $LOG_DIR"
+    echo ""
+    echo_info "To retry failed jobs, you can extract failed repo paths and create a new input file:"
+    echo_info "awk 'NR>1 && \$7!=0 {print \$10}' '$LOG_DIR/parallel_jobs.log' > failed_repos.txt"
+}
+
+# Show usage if --help is passed
+if [[ "$1" == "--help" ]] || [[ "$1" == "-h" ]]; then
+    echo "Usage: $0 [repos_file] [parallel_jobs] [output_dir]"
+    echo ""
+    echo "Arguments:"
+    echo "  repos_file     Path to text file containing repository paths/URLs (default: repos.txt)"
+    echo "  parallel_jobs  Number of parallel analysis jobs to run (default: 4)"
+    echo "  output_dir     Directory to save analysis results (default: results)"
+    echo ""
+    echo "Examples:"
+    echo "  $0                                    # Use defaults"
+    echo "  $0 my_repos.txt 8 hackathon_results   # Custom settings"
+    echo ""
+    echo "The repos file should contain one repository per line:"
+    echo "  https://github.com/user/repo1"
+    echo "  /path/to/local/repo2"
+    echo "  user/repo3"
+    exit 0
+fi
+
+# Run main function
+main "$@"
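For reference, the joblog parsing done with awk above can also be done in Python. A minimal sketch (the default log path is illustrative; it assumes GNU parallel's default joblog layout, where the 7th tab-separated field is `Exitval` and the command is `analyze_repo <repo> <output_dir> <venv_path>`):

```python
"""Minimal sketch: list failed repositories from a GNU parallel joblog."""

import sys


def failed_repos(joblog_path: str) -> list[str]:
    repos: list[str] = []
    with open(joblog_path, encoding="utf-8") as f:
        next(f)  # skip the header line (Seq, Host, ..., Exitval, Signal, Command)
        for line in f:
            fields = line.rstrip("\n").split("\t")
            exitval = int(fields[6])  # Exitval is the 7th tab-separated field
            command = fields[8]  # e.g. "analyze_repo https://github.com/user/repo1 results /path/.venv"
            if exitval != 0:
                repos.append(command.split()[1])  # the repo is the command's first argument
    return repos


if __name__ == "__main__":
    log_path = sys.argv[1] if len(sys.argv) > 1 else "results/logs/parallel_jobs.log"
    print("\n".join(failed_repos(log_path)))
```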
diff --git a/cocode/cli.py b/cocode/cli.py
index 001f77d..775aecd 100644
--- a/cocode/cli.py
+++ b/cocode/cli.py
@@ -12,6 +12,7 @@ from typing_extensions import override
 
 from cocode.github.github_cli import github_app
+from cocode.hackathon.hackathon_cli import hackathon_app
 from cocode.repox.repox_cli import repox_app
 from cocode.swe.swe_cli import swe_app
 from cocode.validation_cli import validation_app
@@ -48,6 +49,7 @@ def get_command(self, ctx: Context, cmd_name: str) -> Optional[Command]:
 
 # Add command groups
 app.add_typer(repox_app, name="repox", help="Repository processing and analysis commands")
 app.add_typer(swe_app, name="swe", help="Software Engineering analysis and automation commands")
+app.add_typer(hackathon_app, name="hackathon", help="Hackathon codebase analysis and evaluation commands")
 app.add_typer(validation_app, name="validation", help="Pipeline validation and setup commands")
 app.add_typer(github_app, name="github", help="GitHub-related operations and utilities")
diff --git a/cocode/hackathon/__init__.py b/cocode/hackathon/__init__.py
new file mode 100644
index 0000000..3d88425
--- /dev/null
+++ b/cocode/hackathon/__init__.py
@@ -0,0 +1 @@
+"""Hackathon analysis module."""
diff --git a/cocode/hackathon/hackathon_cli.py b/cocode/hackathon/hackathon_cli.py
new file mode 100644
index 0000000..36775c8
--- /dev/null
+++ b/cocode/hackathon/hackathon_cli.py
@@ -0,0 +1,81 @@
+"""
+Hackathon analysis CLI commands.
+"""
+
+import asyncio
+from typing import Annotated, List, Optional
+
+import typer
+
+from cocode.common import get_output_dir, validate_repo_path
+from cocode.repox.models import OutputStyle
+from cocode.repox.process_python import PythonProcessingRule
+
+from .hackathon_cmd import hackathon_analyze_repo
+
+hackathon_app = typer.Typer(
+    name="hackathon",
+    help="Hackathon codebase analysis commands",
+    add_completion=False,
+    rich_markup_mode="rich",
+)
+
+
+@hackathon_app.command("analyze")
+def hackathon_analyze(
+    repo_path: Annotated[
+        str,
+        typer.Argument(help="Repository path (local directory) or GitHub URL/identifier (owner/repo or https://github.com/owner/repo)"),
+    ] = ".",
+    output_dir: Annotated[
+        Optional[str],
+        typer.Option("--output-dir", "-o", help="Output directory path. Use 'stdout' to print to console. Defaults to config value if not provided"),
+    ] = None,
+    output_filename: Annotated[
+        str,
+        typer.Option("--output-filename", "-n", help="Output filename for HTML report"),
+    ] = "hackathon-analysis.html",
+    ignore_patterns: Annotated[
+        Optional[List[str]],
+        typer.Option("--ignore-pattern", "-i", help="List of patterns to ignore (in gitignore format)"),
+    ] = None,
+    python_processing_rule: Annotated[
+        PythonProcessingRule,
+        typer.Option("--python-rule", "-p", help="Python processing rule to apply", case_sensitive=False),
+    ] = PythonProcessingRule.INTERFACE,
+    output_style: Annotated[
+        OutputStyle,
+        typer.Option(
+            "--output-style", "-s", help="One of: repo_map, flat (contents only), or import_list (for --python-rule imports)", case_sensitive=False
+        ),
+    ] = OutputStyle.REPO_MAP,
+    include_patterns: Annotated[
+        Optional[List[str]],
+        typer.Option("--include-pattern", "-r", help="Optional pattern to filter files in the tree structure (glob pattern) - can be repeated"),
+    ] = None,
+    path_pattern: Annotated[
+        Optional[str],
+        typer.Option("--path-pattern", "-pp", help="Optional pattern to filter paths in the tree structure (regex pattern)"),
+    ] = None,
+    dry_run: Annotated[
+        bool,
+        typer.Option("--dry", help="Run pipeline in dry mode (no actual execution)"),
+    ] = False,
+) -> None:
+    """Analyze a hackathon codebase for features, architecture, quality, security, and X-factors. Generates an HTML report."""
+    repo_path = validate_repo_path(repo_path)
+    output_dir = get_output_dir(output_dir)
+
+    asyncio.run(
+        hackathon_analyze_repo(
+            repo_path=repo_path,
+            ignore_patterns=ignore_patterns,
+            include_patterns=include_patterns,
+            path_pattern=path_pattern,
+            python_processing_rule=python_processing_rule,
+            output_style=output_style,
+            output_filename=output_filename,
+            output_dir=output_dir,
+            dry_run=dry_run,
+        )
+    )
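Since the command only wires arguments through to `hackathon_analyze_repo`, it can be smoke-tested with typer's test runner. A sketch (the test name is illustrative; `--dry` skips pipeline execution, so only argument parsing and the repox extraction step run):

```python
"""Sketch: smoke-test the hackathon CLI wiring without executing the pipeline."""

from typer.testing import CliRunner

from cocode.hackathon.hackathon_cli import hackathon_app

runner = CliRunner()


def test_hackathon_analyze_dry_run() -> None:
    # --dry exercises argument parsing and the repox text-extraction step only
    result = runner.invoke(hackathon_app, ["analyze", ".", "--dry", "--output-dir", "results"])
    assert result.exit_code == 0
```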
+""" + +import json +import os +from pathlib import Path +from typing import List, Optional + +from pipelex import log, pretty_print +from pipelex.core.stuffs.stuff_content import TextContent +from pipelex.hub import get_pipeline_tracker, get_report_delegate +from pipelex.pipeline.execute import execute_pipeline +from pipelex.tools.misc.file_utils import ensure_path, get_incremental_directory_path + +from cocode.github.github_repo_manager import GitHubRepoManager +from cocode.pipelex_libraries.pipelines.hackathon_analysis.hackathon_analysis import ( + HackathonAnalysis, + HackathonAspects, + HackathonFinalAnalysis, + ProjectSummary, +) +from cocode.repox.models import OutputStyle +from cocode.repox.process_python import PythonProcessingRule +from cocode.repox.repox_cmd import repox_command + + +def _extract_repo_name(repo_path: str) -> str: + """Extract a clean repo name from GitHub URL or local path for directory naming.""" + # Check if it's a GitHub URL or identifier + if GitHubRepoManager.is_github_url(repo_path): + try: + owner, repo, _ = GitHubRepoManager.parse_github_url(repo_path) + return f"{owner}_{repo}".replace("/", "_") + except Exception: + # Fallback to extracting from URL manually + repo_name = repo_path.rstrip("/").split("/")[-1] + if repo_name.endswith(".git"): + repo_name = repo_name[:-4] + return repo_name.replace("/", "_") + else: + # Local path - use directory name + return Path(repo_path).name.replace("/", "_") + + +async def hackathon_analyze_repo( + repo_path: str, + ignore_patterns: Optional[List[str]], + include_patterns: Optional[List[str]], + path_pattern: Optional[str], + python_processing_rule: PythonProcessingRule, + output_style: OutputStyle, + output_filename: str, + output_dir: str, + dry_run: bool = False, +) -> None: + """Analyze a hackathon repository and generate HTML report.""" + + log.info(f"Starting hackathon analysis for repository: {repo_path}") + + # Create subdirectory based on repo name + repo_name = _extract_repo_name(repo_path) + repo_output_dir = get_incremental_directory_path(base_path=output_dir + "/hackathon", base_name=repo_name) + os.makedirs(repo_output_dir, exist_ok=True) + log.info(f"Created output directory: {repo_output_dir}") + + # Step 1: Convert repository to text using repox + log.info("Converting repository to text representation...") + + # Create a temporary text representation of the repo + temp_filename = "temp_repo_content.txt" + repox_command( + repo_path=repo_path, + ignore_patterns=ignore_patterns, + include_patterns=include_patterns, + path_pattern=path_pattern, + python_processing_rule=python_processing_rule, + output_style=output_style, + output_filename=temp_filename, + output_dir=repo_output_dir, # Use the repo-specific directory + to_stdout=False, + ) + + # Read the generated text content + temp_file_path = os.path.join(repo_output_dir, temp_filename) + try: + with open(temp_file_path, "r", encoding="utf-8") as f: + codebase_content = f.read() + except FileNotFoundError: + log.error(f"Failed to read temporary file: {temp_file_path}") + raise + finally: + # Clean up temporary file + if os.path.exists(temp_file_path): + os.remove(temp_file_path) + + if dry_run: + log.info("Dry run mode - skipping pipeline execution") + log.info(f"Would analyze codebase content of {len(codebase_content)} characters") + return + + # Step 2: Run the hackathon analysis pipeline + log.info("Running hackathon analysis pipeline...") + + try: + pipe_output = await execute_pipeline( + pipe_code="analyze_hackathon_project", + input_memory={ + 
"codebase": TextContent(text=codebase_content), + }, + ) + + # Extract the HTML report from the pipeline output + html_report = pipe_output.main_stuff_as(content_type=TextContent) + + # Step 3: Save the HTML report in the repo subdirectory + html_output_path = os.path.join(repo_output_dir, output_filename) + with open(html_output_path, "w", encoding="utf-8") as f: + f.write(html_report.text) + log.info(f"HTML report saved to: {html_output_path}") + + # Step 4: Assemble and save the complete analysis as JSON + # Extract components from working memory + project_summary_stuff = pipe_output.working_memory.get_stuff("project_summary") + aspects_stuff = pipe_output.working_memory.get_stuff("aspects") + final_analysis_stuff = pipe_output.working_memory.get_stuff("final_analysis") + + # Extract the content from each stuff + project_summary = project_summary_stuff.content_as(content_type=ProjectSummary) + aspects = aspects_stuff.content_as(content_type=HackathonAspects) + final_analysis = final_analysis_stuff.content_as(content_type=HackathonFinalAnalysis) + + # Assemble the complete analysis in Python + complete_analysis = HackathonAnalysis( + project_summary=project_summary, + feature_analysis=aspects.feature_analysis, + architecture_analysis=aspects.architecture_analysis, + code_quality_analysis=aspects.code_quality_analysis, + security_analysis=aspects.security_analysis, + x_factor_analysis=aspects.x_factor_analysis, + overall_score=final_analysis.overall_score, + final_verdict=final_analysis.final_verdict, + ) + + # Save as JSON + json_filename = output_filename.replace(".html", ".json") + json_output_path = os.path.join(repo_output_dir, json_filename) + + with open(json_output_path, "w", encoding="utf-8") as f: + json.dump(complete_analysis.model_dump(), f, indent=2, ensure_ascii=False) + log.info(f"Complete analysis JSON saved to: {json_output_path}") + + log.info(f"Hackathon analysis complete! 
Files saved to: {repo_output_dir}") + + # Display results + get_report_delegate().generate_report() + get_pipeline_tracker().output_flowchart() + + except Exception as e: + log.error(f"Failed to run hackathon analysis pipeline: {e}") + raise diff --git a/cocode/pipelex_libraries/llm_deck/cocode_deck.toml b/cocode/pipelex_libraries/llm_deck/cocode_deck.toml index 1a6ab62..9a40f2e 100644 --- a/cocode/pipelex_libraries/llm_deck/cocode_deck.toml +++ b/cocode/pipelex_libraries/llm_deck/cocode_deck.toml @@ -2,4 +2,7 @@ llm_for_large_text = { llm_handle = "gemini-2.5-pro", temperature = 0.1 } # llm_for_swe = { llm_handle = "gpt-4o", temperature = 0.1 } llm_for_swe = { llm_handle = "claude-4-sonnet", temperature = 0.1 } +llm_for_hackathon_analyse = { llm_handle = "blackboxai/google/gemini-2.5-pro", temperature = 0.2 } +llm_for_hackathon_analyse_security = { llm_handle = "blackboxai/google/gemini-2.5-pro", temperature = 0.1 } +llm_for_hackathon_analyse_x_factors = { llm_handle = "blackboxai/google/gemini-2.5-pro", temperature = 0.3 } diff --git a/cocode/pipelex_libraries/llm_integrations/blackboxai.toml b/cocode/pipelex_libraries/llm_integrations/blackboxai.toml new file mode 100644 index 0000000..87cf08c --- /dev/null +++ b/cocode/pipelex_libraries/llm_integrations/blackboxai.toml @@ -0,0 +1,153 @@ + +# OpenAI Models +[custom-blackboxai."blackboxai/openai/gpt-4o-mini".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 0.15, output = 0.60 } +platform_llm_id = { custom_llm = "blackboxai/openai/gpt-4o-mini" } + +[custom-blackboxai."blackboxai/openai/gpt-4o".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 2.50, output = 10.00 } +platform_llm_id = { custom_llm = "blackboxai/openai/gpt-4o" } + +[custom-blackboxai."blackboxai/openai/o1-mini".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 1.10, output = 4.40 } +platform_llm_id = { custom_llm = "blackboxai/openai/o1-mini" } + +[custom-blackboxai."blackboxai/openai/o4-mini".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 1.10, output = 4.40 } +platform_llm_id = { custom_llm = "blackboxai/openai/o4-mini" } + +[custom-blackboxai."blackboxai/openai/gpt-4.5-preview".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 75.00, output = 150.00 } +platform_llm_id = { custom_llm = "blackboxai/openai/gpt-4.5-preview" } + +# Anthropic Models +[custom-blackboxai."blackboxai/anthropic/claude-3.5-haiku".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 0.80, output = 4.00 } +platform_llm_id = { custom_llm = "blackboxai/anthropic/claude-3.5-haiku" } + +[custom-blackboxai."blackboxai/anthropic/claude-3.5-sonnet".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 3.00, output = 15.00 } +platform_llm_id = { custom_llm = "blackboxai/anthropic/claude-3.5-sonnet" } + +[custom-blackboxai."blackboxai/anthropic/claude-3.7-sonnet".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 3.00, output = 15.00 } +platform_llm_id = { custom_llm = "blackboxai/anthropic/claude-3.7-sonnet" } + +[custom-blackboxai."blackboxai/anthropic/claude-opus-4".latest] +is_gen_object_supported = true +is_vision_supported = true 
+cost_per_million_tokens_usd = { input = 15.00, output = 75.00 } +platform_llm_id = { custom_llm = "blackboxai/anthropic/claude-opus-4" } + +[custom-blackboxai."blackboxai/anthropic/claude-sonnet-4".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 3.00, output = 15.00 } +platform_llm_id = { custom_llm = "blackboxai/anthropic/claude-sonnet-4" } + +# Google Models +[custom-blackboxai."blackboxai/google/gemini-2.5-flash".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 0.30, output = 2.50 } +platform_llm_id = { custom_llm = "blackboxai/google/gemini-2.5-flash" } + +[custom-blackboxai."blackboxai/google/gemini-2.5-pro".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 1.25, output = 10.00 } +platform_llm_id = { custom_llm = "blackboxai/google/gemini-2.5-pro" } + +[custom-blackboxai."blackboxai/google/gemini-flash-1.5-8b".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 0.04, output = 0.15 } +platform_llm_id = { custom_llm = "blackboxai/google/gemini-flash-1.5-8b" } + +# Free Models +[custom-blackboxai."blackboxai/deepseek/deepseek-chat:free".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 0.00, output = 0.00 } +platform_llm_id = { custom_llm = "blackboxai/deepseek/deepseek-chat:free" } + +[custom-blackboxai."blackboxai/deepseek/deepseek-r1:free".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 0.00, output = 0.00 } +platform_llm_id = { custom_llm = "blackboxai/deepseek/deepseek-r1:free" } + +[custom-blackboxai."blackboxai/meta-llama/llama-3.3-70b-instruct:free".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 0.00, output = 0.00 } +platform_llm_id = { custom_llm = "blackboxai/meta-llama/llama-3.3-70b-instruct:free" } + +# Mistral Models +[custom-blackboxai."blackboxai/mistralai/mistral-large".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 2.00, output = 6.00 } +platform_llm_id = { custom_llm = "blackboxai/mistralai/mistral-large" } + +[custom-blackboxai."blackboxai/mistralai/pixtral-large-2411".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 2.00, output = 6.00 } +platform_llm_id = { custom_llm = "blackboxai/mistralai/pixtral-large-2411" } + +# Cost-Effective Models +[custom-blackboxai."blackboxai/meta-llama/llama-3.3-70b-instruct".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 0.04, output = 0.12 } +platform_llm_id = { custom_llm = "blackboxai/meta-llama/llama-3.3-70b-instruct" } + +[custom-blackboxai."blackboxai/qwen/qwen-2.5-72b-instruct".latest] +is_gen_object_supported = true +is_vision_supported = false +cost_per_million_tokens_usd = { input = 0.12, output = 0.39 } +platform_llm_id = { custom_llm = "blackboxai/qwen/qwen-2.5-72b-instruct" } + +# Vision Models +[custom-blackboxai."blackboxai/meta-llama/llama-3.2-11b-vision-instruct".latest] +is_gen_object_supported = true +is_vision_supported = true +cost_per_million_tokens_usd = { input = 0.05, output = 0.05 } +platform_llm_id = { custom_llm = "blackboxai/meta-llama/llama-3.2-11b-vision-instruct" } + 
+[custom-blackboxai."blackboxai/qwen/qwen2.5-vl-72b-instruct".latest]
+is_gen_object_supported = true
+is_vision_supported = true
+cost_per_million_tokens_usd = { input = 0.25, output = 0.75 }
+platform_llm_id = { custom_llm = "blackboxai/qwen/qwen2.5-vl-72b-instruct" }
+
+# Amazon Nova Models
+[custom-blackboxai."blackboxai/amazon/nova-micro-v1".latest]
+is_gen_object_supported = true
+is_vision_supported = false
+cost_per_million_tokens_usd = { input = 0.04, output = 0.14 }
+platform_llm_id = { custom_llm = "blackboxai/amazon/nova-micro-v1" }
+
+[custom-blackboxai."blackboxai/amazon/nova-lite-v1".latest]
+is_gen_object_supported = true
+is_vision_supported = false
+cost_per_million_tokens_usd = { input = 0.06, output = 0.24 }
+platform_llm_id = { custom_llm = "blackboxai/amazon/nova-lite-v1" }
+
diff --git a/cocode/pipelex_libraries/pipelines/hackathon_analysis/example_usage.py b/cocode/pipelex_libraries/pipelines/hackathon_analysis/example_usage.py
new file mode 100644
index 0000000..8bbff8b
--- /dev/null
+++ b/cocode/pipelex_libraries/pipelines/hackathon_analysis/example_usage.py
@@ -0,0 +1,301 @@
+"""Example usage of the hackathon analysis pipeline."""
+
+import asyncio
+from pathlib import Path
+
+from pipelex.core.stuffs.stuff_content import TextContent
+from pipelex.hub import get_pipeline_tracker, get_report_delegate
+from pipelex.pipelex import Pipelex
+from pipelex.pipeline.execute import execute_pipeline
+
+
+async def analyze_hackathon_project(codebase_content: str) -> str:
+    """Analyze a hackathon project and return HTML report.
+
+    Args:
+        codebase_content: Text representation of the codebase
+
+    Returns:
+        HTML report as string
+    """
+    # Run the pipeline
+    pipe_output = await execute_pipeline(
+        pipe_code="analyze_hackathon_project",
+        input_memory={
+            "codebase": TextContent(text=codebase_content),
+        },
+    )
+
+    # Return the HTML report
+    return pipe_output.main_stuff_as_str
+
+
+async def main():
+    """Main example function."""
+
+    # Example codebase content (you would replace this with actual codebase analysis)
+    sample_codebase = """
+# Weather App - Hackathon Project
+
+## File Structure:
+- app.py (Flask backend)
+- templates/index.html (Frontend)
+- static/style.css (Styling)
+- weather_api.py (Weather service)
+- requirements.txt (Dependencies)
+- tests/test_weather.py (Unit tests)
+- .github/workflows/ci.yml (CI/CD)
+- README.md (Documentation)
+
+## app.py:
+```python
+from flask import Flask, render_template, request, jsonify
+from weather_api import WeatherService
+import os
+
+app = Flask(__name__)
+weather_service = WeatherService(api_key=os.getenv('WEATHER_API_KEY'))
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/api/weather/<city>')
+def get_weather(city):
+    try:
+        weather_data = weather_service.get_current_weather(city)
+        return jsonify(weather_data)
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+@app.route('/api/forecast/<city>')
+def get_forecast(city):
+    try:
+        forecast_data = weather_service.get_forecast(city)
+        return jsonify(forecast_data)
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)
+```
+
+## weather_api.py:
+```python
+import requests
+from typing import Dict, List
+from dataclasses import dataclass
+
+@dataclass
+class WeatherData:
+    temperature: float
+    humidity: int
+    description: str
+    city: str
+
+class WeatherService:
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.base_url = 
"https://api.openweathermap.org/data/2.5" + + def get_current_weather(self, city: str) -> Dict: + url = f"{self.base_url}/weather" + params = { + 'q': city, + 'appid': self.api_key, + 'units': 'metric' + } + + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + return { + 'temperature': data['main']['temp'], + 'humidity': data['main']['humidity'], + 'description': data['weather'][0]['description'], + 'city': data['name'] + } + + def get_forecast(self, city: str) -> List[Dict]: + url = f"{self.base_url}/forecast" + params = { + 'q': city, + 'appid': self.api_key, + 'units': 'metric' + } + + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + forecast = [] + + for item in data['list'][:5]: # Next 5 forecasts + forecast.append({ + 'datetime': item['dt_txt'], + 'temperature': item['main']['temp'], + 'description': item['weather'][0]['description'] + }) + + return forecast +``` + +## tests/test_weather.py: +```python +import pytest +from unittest.mock import Mock, patch +from weather_api import WeatherService + +class TestWeatherService: + def setup_method(self): + self.weather_service = WeatherService("test_api_key") + + @patch('weather_api.requests.get') + def test_get_current_weather_success(self, mock_get): + # Mock successful API response + mock_response = Mock() + mock_response.json.return_value = { + 'main': {'temp': 25.5, 'humidity': 60}, + 'weather': [{'description': 'clear sky'}], + 'name': 'London' + } + mock_get.return_value = mock_response + + result = self.weather_service.get_current_weather('London') + + assert result['temperature'] == 25.5 + assert result['humidity'] == 60 + assert result['description'] == 'clear sky' + assert result['city'] == 'London' + + @patch('weather_api.requests.get') + def test_get_forecast_success(self, mock_get): + # Mock successful forecast API response + mock_response = Mock() + mock_response.json.return_value = { + 'list': [ + { + 'dt_txt': '2024-01-01 12:00:00', + 'main': {'temp': 20.0}, + 'weather': [{'description': 'sunny'}] + } + ] + } + mock_get.return_value = mock_response + + result = self.weather_service.get_forecast('London') + + assert len(result) == 1 + assert result[0]['temperature'] == 20.0 + assert result[0]['description'] == 'sunny' +``` + +## .github/workflows/ci.yml: +```yaml +name: CI + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install pytest pytest-cov + + - name: Run tests + run: | + pytest --cov=. --cov-report=xml + + - name: Upload coverage + uses: codecov/codecov-action@v1 +``` + +## requirements.txt: +``` +Flask==2.3.3 +requests==2.31.0 +python-dotenv==1.0.0 +``` + +## README.md: +```markdown +# Weather App + +A real-time weather application built for the hackathon. + +## Features +- Current weather for any city +- 5-day weather forecast +- Clean, responsive UI +- Real-time data from OpenWeatherMap API +- Error handling and validation +- Unit tests with 90%+ coverage +- CI/CD pipeline + +## Technology Stack +- Backend: Flask (Python) +- Frontend: HTML5, CSS3, JavaScript +- API: OpenWeatherMap +- Testing: pytest +- CI/CD: GitHub Actions + +## Setup +1. Get API key from OpenWeatherMap +2. Set environment variable: `export WEATHER_API_KEY=your_key` +3. 
Install dependencies: `pip install -r requirements.txt`
+4. Run tests: `pytest`
+5. Start app: `python app.py`
+
+## API Endpoints
+- GET `/api/weather/<city>` - Current weather
+- GET `/api/forecast/<city>` - 5-day forecast
+
+## Testing
+Run tests with coverage:
+```bash
+pytest --cov=. --cov-report=html
+```
+
+## Security
+- API keys stored as environment variables
+- Input validation on all endpoints
+- Error handling prevents information leakage
+```
+    """
+
+    # Start Pipelex
+    Pipelex.make()
+
+    # Analyze the codebase
+    print("Analyzing hackathon project...")
+    html_report = await analyze_hackathon_project(sample_codebase)
+
+    # Save the report
+    output_path = Path("hackathon_analysis_report.html")
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(html_report)
+
+    print(f"Analysis complete! Report saved to: {output_path}")
+
+    # Display cost report
+    get_report_delegate().generate_report()
+
+    # Output pipeline flowchart
+    get_pipeline_tracker().output_flowchart()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analysis.py b/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analysis.py
new file mode 100644
index 0000000..f35ac9b
--- /dev/null
+++ b/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analysis.py
@@ -0,0 +1,101 @@
+"""Structured content models for hackathon codebase analysis."""
+
+from typing import List
+
+from pipelex.core.stuffs.stuff_content import StructuredContent
+from pydantic import Field
+
+
+class ProjectSummary(StructuredContent):
+    """Summary of the hackathon project concept and purpose."""
+
+    project_name: str = Field(..., description="Name of the project")
+    concept: str = Field(..., description="Core concept and purpose of the project")
+    target_audience: str = Field(..., description="Intended users or audience")
+    key_value_proposition: str = Field(..., description="Main value the project provides")
+    technology_stack: List[str] = Field(default_factory=list, description="Main technologies used")
+
+
+class FeatureAnalysis(StructuredContent):
+    """Analysis of project features - real vs fake functionality."""
+
+    real_features: List[str] = Field(default_factory=list, description="Features with actual implementation")
+    fake_features: List[str] = Field(default_factory=list, description="Features that are just UI mockups")
+    partially_implemented: List[str] = Field(default_factory=list, description="Features with incomplete implementation")
+    evidence: str = Field(..., description="Evidence supporting the analysis")
+    feature_depth_score: int = Field(..., description="Score 1-10 for feature implementation depth")
+
+
+class ArchitectureAnalysis(StructuredContent):
+    """Analysis of code architecture and modularity."""
+
+    architecture_pattern: str = Field(..., description="Main architectural pattern used")
+    separation_of_concerns: str = Field(..., description="How well concerns are separated")
+    code_organization: str = Field(..., description="Assessment of file and folder structure")
+    design_patterns: List[str] = Field(default_factory=list, description="Design patterns identified")
+    technical_debt: str = Field(..., description="Assessment of technical debt")
+    modularity_score: int = Field(..., description="Score 1-10 for code modularity")
+
+
+class CodeQualityAnalysis(StructuredContent):
+    """Analysis of code quality metrics."""
+
+    has_tests: bool = Field(..., description="Whether tests are present")
+    test_coverage_estimate: str = Field(..., description="Estimated test coverage")
+    has_typing: bool = Field(..., description="Whether type hints are used")
+    has_linting: bool = Field(..., description="Whether linting is configured")
+    has_ci_cd: bool = Field(..., description="Whether CI/CD is set up")
+    documentation_quality: str = Field(..., description="Quality of documentation")
+    code_style_consistency: str = Field(..., description="Consistency of code style")
+    quality_score: int = Field(..., description="Overall quality score 1-10")
+
+
+class SecurityAnalysis(StructuredContent):
+    """Analysis of security vulnerabilities and issues."""
+
+    vulnerabilities_found: List[str] = Field(default_factory=list, description="Security vulnerabilities identified")
+    dependency_issues: List[str] = Field(default_factory=list, description="Dependency security issues")
+    secrets_in_code: List[str] = Field(default_factory=list, description="Hardcoded secrets or credentials")
+    security_best_practices: List[str] = Field(default_factory=list, description="Security practices followed")
+    recommendations: List[str] = Field(default_factory=list, description="Security improvement recommendations")
+    security_score: int = Field(..., description="Security score 1-10")
+
+
+class XFactorAnalysis(StructuredContent):
+    """Analysis of standout positive or negative elements."""
+
+    positive_highlights: List[str] = Field(default_factory=list, description="Impressive or innovative aspects")
+    negative_highlights: List[str] = Field(default_factory=list, description="Concerning or problematic aspects")
+    innovation_score: int = Field(..., description="Innovation score 1-10")
+    execution_quality: int = Field(..., description="Execution quality score 1-10")
+    overall_impression: str = Field(..., description="Overall impression of the project")
+
+
+class HackathonAspects(StructuredContent):
+    """Analysis of aspects of a hackathon codebase."""
+
+    feature_analysis: FeatureAnalysis = Field(..., description="Feature implementation analysis")
+    architecture_analysis: ArchitectureAnalysis = Field(..., description="Architecture and modularity analysis")
+    code_quality_analysis: CodeQualityAnalysis = Field(..., description="Code quality metrics")
+    security_analysis: SecurityAnalysis = Field(..., description="Security assessment")
+    x_factor_analysis: XFactorAnalysis = Field(..., description="Standout elements analysis")
+
+
+class HackathonFinalAnalysis(StructuredContent):
+    """Final hackathon codebase analysis."""
+
+    final_verdict: str = Field(..., description="Final assessment and recommendation")
+    overall_score: int = Field(..., description="Overall project score 1-100")
+
+
+class HackathonAnalysis(StructuredContent):
+    """Complete hackathon codebase analysis result."""
+
+    project_summary: ProjectSummary = Field(..., description="Project concept summary")
+    feature_analysis: FeatureAnalysis = Field(..., description="Feature implementation analysis")
+    architecture_analysis: ArchitectureAnalysis = Field(..., description="Architecture and modularity analysis")
+    code_quality_analysis: CodeQualityAnalysis = Field(..., description="Code quality metrics")
+    security_analysis: SecurityAnalysis = Field(..., description="Security assessment")
+    x_factor_analysis: XFactorAnalysis = Field(..., description="Standout elements analysis")
+    overall_score: int = Field(..., description="Overall project score 1-100")
+    final_verdict: str = Field(..., description="Final assessment and recommendation")
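For a feel of the data shape these models enforce, a small sketch that builds a `ProjectSummary` and round-trips it through JSON (all field values are made up; `StructuredContent` is assumed to behave as a standard pydantic v2 model, as the `model_dump()` call in hackathon_cmd.py suggests):

```python
"""Sketch: instantiate an analysis model and round-trip it through JSON."""

import json

from cocode.pipelex_libraries.pipelines.hackathon_analysis.hackathon_analysis import ProjectSummary

summary = ProjectSummary(
    project_name="Weather App",
    concept="Real-time weather dashboard built during a hackathon",
    target_audience="City commuters",
    key_value_proposition="One-glance current conditions and a 5-day forecast",
    technology_stack=["Flask", "OpenWeatherMap API"],
)

# Serialize with pydantic's model_dump(), as hackathon_cmd.py does for the full analysis
as_json = json.dumps(summary.model_dump(), indent=2, ensure_ascii=False)
restored = ProjectSummary.model_validate(json.loads(as_json))
assert restored == summary
```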
diff --git a/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analyzer.plx b/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analyzer.plx
new file mode 100644
index 0000000..eb3fb99
--- /dev/null
+++ b/cocode/pipelex_libraries/pipelines/hackathon_analysis/hackathon_analyzer.plx
@@ -0,0 +1,575 @@
+domain = "hackathon_analyzer"
+definition = "Pipeline to analyze hackathon codebases for features, architecture, quality, security, and X-factors"
+
+[concept]
+CodebaseContent = "Text representation of a codebase including file structure and code"
+ProjectSummary = "Summary of the hackathon project concept and purpose"
+FeatureAnalysis = "Analysis of project features - real vs fake functionality"
+ArchitectureAnalysis = "Analysis of code architecture and modularity"
+CodeQualityAnalysis = "Analysis of code quality metrics"
+SecurityAnalysis = "Analysis of security vulnerabilities and issues"
+XFactorAnalysis = "Analysis of standout positive or negative elements"
+HackathonAspects = "Analysis of multiple aspects of a hackathon codebase"
+HackathonFinalAnalysis = "Final analysis of the hackathon codebase"
+HackathonAnalysis = "Complete hackathon codebase analysis result"
+HTMLReport = "HTML formatted analysis report"
+
+[pipe.analyze_hackathon_project]
+type = "PipeSequence"
+definition = "Complete analysis of a hackathon codebase"
+inputs = { codebase = "CodebaseContent" }
+output = "HTMLReport"
+steps = [
+    { pipe = "summarize_hackathon_project", result = "project_summary" },
+    { pipe = "analyze_aspects", result = "aspects" },
+    { pipe = "final_hackathon_analysis", result = "final_analysis" },
+    { pipe = "generate_hackathon_html_report", result = "html_report" }
+]
+
+[pipe.analyze_aspects]
+type = "PipeParallel"
+definition = "Analyze different aspects of the hackathon project"
+inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" }
+output = "HackathonAspects"
+parallels = [
+    { pipe = "analyze_hackathon_features", result = "feature_analysis" },
+    { pipe = "analyze_hackathon_architecture", result = "architecture_analysis" },
+    { pipe = "analyze_hackathon_code_quality", result = "code_quality_analysis" },
+    { pipe = "analyze_hackathon_security", result = "security_analysis" },
+    { pipe = "identify_hackathon_x_factors", result = "x_factor_analysis" },
+]
+combined_output = "HackathonAspects"
+
+[pipe.final_hackathon_analysis]
+type = "PipeLLM"
+definition = "Assess the overall score and final verdict"
+inputs = { project_summary = "ProjectSummary", aspects = "HackathonAspects" }
+output = "HackathonFinalAnalysis"
+llm = "llm_for_hackathon_analyse"
+system_prompt = "You are a hackathon judge who synthesizes multiple analysis reports into a final comprehensive assessment."
+prompt_template = """
+Synthesize the following project summary and detailed analyses into a final hackathon assessment.
+
+@project_summary
+
+Detailed Analyses:
+@aspects
+
+Provide an overall score (1-100) and final verdict considering all aspects. Weight the scoring as follows:
+- Features (30%): Real functionality vs mockups
+- Architecture (20%): Code organization and design
+- Code Quality (20%): Tests, documentation, best practices
+- Security (15%): Vulnerabilities and security practices
+- X-Factor (15%): Innovation and execution quality
+
+Base your final verdict on the weighted score and provide constructive feedback.
+
+Keep your response short and concise.
+""" + +[pipe.summarize_hackathon_project] +type = "PipeLLM" +definition = "Analyze and summarize the project concept" +inputs = { codebase = "CodebaseContent" } +output = "ProjectSummary" +llm = "llm_for_hackathon_analyse" +system_prompt = "You are an expert at analyzing software projects and understanding their core concepts and purposes." +prompt_template = """ +Analyze this hackathon codebase and provide a comprehensive project summary. + +Focus on: +- Project name and core concept +- Target audience and use cases +- Key value proposition +- Technology stack used + +@codebase + +Provide a clear, concise summary of what this project is trying to achieve. Keep your response short and concise. +""" + +[pipe.analyze_hackathon_features] +type = "PipeLLM" +definition = "Analyze whether features are real implementations or just UI mockups" +inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" } +output = "FeatureAnalysis" +llm = "llm_for_hackathon_analyse" +system_prompt = "You are an expert at distinguishing between real feature implementations and fake/mockup features in codebases." +prompt_template = """ +Analyze this codebase to determine which features are actually implemented vs just UI mockups. + +Look for: +- Real backend logic and data processing +- Actual API endpoints with business logic +- Database operations and data persistence +- vs. Static UI components without functionality +- Hardcoded data instead of dynamic content +- Missing backend implementations + +@project_summary + +@codebase + +Categorize features as real, fake, or partially implemented. Provide evidence for your assessment. Keep your response short and concise. +""" + +[pipe.analyze_hackathon_architecture] +type = "PipeLLM" +definition = "Evaluate code architecture and modularity" +inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" } +output = "ArchitectureAnalysis" +llm = "llm_for_hackathon_analyse" +system_prompt = "You are a software architecture expert specializing in code organization and design patterns." +prompt_template = """ +Analyze the architecture and modularity of this codebase. + +Evaluate: +- Overall architectural pattern (MVC, microservices, monolith, etc.) +- Code organization and file structure +- Separation of concerns +- Modularity and reusability +- Design patterns used +- Technical debt and code smells + +@project_summary + +@codebase + +Provide scores and detailed analysis of the architectural quality. Keep your response short and concise. +""" + +[pipe.analyze_hackathon_code_quality] +type = "PipeLLM" +definition = "Assess code quality metrics including tests, typing, documentation" +inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" } +output = "CodeQualityAnalysis" +llm = "llm_for_hackathon_analyse" +system_prompt = "You are a code quality expert who evaluates testing, documentation, and development practices." +prompt_template = """ +Analyze the code quality of this hackathon project. + +Check for: +- Unit tests and test coverage +- Integration tests +- Type hints and static typing +- Linting configuration (eslint, pylint, etc.) +- CI/CD setup +- Documentation quality (README, docstrings, comments) +- Code style consistency +- Error handling + +@project_summary + +@codebase + +Provide a comprehensive quality assessment with specific examples. Keep your response short and concise. 
+"""
+
+[pipe.analyze_hackathon_security]
+type = "PipeLLM"
+definition = "Identify security vulnerabilities and issues"
+inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" }
+output = "SecurityAnalysis"
+llm = "llm_for_hackathon_analyse_security"
+system_prompt = "You are a cybersecurity expert specializing in code security analysis and vulnerability assessment."
+prompt_template = """
+Perform a security analysis of this hackathon codebase.
+
+Look for:
+- Common vulnerabilities (SQL injection, XSS, CSRF, etc.)
+- Hardcoded secrets, API keys, passwords
+- Insecure dependencies or outdated packages
+- Missing authentication/authorization
+- Insecure data handling
+- Missing input validation
+- Security best practices followed
+
+@project_summary
+
+@codebase
+
+Identify specific security issues and provide recommendations for improvement. Keep your response short and concise.
+"""
+
+[pipe.identify_hackathon_x_factors]
+type = "PipeLLM"
+definition = "Identify standout positive or negative elements"
+inputs = { codebase = "CodebaseContent", project_summary = "ProjectSummary" }
+output = "XFactorAnalysis"
+llm = "llm_for_hackathon_analyse_x_factors"
+system_prompt = "You are an expert judge at hackathons who can identify what makes projects stand out positively or negatively."
+prompt_template = """
+Analyze this hackathon project for X-factors - elements that make it stand out.
+
+Look for:
+POSITIVE:
+- Innovative approaches or creative solutions
+- Impressive technical implementation
+- Unique features or use cases
+- Exceptional code quality or architecture
+- Creative use of technologies
+- Polished user experience
+
+NEGATIVE:
+- Major technical flaws or bugs
+- Poor user experience
+- Overly complex or convoluted solutions
+- Missing core functionality
+- Security vulnerabilities
+- Plagiarism or copied code
+
+@project_summary
+
+@codebase
+
+Identify what makes this project memorable (good or bad) and rate innovation and execution quality. Keep your response short and concise.
+"""
+
+[pipe.generate_hackathon_html_report]
+type = "PipeJinja2"
+definition = "Generate HTML report using Jinja2 template"
+inputs = { project_summary = "ProjectSummary", aspects = "HackathonAspects", final_analysis = "HackathonFinalAnalysis" }
+output = "HTMLReport"
+jinja2 = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <title>Hackathon Analysis Report - {{ project_summary.project_name }}</title>
+</head>
+<body>
+    <header>
+        <h1>{{ project_summary.project_name }}</h1>
+        <p>{{ project_summary.concept }}</p>
+        <p><strong>Overall Score:</strong> {{ final_analysis.overall_score }}/100</p>
+        <p>
+            {% for tech in project_summary.technology_stack %}
+            <span>{{ tech }}</span>
+            {% endfor %}
+        </p>
+    </header>
+
+    <section>
+        <h2>Executive Summary</h2>
+        <p><strong>Target Audience:</strong> {{ project_summary.target_audience }}</p>
+        <p><strong>Value Proposition:</strong> {{ project_summary.key_value_proposition }}</p>
+        <h3>Final Verdict</h3>
+        <p>{{ final_analysis.final_verdict }}</p>
+    </section>
+
+    <section>
+        <h2>Features Analysis</h2>
+        <p><strong>Feature Depth Score:</strong> {{ aspects.feature_analysis.feature_depth_score }}/10</p>
+        <h3>Real Features:</h3>
+        <ul>
+            {% for feature in aspects.feature_analysis.real_features %}
+            <li>{{ feature }}</li>
+            {% endfor %}
+        </ul>
+        <h3>Fake/Mockup Features:</h3>
+        <ul>
+            {% for feature in aspects.feature_analysis.fake_features %}
+            <li>{{ feature }}</li>
+            {% endfor %}
+        </ul>
+        <p><strong>Evidence:</strong> {{ aspects.feature_analysis.evidence }}</p>
+    </section>
+
+    <section>
+        <h2>Architecture &amp; Modularity</h2>
+        <p><strong>Modularity Score:</strong> {{ aspects.architecture_analysis.modularity_score }}/10</p>
+        <p><strong>Pattern:</strong> {{ aspects.architecture_analysis.architecture_pattern }}</p>
+        <p><strong>Organization:</strong> {{ aspects.architecture_analysis.code_organization }}</p>
+        <p><strong>Separation of Concerns:</strong> {{ aspects.architecture_analysis.separation_of_concerns }}</p>
+        <p>
+            {% for pattern in aspects.architecture_analysis.design_patterns %}
+            <span>{{ pattern }}</span>
+            {% endfor %}
+        </p>
+    </section>
+
+    <section>
+        <h2>Code Quality</h2>
+        <p><strong>Quality Score:</strong> {{ aspects.code_quality_analysis.quality_score }}/10</p>
+        <p><strong>Tests:</strong> {% if aspects.code_quality_analysis.has_tests %}✅ Present{% else %}❌ Missing{% endif %}</p>
+        <p><strong>Type Hints:</strong> {% if aspects.code_quality_analysis.has_typing %}✅ Present{% else %}❌ Missing{% endif %}</p>
+        <p><strong>Linting:</strong> {% if aspects.code_quality_analysis.has_linting %}✅ Configured{% else %}❌ Not configured{% endif %}</p>
+        <p><strong>CI/CD:</strong> {% if aspects.code_quality_analysis.has_ci_cd %}✅ Set up{% else %}❌ Not set up{% endif %}</p>
+        <p><strong>Test Coverage:</strong> {{ aspects.code_quality_analysis.test_coverage_estimate }}</p>
+        <p><strong>Documentation:</strong> {{ aspects.code_quality_analysis.documentation_quality }}</p>
+    </section>
+
+    <section>
+        <h2>Security Analysis</h2>
+        <p><strong>Security Score:</strong> {{ aspects.security_analysis.security_score }}/10</p>
+        {% if aspects.security_analysis.vulnerabilities_found %}
+        <h3>Vulnerabilities Found:</h3>
+        <ul>
+            {% for vuln in aspects.security_analysis.vulnerabilities_found %}
+            <li>{{ vuln }}</li>
+            {% endfor %}
+        </ul>
+        {% endif %}
+        {% if aspects.security_analysis.security_best_practices %}
+        <h3>Security Practices:</h3>
+        <ul>
+            {% for practice in aspects.security_analysis.security_best_practices %}
+            <li>{{ practice }}</li>
+            {% endfor %}
+        </ul>
+        {% endif %}
+        {% if aspects.security_analysis.recommendations %}
+        <h3>Recommendations:</h3>
+        <ul>
+            {% for rec in aspects.security_analysis.recommendations %}
+            <li>{{ rec }}</li>
+            {% endfor %}
+        </ul>
+        {% endif %}
+    </section>
+
+    <section>
+        <h2>X-Factor Analysis</h2>
+        <p><strong>Innovation Score:</strong> {{ aspects.x_factor_analysis.innovation_score }}/10</p>
+        <p><strong>Execution Quality:</strong> {{ aspects.x_factor_analysis.execution_quality }}/10</p>
+        {% if aspects.x_factor_analysis.positive_highlights %}
+        <h3>Positive Highlights:</h3>
+        <ul>
+            {% for highlight in aspects.x_factor_analysis.positive_highlights %}
+            <li>{{ highlight }}</li>
+            {% endfor %}
+        </ul>
+        {% endif %}
+        {% if aspects.x_factor_analysis.negative_highlights %}
+        <h3>Areas for Improvement:</h3>
+        <ul>
+            {% for highlight in aspects.x_factor_analysis.negative_highlights %}
+            <li>{{ highlight }}</li>
+            {% endfor %}
+        </ul>
+        {% endif %}
+        <p><strong>Overall Impression:</strong> {{ aspects.x_factor_analysis.overall_impression }}</p>
+    </section>
+</body>
+</html>
+ + +""" + diff --git a/cocode/repox/repox_processor.py b/cocode/repox/repox_processor.py index e7db99e..b9f70e5 100644 --- a/cocode/repox/repox_processor.py +++ b/cocode/repox/repox_processor.py @@ -32,11 +32,16 @@ class RepoxException(RootException): ".git", ".github", ".venv", + "venv", + "env", + "myenv", + ".env", ".mypy_cache", ".pytest_cache", ".ruff_cache", ".vscode", "__pycache__", + "node_modules", "results/", "temp/", "__init__.py", @@ -54,6 +59,7 @@ class RepoxException(RootException): ".env.example", ".ruff_cache/", ".vscode/", + "node_modules", "trigger_pipeline", "LICENSE", "Makefile", @@ -128,10 +134,12 @@ def _ignore_specs( gitignore_spec: Optional[PathSpec] = None if IS_GITIGNORE_APPLIED: gitignore_path = os.path.join(repo_path, ".gitignore") - if os.path.exists(gitignore_path): + if os.path.exists(gitignore_path) and os.path.isfile(gitignore_path): log.debug(f"Loading .gitignore from path: {gitignore_path}") with open(gitignore_path, "r", encoding="utf-8") as f: gitignore_spec = pathspec.PathSpec.from_lines("gitwildmatch", f) + elif os.path.exists(gitignore_path) and os.path.isdir(gitignore_path): + log.warning(f".gitignore exists as a directory instead of a file at: {gitignore_path}, skipping") return gitignore_spec, content_ignore_spec, tree_and_content_ignore_spec @@ -326,6 +334,17 @@ def _should_ignore_file( if os.path.basename(file_path).startswith("repo-to-text_"): log.debug(f"File '{relative_path}' is ignored because of repo-to-text_ prefix") return True + + # Check if it's a broken symlink or non-existent file + if os.path.islink(file_path) and not os.path.exists(file_path): + log.debug(f"File '{relative_path}' is ignored because it is a broken symlink") + return True + + # Check if file exists before getting size + if not os.path.exists(file_path): + log.debug(f"File '{relative_path}' is ignored because it does not exist") + return True + if should_ignore_empty_files and os.path.getsize(file_path) == 0: log.debug(f"File '{relative_path}' is ignored because it is empty") return True diff --git a/pyproject.toml b/pyproject.toml index f0e076f..81d5944 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,9 @@ classifiers = [ dependencies = ["pipelex[anthropic,google,bedrock]>=0.9.4", "PyGithub==2.4.0"] +[tool.uv.sources] +pipelex = { path = "../pipelex", editable = true } + [project.optional-dependencies] docs = [ "mkdocs==1.6.1", diff --git a/uv.lock b/uv.lock index b8ad35c..a771ac3 100644 --- a/uv.lock +++ b/uv.lock @@ -526,7 +526,7 @@ requires-dist = [ { name = "mkdocs-material", marker = "extra == 'docs'", specifier = "==9.6.14" }, { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = "==1.1.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["anthropic", "google", "bedrock"], specifier = ">=0.9.4" }, + { name = "pipelex", extras = ["anthropic", "google", "bedrock"], editable = "../pipelex" }, { name = "pygithub", specifier = "==2.4.0" }, { name = "pyright", marker = "extra == 'dev'", specifier = "==1.1.398" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, @@ -1815,7 +1815,7 @@ wheels = [ [[package]] name = "pipelex" version = "0.9.4" -source = { registry = "https://pypi.org/simple" } +source = { editable = "../pipelex" } dependencies = [ { name = "aiofiles" }, { name = "backports-strenum", marker = "python_full_version < '3.11'" }, @@ -1845,10 +1845,6 @@ dependencies = [ { name = "typing-extensions" }, { name = "yattag" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/27/99/1e4b5fbfb516f0805a41b530f78da932241141831b4526ab7c1748ede615/pipelex-0.9.4.tar.gz", hash = "sha256:9f3165a3b7edc4fd0e9eea75fc0a3aec17231226b841da379d8ec55068ee9e5a", size = 217199, upload-time = "2025-09-06T13:40:17.296Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/bd/fd7ef4a63439528eee427f029f8b4b218ef46ec8c71cc97193165801520a/pipelex-0.9.4-py3-none-any.whl", hash = "sha256:cf287bd759e68dba77218ef3ca5ba71fa04895d04a42207f2b689afcbd5fef89", size = 351963, upload-time = "2025-09-06T13:40:15.149Z" }, -] [package.optional-dependencies] anthropic = [ @@ -1862,6 +1858,68 @@ google = [ { name = "google-auth-oauthlib" }, ] +[package.metadata] +requires-dist = [ + { name = "aioboto3", marker = "extra == 'bedrock'", specifier = ">=13.4.0" }, + { name = "aiofiles", specifier = ">=23.2.1" }, + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.49.0" }, + { name = "backports-strenum", marker = "python_full_version < '3.11'", specifier = ">=1.3.0" }, + { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34.131" }, + { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.35.24" }, + { name = "cocode", marker = "extra == 'dev'", specifier = "==0.1.2" }, + { name = "fal-client", marker = "extra == 'fal'", specifier = ">=0.4.1" }, + { name = "filetype", specifier = ">=1.2.0" }, + { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.2.1" }, + { name = "httpx", specifier = ">=0.27.0" }, + { name = "instructor", specifier = ">=1.8.3" }, + { name = "jinja2", specifier = ">=3.1.4" }, + { name = "json2html", specifier = ">=1.3.0" }, + { name = "kajson", specifier = "==0.3.0" }, + { name = "markdown", specifier = ">=3.6" }, + { name = "mistralai", marker = "extra == 'mistralai'", specifier = "==1.5.2" }, + { name = "mkdocs", marker = "extra == 'docs'", specifier = "==1.6.1" }, + { name = "mkdocs-glightbox", marker = "extra == 'docs'", specifier = "==0.4.0" }, + { name = "mkdocs-material", marker = "extra == 'docs'", specifier = "==9.6.14" }, + { name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = "==1.1.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, + { name = "networkx", specifier = ">=3.4.2" }, + { name = "openai", specifier = ">=1.60.1" }, + { name = "openpyxl", specifier = ">=3.1.5" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pandas-stubs", marker = "extra == 'dev'", specifier = ">=2.2.3.241126" }, + { name = "pillow", specifier = ">=11.2.1" }, + { name = "polyfactory", specifier = ">=2.21.0" }, + { name = "pydantic", specifier = "==2.10.6" }, + { name = "pypdfium2", specifier = ">=4.30.0,!=4.30.1" }, + { name = "pyright", marker = "extra == 'dev'", specifier = "==1.1.398" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.1.1" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, + { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0.0" }, + { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.6.1" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "rich", specifier = ">=13.8.1" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6.8" }, + { name = "shortuuid", specifier = ">=1.0.13" }, + { 
name = "toml", specifier = ">=0.10.2" }, + { name = "tomlkit", specifier = ">=0.13.2" }, + { name = "typer", specifier = ">=0.16.0" }, + { name = "types-aioboto3", extras = ["bedrock", "bedrock-runtime"], marker = "extra == 'dev'", specifier = ">=13.4.0" }, + { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0.20240626" }, + { name = "types-beautifulsoup4", marker = "extra == 'dev'", specifier = ">=4.12.0.20240907" }, + { name = "types-markdown", marker = "extra == 'dev'", specifier = ">=3.6.0.20240316" }, + { name = "types-networkx", marker = "extra == 'dev'", specifier = ">=3.3.0.20241020" }, + { name = "types-openpyxl", marker = "extra == 'dev'", specifier = ">=3.1.5.20250306" }, + { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250326" }, + { name = "types-requests", marker = "extra == 'dev'", specifier = ">=2.32.0.2024091" }, + { name = "types-toml", marker = "extra == 'dev'", specifier = ">=0.10.8.20240310" }, + { name = "typing-extensions", specifier = ">=4.13.2" }, + { name = "yattag", specifier = ">=1.15.2" }, +] +provides-extras = ["anthropic", "bedrock", "fal", "google", "mistralai", "docs", "dev"] + [[package]] name = "platformdirs" version = "4.3.8"
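The repox changes above rely on `pathspec` for gitignore-style matching, so the new default ignore entries (`node_modules`, `venv`, and the rest) behave like `.gitignore` lines. A short sketch of the matching semantics, using patterns from the updated list:

```python
"""Sketch: how the new ignore patterns match paths via pathspec's gitwildmatch."""

import pathspec

spec = pathspec.PathSpec.from_lines("gitwildmatch", ["node_modules", ".venv", "venv", "__pycache__"])

# A bare directory name matches that directory at any depth, including its contents
assert spec.match_file("node_modules/react/index.js")
assert spec.match_file("src/__pycache__/mod.cpython-311.pyc")
assert not spec.match_file("src/main.py")
```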