feat: construct training loop orchestrator #1998
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers: pushes and pull requests that target the
# listed branches, plus on-demand runs.
name: Run Tests

on:
  push:
    branches:
      - "main"
      - "auto-memory/dev"
  pull_request:
    branches:
      - "main"
      - "auto-memory/dev"
  # Allow manual triggering
  workflow_dispatch:
jobs:
  # Informational docstring check: computes NumPy-docstring coverage, runs the
  # numpydoc-validation pre-commit hook, writes a report to the job summary and
  # publishes a separate check whose conclusion is success or neutral.
  docstring-lint:
    name: NumPy docstring validation
    runs-on: ubuntu-latest
    permissions:
      checks: write  # required to publish a neutral check conclusion
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install pre-commit
        run: |
          python -m pip install --upgrade pip
          pip install pre-commit numpydoc

      - name: Compute docstring coverage
        id: coverage
        # Computes NumPy-docstring coverage over the same objects numpydoc
        # inspects (rules from [tool.numpydoc_validation] in pyproject.toml) and
        # exposes percent/passing/total/failing as step outputs.
        run: python scripts/docstring_coverage.py

      - name: Validate NumPy docstrings
        id: numpydoc
        # Runs the numpydoc-validation hook (.pre-commit-config.yaml) with rules
        # from [tool.numpydoc_validation] in pyproject.toml. The step always
        # exits 0; the published check below carries the real status so docstring
        # issues show as a neutral (informational) result rather than a failure.
        # The full report is written to the job summary (markdown, no JSON
        # escaping needed) to avoid invalid control characters in the check output.
        env:
          # Coverage figures are handed over as environment variables instead of
          # being expanded inline in the script, so their text is only ever
          # treated as data by the shell, never as script source.
          COVERAGE_PERCENT: ${{ steps.coverage.outputs.percent }}
          COVERAGE_PASSING: ${{ steps.coverage.outputs.passing }}
          COVERAGE_TOTAL: ${{ steps.coverage.outputs.total }}
        run: |
          # Keep going after a non-zero hook exit; the status is recorded below.
          set +e
          output=$(pre-commit run numpydoc-validation --all-files 2>&1)
          status=$?
          echo "$output"
          echo "status=$status" >> "$GITHUB_OUTPUT"
          {
            echo "## NumPy docstring guide coverage: ${COVERAGE_PERCENT}% (${COVERAGE_PASSING}/${COVERAGE_TOTAL})"
            if [ "$status" -eq 0 ]; then
              echo "All docstrings follow the NumPy guide. :white_check_mark:"
            else
              echo "_Informational only — this does **not** block merging._"
              echo ""
              echo "Some docstrings don't yet follow the NumPy guide "
              echo "(see \`docs/api-reference-guide.md\`). Please fix them when you can."
              echo ""
              echo '```'
              # Only the last 200 lines of the hook output go into the summary.
              echo "$output" | tail -n 200
              echo '```'
            fi
          } >> "$GITHUB_STEP_SUMMARY"
          exit 0

      - name: Publish docstring check (neutral if issues found)
        if: always()
        uses: LouisBrunner/checks-action@6b626ffbad7cc56fd58627f774b9067e6118af23  # v2.0.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          name: NumPy docstring guide
          # success only when the coverage step reported zero failing objects;
          # anything else (including a missing output) is published as neutral.
          conclusion: ${{ steps.coverage.outputs.failing == '0' && 'success' || 'neutral' }}
          output: |
            {
              "title": "Coverage - ${{ steps.coverage.outputs.percent }}%(${{ steps.coverage.outputs.passing }}/${{ steps.coverage.outputs.total }}) - Non Blocking",
              "summary": "This check is informational and does not block merging. See the job summary and the 'Validate NumPy docstrings' step log for the full list of issues."
            }
| test: | |
| runs-on: ubuntu-latest | |
| permissions: | |
| id-token: write # required for Entra OIDC authentication | |
| contents: read | |
| strategy: | |
| matrix: | |
| # Installing ollama model in GitHub Actions runner requires significant disk space. | |
| # It reduces the space available for browser-based tests | |
| test-type: ["unit", "integration", "ollama_local", "slow-browser", "slow-other", "ghcp"] | |
| include: | |
| - test-type: "unit" | |
| pytest-args: "-m 'unit and not ollama_local'" | |
| - test-type: "integration" | |
| pytest-args: "-m 'integration and not ollama_local and not slow'" | |
| - test-type: "ollama_local" | |
| pytest-args: "-m 'ollama_local and not slow'" | |
| - test-type: "slow-browser" | |
| pytest-args: "-m 'slow' test/bot/test_browsing_bot.py" | |
| - test-type: "slow-other" | |
| pytest-args: "-m 'slow' --ignore=test/bot/test_browsing_bot.py" | |
| - test-type: "ghcp" | |
| pytest-args: "-m 'ghcp'" | |
| steps: | |
| - name: Free up disk space for slow and ollama tests | |
| if: matrix.test-type == 'slow-browser' || matrix.test-type == 'slow-other' || matrix.test-type == 'ollama_local' | |
| uses: jlumbroso/free-disk-space@main | |
| with: | |
| tool-cache: true | |
| android: true | |
| dotnet: true | |
| haskell: true | |
| large-packages: true | |
| docker-images: false | |
| swap-storage: false | |
| - name: Reinstall Azure CLI (removed by disk cleanup) | |
| if: matrix.test-type == 'slow-browser' || matrix.test-type == 'slow-other' || matrix.test-type == 'ollama_local' | |
| run: sudo apt-get install -y azure-cli | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Python 3.12 | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.12" | |
| - name: Set up Docker Buildx | |
| if: matrix.test-type != 'unit' | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Cache Docker layers | |
| if: matrix.test-type != 'unit' | |
| uses: actions/cache@v4 | |
| with: | |
| path: /tmp/.buildx-cache | |
| key: ${{ runner.os }}-buildx-${{ hashFiles('src/microbots/environment/local_docker/image_builder/Dockerfile') }} | |
| restore-keys: | | |
| ${{ runner.os }}-buildx- | |
| - name: Cache pip dependencies | |
| uses: actions/cache@v4 | |
| with: | |
| path: ~/.cache/pip | |
| key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} | |
| restore-keys: | | |
| ${{ runner.os }}-pip- | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential | |
| - name: Install Python dependencies | |
| run: | | |
| python -m pip install --upgrade pip | |
| pip install -r requirements.txt | |
| pip install pytest pytest-cov pytest-mock pytest-asyncio pytest-xdist pytest-rerunfailures | |
| - name: Install package in development mode | |
| run: | | |
| pip install -e . | |
| - name: Install GitHub Copilot SDK dependencies for GHCP tests | |
| if: matrix.test-type == 'ghcp' | |
| run: | | |
| pip install "github-copilot-sdk==0.3.0" | |
| - name: Install Azure AD / keyless auth dependencies | |
| run: | | |
| pip install "azure-identity>=1.15.0" | |
| - name: Install Azure Pipelines task dependencies | |
| if: matrix.test-type == 'unit' | |
| run: npm ci --prefix azure-pipelines/MicrobotsLogAnalyzerTask | |
| - name: Run Azure Pipelines task unit tests | |
| if: matrix.test-type == 'unit' | |
| run: npm test --prefix azure-pipelines/MicrobotsLogAnalyzerTask | |
| - name: Build Docker images for integration tests | |
| if: matrix.test-type != 'unit' | |
| run: | | |
| # Build the shell server image needed for Docker tests | |
| docker buildx build \ | |
| --cache-from type=local,src=/tmp/.buildx-cache \ | |
| --cache-to type=local,dest=/tmp/.buildx-cache-new,mode=max \ | |
| --load \ | |
| -f src/microbots/environment/local_docker/image_builder/Dockerfile \ | |
| -t kavyasree261002/shell_server:latest . | |
| # Prevent cache from growing indefinitely | |
| rm -rf /tmp/.buildx-cache | |
| mv /tmp/.buildx-cache-new /tmp/.buildx-cache | |
| - name: Check disk space before ollama installation | |
| if: matrix.test-type == 'ollama_local' | |
| run: df -h | |
| - name: Run model | |
| uses: ai-action/ollama-action@v1 | |
| id: model | |
| if: matrix.test-type == 'ollama_local' | |
| with: | |
| model: qwen2.5-coder:latest | |
| prompt: Hi, Are you running? What is your model name? | |
| - name: Check disk space after ollama installation | |
| if: matrix.test-type == 'ollama_local' | |
| run: df -h | |
| - name: Print response | |
| run: echo "$response" | |
| env: | |
| response: ${{ steps.model.outputs.response }} | |
| - name: Azure Login (Entra OIDC) | |
| uses: azure/login@v2 | |
| with: | |
| client-id: ${{ secrets.AZURE_CLIENT_ID }} | |
| tenant-id: ${{ secrets.AZURE_TENANT_ID }} | |
| subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} | |
| - name: Run ${{ matrix.test-type }} tests | |
| env: | |
| # Azure OpenAI API Configuration (key-free via Entra OIDC) | |
| AZURE_OPENAI_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_DEPLOYMENT_NAME }} | |
| AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }} | |
| AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }} | |
| # Enable DefaultAzureCredential auto-detection for all non-unit test types | |
| AZURE_AUTH_METHOD: ${{ matrix.test-type != 'unit' && 'azure_ad' || '' }} | |
| BROWSER_USE_LLM_MODEL: "gpt-5" | |
| BROWSER_USE_LLM_TEMPERATURE: 1 | |
| #Anthrpic API Configuration | |
| ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | |
| ANTHROPIC_DEPLOYMENT_NAME: ${{ vars.ANTHROPIC_DEPLOYMENT_NAME }} | |
| ANTHROPIC_END_POINT: ${{ vars.ANTHROPIC_END_POINT }} | |
| #Local Model Configuration | |
| LOCAL_MODEL_NAME: "qwen2.5-coder:latest" | |
| LOCAL_MODEL_PORT: 11434 | |
| # CopilotBot Configuration (keyless OIDC — only populated for ghcp tests) | |
| COPILOT_BYOK_BASE_URL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_ENDPOINT || '' }} | |
| COPILOT_BYOK_PROVIDER_TYPE: ${{ matrix.test-type == 'ghcp' && 'azure' || '' }} | |
| COPILOT_BYOK_MODEL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_DEPLOYMENT_NAME || '' }} | |
| COPILOT_BYOK_AZURE_API_VERSION: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_API_VERSION || '' }} | |
| COPILOT_BYOK_WIRE_API: ${{ matrix.test-type == 'ghcp' && 'completions' || '' }} | |
| run: | | |
| python -m pytest ${{ matrix.pytest-args }} \ | |
| -n auto \ | |
| --dist loadgroup \ | |
| --reruns 1 \ | |
| --reruns-delay 5 \ | |
| --cov=src \ | |
| --cov-report=xml \ | |
| --cov-report=term-missing \ | |
| --junitxml=test-results-${{ matrix.test-type }}.xml \ | |
| -v \ | |
| -o log_cli=true \ | |
| -o log_cli_level=DEBUG \ | |
| -o log_cli_format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" \ | |
| -o log_cli_date_format="%Y-%m-%d %H:%M:%S" | |
| - name: Upload test results | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: test-results-${{ matrix.test-type }} | |
| path: test-results-*.xml | |
| - name: Upload coverage reports | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: coverage-${{ matrix.test-type }} | |
| path: coverage.xml | |
| - name: Upload coverage to Codecov | |
| uses: codecov/codecov-action@v4 | |
| if: always() | |
| with: | |
| token: ${{ secrets.CODECOV_TOKEN }} | |
| file: ./coverage.xml | |
| flags: ${{ matrix.test-type }} | |
| name: codecov-${{ matrix.test-type }} | |
| fail_ci_if_error: false | |
| test-summary: | |
| runs-on: ubuntu-latest | |
| needs: [test] | |
| if: always() | |
| steps: | |
| - name: Download all test results | |
| uses: actions/download-artifact@v4 | |
| with: | |
| pattern: test-results-* | |
| merge-multiple: true | |
| - name: Test Summary | |
| if: always() | |
| run: | | |
| echo "## Test Results Summary" >> $GITHUB_STEP_SUMMARY | |
| echo "| Test Type | Status |" >> $GITHUB_STEP_SUMMARY | |
| echo "|-----------|--------|" >> $GITHUB_STEP_SUMMARY | |
| # Check each test result file and parse for failures | |
| for test_type in unit integration ollama_local slow-browser slow-other ghcp; do | |
| if [ -f "test-results-${test_type}.xml" ]; then | |
| failures=$(grep -oP 'failures="\K[0-9]+' "test-results-${test_type}.xml" | head -1) | |
| errors=$(grep -oP 'errors="\K[0-9]+' "test-results-${test_type}.xml" | head -1) | |
| if [ "${failures:-0}" -eq 0 ] && [ "${errors:-0}" -eq 0 ]; then | |
| status="✅ Passed" | |
| else | |
| status="❌ Failed" | |
| fi | |
| # Format test type name nicely | |
| case $test_type in | |
| unit) name="Unit Tests" ;; | |
| integration) name="Integration Tests" ;; | |
| ollama_local) name="Ollama Tests" ;; | |
| slow-browser) name="Slow Browser Tests" ;; | |
| slow-other) name="Slow Other Tests" ;; | |
| ghcp) name="GitHub Copilot Tests" ;; | |
| *) name="$test_type" ;; | |
| esac | |
| echo "| ${name} | ${status} |" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| done |