Skip to content

fix: validate sandbox path as POSIX for Windows compatibility (#138) #1995

fix: validate sandbox path as POSIX for Windows compatibility (#138)

fix: validate sandbox path as POSIX for Windows compatibility (#138) #1995

Workflow file for this run

# Workflow identity and triggers.
name: Run Tests
on:
  # Run on every push to the main branch.
  push:
    branches:
      - main
  # Run on every pull request that targets the main branch.
  pull_request:
    branches:
      - main
  # Allow manual triggering
  workflow_dispatch:
jobs:
docstring-lint:
  # Informational job: measures NumPy-docstring coverage, runs the numpydoc
  # pre-commit hook, writes a report to the job summary and publishes a
  # separate check whose conclusion is either "success" or "neutral".
  name: NumPy docstring validation
  runs-on: ubuntu-latest
  permissions:
    checks: write # required to publish a neutral check conclusion
    contents: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - name: Install pre-commit
      run: |
        python -m pip install --upgrade pip
        pip install pre-commit numpydoc
    - name: Compute docstring coverage
      id: coverage
      # Computes NumPy-docstring coverage over the same objects numpydoc
      # inspects (rules from [tool.numpydoc_validation] in pyproject.toml) and
      # exposes percent/passing/total/failing as step outputs.
      run: python scripts/docstring_coverage.py
    - name: Validate NumPy docstrings
      id: numpydoc
      # Runs the numpydoc-validation hook (.pre-commit-config.yaml) with rules
      # from [tool.numpydoc_validation] in pyproject.toml. The step always
      # exits 0; the hook's exit code is exposed as the `status` step output and
      # the check published by the next step shows docstring issues as a
      # neutral (informational) result rather than a failure.
      # The full report is written to the job summary (markdown, no JSON
      # escaping needed) to avoid invalid control characters in the check output.
      env:
        # Step outputs are handed to the script as environment variables
        # instead of being expanded into the script text, so their content is
        # never interpreted as shell code.
        COVERAGE_PERCENT: ${{ steps.coverage.outputs.percent }}
        COVERAGE_PASSING: ${{ steps.coverage.outputs.passing }}
        COVERAGE_TOTAL: ${{ steps.coverage.outputs.total }}
      run: |
        # Do not abort on a non-zero hook exit; the status is captured below.
        set +e
        output=$(pre-commit run numpydoc-validation --all-files 2>&1)
        status=$?
        echo "$output"
        echo "status=$status" >> "$GITHUB_OUTPUT"
        {
          echo "## NumPy docstring guide coverage: ${COVERAGE_PERCENT}% (${COVERAGE_PASSING}/${COVERAGE_TOTAL})"
          if [ "$status" -eq 0 ]; then
            echo "All docstrings follow the NumPy guide. :white_check_mark:"
          else
            echo "_Informational only — this does **not** block merging._"
            echo ""
            echo "Some docstrings don't yet follow the NumPy guide "
            echo "(see \`docs/api-reference-guide.md\`). Please fix them when you can."
            echo ""
            echo '```'
            # Only the last 200 lines of the hook output go into the summary.
            echo "$output" | tail -n 200
            echo '```'
          fi
        } >> "$GITHUB_STEP_SUMMARY"
        exit 0
    - name: Publish docstring check (neutral if issues found)
      # Publish even if an earlier step failed, so the check always appears.
      if: always()
      # The check is informational: failing to create it must not fail the job.
      # NOTE(review): pull requests from forks get a read-only GITHUB_TOKEN, so
      # creating the check run is expected to be rejected there — confirm.
      continue-on-error: true
      uses: LouisBrunner/checks-action@6b626ffbad7cc56fd58627f774b9067e6118af23 # v2.0.0
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        name: NumPy docstring guide
        # "success" only when the coverage step reported zero failing objects;
        # anything else (including a missing output) becomes "neutral".
        conclusion: ${{ steps.coverage.outputs.failing == '0' && 'success' || 'neutral' }}
        output: |
          {
            "title": "Coverage - ${{ steps.coverage.outputs.percent }}%(${{ steps.coverage.outputs.passing }}/${{ steps.coverage.outputs.total }}) - Non Blocking",
            "summary": "This check is informational and does not block merging. See the job summary and the 'Validate NumPy docstrings' step log for the full list of issues."
          }
test:
  # Runs the pytest suite once per test type (see the matrix below), then
  # uploads the JUnit report and the coverage report of each run.
  runs-on: ubuntu-latest
  permissions:
    id-token: write # required for Entra OIDC authentication
    contents: read
  strategy:
    matrix:
      # Installing ollama model in GitHub Actions runner requires significant disk space.
      # It reduces the space available for browser-based tests
      test-type: ["unit", "integration", "ollama_local", "slow-browser", "slow-other", "ghcp"]
      # Each entry attaches the pytest selection arguments to its test type.
      include:
        - test-type: "unit"
          pytest-args: "-m 'unit and not ollama_local'"
        - test-type: "integration"
          pytest-args: "-m 'integration and not ollama_local and not slow'"
        - test-type: "ollama_local"
          pytest-args: "-m 'ollama_local and not slow'"
        - test-type: "slow-browser"
          pytest-args: "-m 'slow' test/bot/test_browsing_bot.py"
        - test-type: "slow-other"
          pytest-args: "-m 'slow' --ignore=test/bot/test_browsing_bot.py"
        - test-type: "ghcp"
          pytest-args: "-m 'ghcp'"
  steps:
    - name: Free up disk space for slow and ollama tests
      if: matrix.test-type == 'slow-browser' || matrix.test-type == 'slow-other' || matrix.test-type == 'ollama_local'
      # Pinned to a release tag instead of the mutable `main` branch: this job
      # can request OIDC tokens, so third-party code must not change silently.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        tool-cache: true
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        docker-images: false
        swap-storage: false
    - name: Reinstall Azure CLI (removed by disk cleanup)
      if: matrix.test-type == 'slow-browser' || matrix.test-type == 'slow-other' || matrix.test-type == 'ollama_local'
      # Refresh the package index first so the install does not fail on stale
      # package lists.
      run: |
        sudo apt-get update
        sudo apt-get install -y azure-cli
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Python 3.12
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"
    - name: Set up Docker Buildx
      if: matrix.test-type != 'unit'
      uses: docker/setup-buildx-action@v3
    - name: Cache Docker layers
      if: matrix.test-type != 'unit'
      uses: actions/cache@v4
      with:
        path: /tmp/.buildx-cache
        # The cache key changes whenever the shell-server Dockerfile changes.
        key: ${{ runner.os }}-buildx-${{ hashFiles('src/microbots/environment/local_docker/image_builder/Dockerfile') }}
        restore-keys: |
          ${{ runner.os }}-buildx-
    - name: Cache pip dependencies
      uses: actions/cache@v4
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential
    - name: Install Python dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-cov pytest-mock pytest-asyncio pytest-xdist pytest-rerunfailures
    - name: Install package in development mode
      run: |
        pip install -e .
    - name: Install GitHub Copilot SDK dependencies for GHCP tests
      if: matrix.test-type == 'ghcp'
      run: |
        pip install "github-copilot-sdk==0.3.0"
    - name: Install Azure AD / keyless auth dependencies
      run: |
        pip install "azure-identity>=1.15.0"
    - name: Install Azure Pipelines task dependencies
      if: matrix.test-type == 'unit'
      run: npm ci --prefix azure-pipelines/MicrobotsLogAnalyzerTask
    - name: Run Azure Pipelines task unit tests
      if: matrix.test-type == 'unit'
      run: npm test --prefix azure-pipelines/MicrobotsLogAnalyzerTask
    - name: Build Docker images for integration tests
      if: matrix.test-type != 'unit'
      run: |
        # Build the shell server image needed for Docker tests
        docker buildx build \
          --cache-from type=local,src=/tmp/.buildx-cache \
          --cache-to type=local,dest=/tmp/.buildx-cache-new,mode=max \
          --load \
          -f src/microbots/environment/local_docker/image_builder/Dockerfile \
          -t kavyasree261002/shell_server:latest .
        # Prevent cache from growing indefinitely
        rm -rf /tmp/.buildx-cache
        mv /tmp/.buildx-cache-new /tmp/.buildx-cache
    - name: Check disk space before ollama installation
      if: matrix.test-type == 'ollama_local'
      run: df -h
    - name: Run model
      uses: ai-action/ollama-action@v1
      id: model
      if: matrix.test-type == 'ollama_local'
      with:
        model: qwen2.5-coder:latest
        prompt: Hi, Are you running? What is your model name?
    - name: Check disk space after ollama installation
      if: matrix.test-type == 'ollama_local'
      run: df -h
    - name: Print response
      # The `model` step only runs for ollama_local, so its output is empty in
      # every other matrix leg; skip the step there.
      if: matrix.test-type == 'ollama_local'
      run: echo "$response"
      env:
        response: ${{ steps.model.outputs.response }}
    - name: Azure Login (Entra OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ secrets.AZURE_CLIENT_ID }}
        tenant-id: ${{ secrets.AZURE_TENANT_ID }}
        subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
    - name: Run ${{ matrix.test-type }} tests
      env:
        # Azure OpenAI API Configuration (key-free via Entra OIDC)
        AZURE_OPENAI_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_DEPLOYMENT_NAME }}
        AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
        AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }}
        # Enable DefaultAzureCredential auto-detection for all non-unit test types
        AZURE_AUTH_METHOD: ${{ matrix.test-type != 'unit' && 'azure_ad' || '' }}
        BROWSER_USE_LLM_MODEL: "gpt-5"
        # Environment values are strings; quoted to make that explicit.
        BROWSER_USE_LLM_TEMPERATURE: "1"
        # Anthropic API Configuration
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        ANTHROPIC_DEPLOYMENT_NAME: ${{ vars.ANTHROPIC_DEPLOYMENT_NAME }}
        ANTHROPIC_END_POINT: ${{ vars.ANTHROPIC_END_POINT }}
        # Local Model Configuration
        LOCAL_MODEL_NAME: "qwen2.5-coder:latest"
        LOCAL_MODEL_PORT: "11434"
        # CopilotBot Configuration (keyless OIDC — only populated for ghcp tests)
        COPILOT_BYOK_BASE_URL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_ENDPOINT || '' }}
        COPILOT_BYOK_PROVIDER_TYPE: ${{ matrix.test-type == 'ghcp' && 'azure' || '' }}
        COPILOT_BYOK_MODEL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_DEPLOYMENT_NAME || '' }}
        COPILOT_BYOK_AZURE_API_VERSION: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_API_VERSION || '' }}
        COPILOT_BYOK_WIRE_API: ${{ matrix.test-type == 'ghcp' && 'completions' || '' }}
      # Parallel run (-n auto) with one automatic rerun per failing test; writes
      # coverage.xml and a per-test-type JUnit file picked up by the uploads below.
      run: |
        python -m pytest ${{ matrix.pytest-args }} \
          -n auto \
          --dist loadgroup \
          --reruns 1 \
          --reruns-delay 5 \
          --cov=src \
          --cov-report=xml \
          --cov-report=term-missing \
          --junitxml=test-results-${{ matrix.test-type }}.xml \
          -v \
          -o log_cli=true \
          -o log_cli_level=DEBUG \
          -o log_cli_format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" \
          -o log_cli_date_format="%Y-%m-%d %H:%M:%S"
    - name: Upload test results
      uses: actions/upload-artifact@v4
      # Upload even when the tests failed, so the results can still be inspected.
      if: always()
      with:
        name: test-results-${{ matrix.test-type }}
        path: test-results-*.xml
    - name: Upload coverage reports
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: coverage-${{ matrix.test-type }}
        path: coverage.xml
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      if: always()
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        file: ./coverage.xml
        flags: ${{ matrix.test-type }}
        name: codecov-${{ matrix.test-type }}
        # A Codecov upload problem must not fail the test job.
        fail_ci_if_error: false
test-summary:
  # Collects the JUnit files uploaded by the `test` job and renders one
  # pass/fail table on the run's summary page.
  runs-on: ubuntu-latest
  needs: [test]
  # Run regardless of how the test job ended, so failures are summarised too.
  if: always()
  steps:
    - name: Download all test results
      uses: actions/download-artifact@v4
      with:
        # Fetch every test-results-<type> artifact into the working directory.
        pattern: test-results-*
        merge-multiple: true
    - name: Test Summary
      if: always()
      run: |
        {
          echo "## Test Results Summary"
          echo "| Test Type | Status |"
          echo "|-----------|--------|"
          # Check each test result file and parse for failures
          for test_type in unit integration ollama_local slow-browser slow-other ghcp; do
            # Format test type name nicely
            case $test_type in
              unit) name="Unit Tests" ;;
              integration) name="Integration Tests" ;;
              ollama_local) name="Ollama Tests" ;;
              slow-browser) name="Slow Browser Tests" ;;
              slow-other) name="Slow Other Tests" ;;
              ghcp) name="GitHub Copilot Tests" ;;
              *) name="$test_type" ;;
            esac
            report="test-results-${test_type}.xml"
            if [ -f "$report" ]; then
              # Read the first failures="N" / errors="N" attribute in the file.
              failures=$(grep -oP 'failures="\K[0-9]+' "$report" | head -1)
              errors=$(grep -oP 'errors="\K[0-9]+' "$report" | head -1)
              if [ "${failures:-0}" -eq 0 ] && [ "${errors:-0}" -eq 0 ]; then
                status="✅ Passed"
              else
                status="❌ Failed"
              fi
            else
              # No report was downloaded for this test type; show that
              # explicitly instead of silently dropping the row.
              status="⚠️ No results"
            fi
            echo "| ${name} | ${status} |"
          done
        } >> "$GITHUB_STEP_SUMMARY"