feat: construct training loop orchestrator #1998

Workflow file for this run

	name: Run Tests

	on:
	push:
	branches: [ "main", "auto-memory/dev" ]
	pull_request:
	branches: [ "main", "auto-memory/dev" ]
	workflow_dispatch:
	# Allow manual triggering

	jobs:
	docstring-lint:
	name: NumPy docstring validation
	runs-on: ubuntu-latest
	permissions:
	checks: write # required to publish a neutral check conclusion
	contents: read
	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Set up Python 3.12
	uses: actions/setup-python@v5
	with:
	python-version: "3.12"

	- name: Install pre-commit
	run: \|
	python -m pip install --upgrade pip
	pip install pre-commit numpydoc

	- name: Compute docstring coverage
	id: coverage
	# Computes NumPy-docstring coverage over the same objects numpydoc
	# inspects (rules from [tool.numpydoc_validation] in pyproject.toml) and
	# exposes percent/passing/total/failing as step outputs.
	run: python scripts/docstring_coverage.py

	- name: Validate NumPy docstrings
	id: numpydoc
	# Runs the numpydoc-validation hook (.pre-commit-config.yaml) with rules
	# from [tool.numpydoc_validation] in pyproject.toml. The step always
	# exits 0; the published check below carries the real status so docstring
	# issues show as a neutral (informational) result rather than a failure.
	# The full report is written to the job summary (markdown, no JSON
	# escaping needed) to avoid invalid control characters in the check output.
	run: \|
	set +e
	output=$(pre-commit run numpydoc-validation --all-files 2>&1)
	status=$?
	echo "$output"
	echo "status=$status" >> "$GITHUB_OUTPUT"
	{
	echo "## NumPy docstring guide coverage: ${{ steps.coverage.outputs.percent }}% (${{ steps.coverage.outputs.passing }}/${{ steps.coverage.outputs.total }})"
	if [ "$status" -eq 0 ]; then
	echo "All docstrings follow the NumPy guide. :white_check_mark:"
	else
	echo "_Informational only — this does not block merging._"
	echo ""
	echo "Some docstrings don't yet follow the NumPy guide "
	echo "(see \`docs/api-reference-guide.md\`). Please fix them when you can."
	echo ""
	echo '```'
	echo "$output" \| tail -n 200
	echo '```'
	fi
	} >> "$GITHUB_STEP_SUMMARY"
	exit 0

	- name: Publish docstring check (neutral if issues found)
	if: always()
	uses: LouisBrunner/checks-action@6b626ffbad7cc56fd58627f774b9067e6118af23 # v2.0.0
	with:
	token: ${{ secrets.GITHUB_TOKEN }}
	name: NumPy docstring guide
	conclusion: ${{ steps.coverage.outputs.failing == '0' && 'success' \|\| 'neutral' }}
	output: \|
	{
	"title": "Coverage - ${{ steps.coverage.outputs.percent }}%(${{ steps.coverage.outputs.passing }}/${{ steps.coverage.outputs.total }}) - Non Blocking",
	"summary": "This check is informational and does not block merging. See the job summary and the 'Validate NumPy docstrings' step log for the full list of issues."
	}

	test:
	runs-on: ubuntu-latest
	permissions:
	id-token: write # required for Entra OIDC authentication
	contents: read
	strategy:
	matrix:
	# Installing ollama model in GitHub Actions runner requires significant disk space.
	# It reduces the space available for browser-based tests
	test-type: ["unit", "integration", "ollama_local", "slow-browser", "slow-other", "ghcp"]
	include:
	- test-type: "unit"
	pytest-args: "-m 'unit and not ollama_local'"
	- test-type: "integration"
	pytest-args: "-m 'integration and not ollama_local and not slow'"
	- test-type: "ollama_local"
	pytest-args: "-m 'ollama_local and not slow'"
	- test-type: "slow-browser"
	pytest-args: "-m 'slow' test/bot/test_browsing_bot.py"
	- test-type: "slow-other"
	pytest-args: "-m 'slow' --ignore=test/bot/test_browsing_bot.py"
	- test-type: "ghcp"
	pytest-args: "-m 'ghcp'"


	steps:

	- name: Free up disk space for slow and ollama tests
	if: matrix.test-type == 'slow-browser' \|\| matrix.test-type == 'slow-other' \|\| matrix.test-type == 'ollama_local'
	uses: jlumbroso/free-disk-space@main
	with:
	tool-cache: true
	android: true
	dotnet: true
	haskell: true
	large-packages: true
	docker-images: false
	swap-storage: false

	- name: Reinstall Azure CLI (removed by disk cleanup)
	if: matrix.test-type == 'slow-browser' \|\| matrix.test-type == 'slow-other' \|\| matrix.test-type == 'ollama_local'
	run: sudo apt-get install -y azure-cli

	- name: Checkout code
	uses: actions/checkout@v4

	- name: Set up Python 3.12
	uses: actions/setup-python@v5
	with:
	python-version: "3.12"

	- name: Set up Docker Buildx
	if: matrix.test-type != 'unit'
	uses: docker/setup-buildx-action@v3

	- name: Cache Docker layers
	if: matrix.test-type != 'unit'
	uses: actions/cache@v4
	with:
	path: /tmp/.buildx-cache
	key: ${{ runner.os }}-buildx-${{ hashFiles('src/microbots/environment/local_docker/image_builder/Dockerfile') }}
	restore-keys: \|
	${{ runner.os }}-buildx-

	- name: Cache pip dependencies
	uses: actions/cache@v4
	with:
	path: ~/.cache/pip
	key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
	restore-keys: \|
	${{ runner.os }}-pip-

	- name: Install system dependencies
	run: \|
	sudo apt-get update
	sudo apt-get install -y build-essential

	- name: Install Python dependencies
	run: \|
	python -m pip install --upgrade pip
	pip install -r requirements.txt
	pip install pytest pytest-cov pytest-mock pytest-asyncio pytest-xdist pytest-rerunfailures

	- name: Install package in development mode
	run: \|
	pip install -e .

	- name: Install GitHub Copilot SDK dependencies for GHCP tests
	if: matrix.test-type == 'ghcp'
	run: \|
	pip install "github-copilot-sdk==0.3.0"

	- name: Install Azure AD / keyless auth dependencies
	run: \|
	pip install "azure-identity>=1.15.0"

	- name: Install Azure Pipelines task dependencies
	if: matrix.test-type == 'unit'
	run: npm ci --prefix azure-pipelines/MicrobotsLogAnalyzerTask

	- name: Run Azure Pipelines task unit tests
	if: matrix.test-type == 'unit'
	run: npm test --prefix azure-pipelines/MicrobotsLogAnalyzerTask

	- name: Build Docker images for integration tests
	if: matrix.test-type != 'unit'
	run: \|
	# Build the shell server image needed for Docker tests
	docker buildx build \
	--cache-from type=local,src=/tmp/.buildx-cache \
	--cache-to type=local,dest=/tmp/.buildx-cache-new,mode=max \
	--load \
	-f src/microbots/environment/local_docker/image_builder/Dockerfile \
	-t kavyasree261002/shell_server:latest .
	# Prevent cache from growing indefinitely
	rm -rf /tmp/.buildx-cache
	mv /tmp/.buildx-cache-new /tmp/.buildx-cache

	- name: Check disk space before ollama installation
	if: matrix.test-type == 'ollama_local'
	run: df -h

	- name: Run model
	uses: ai-action/ollama-action@v1
	id: model
	if: matrix.test-type == 'ollama_local'
	with:
	model: qwen2.5-coder:latest
	prompt: Hi, Are you running? What is your model name?

	- name: Check disk space after ollama installation
	if: matrix.test-type == 'ollama_local'
	run: df -h

	- name: Print response
	run: echo "$response"
	env:
	response: ${{ steps.model.outputs.response }}

	- name: Azure Login (Entra OIDC)
	uses: azure/login@v2
	with:
	client-id: ${{ secrets.AZURE_CLIENT_ID }}
	tenant-id: ${{ secrets.AZURE_TENANT_ID }}
	subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

	- name: Run ${{ matrix.test-type }} tests
	env:
	# Azure OpenAI API Configuration (key-free via Entra OIDC)
	AZURE_OPENAI_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_DEPLOYMENT_NAME }}
	AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
	AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }}
	# Enable DefaultAzureCredential auto-detection for all non-unit test types
	AZURE_AUTH_METHOD: ${{ matrix.test-type != 'unit' && 'azure_ad' \|\| '' }}
	BROWSER_USE_LLM_MODEL: "gpt-5"
	BROWSER_USE_LLM_TEMPERATURE: 1
	#Anthrpic API Configuration
	ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	ANTHROPIC_DEPLOYMENT_NAME: ${{ vars.ANTHROPIC_DEPLOYMENT_NAME }}
	ANTHROPIC_END_POINT: ${{ vars.ANTHROPIC_END_POINT }}
	#Local Model Configuration
	LOCAL_MODEL_NAME: "qwen2.5-coder:latest"
	LOCAL_MODEL_PORT: 11434
	# CopilotBot Configuration (keyless OIDC — only populated for ghcp tests)
	COPILOT_BYOK_BASE_URL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_ENDPOINT \|\| '' }}
	COPILOT_BYOK_PROVIDER_TYPE: ${{ matrix.test-type == 'ghcp' && 'azure' \|\| '' }}
	COPILOT_BYOK_MODEL: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_DEPLOYMENT_NAME \|\| '' }}
	COPILOT_BYOK_AZURE_API_VERSION: ${{ matrix.test-type == 'ghcp' && vars.AZURE_OPENAI_API_VERSION \|\| '' }}
	COPILOT_BYOK_WIRE_API: ${{ matrix.test-type == 'ghcp' && 'completions' \|\| '' }}
	run: \|
	python -m pytest ${{ matrix.pytest-args }} \
	-n auto \
	--dist loadgroup \
	--reruns 1 \
	--reruns-delay 5 \
	--cov=src \
	--cov-report=xml \
	--cov-report=term-missing \
	--junitxml=test-results-${{ matrix.test-type }}.xml \
	-v \
	-o log_cli=true \
	-o log_cli_level=DEBUG \
	-o log_cli_format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" \
	-o log_cli_date_format="%Y-%m-%d %H:%M:%S"

	- name: Upload test results
	uses: actions/upload-artifact@v4
	if: always()
	with:
	name: test-results-${{ matrix.test-type }}
	path: test-results-*.xml

	- name: Upload coverage reports
	uses: actions/upload-artifact@v4
	if: always()
	with:
	name: coverage-${{ matrix.test-type }}
	path: coverage.xml

	- name: Upload coverage to Codecov
	uses: codecov/codecov-action@v4
	if: always()
	with:
	token: ${{ secrets.CODECOV_TOKEN }}
	file: ./coverage.xml
	flags: ${{ matrix.test-type }}
	name: codecov-${{ matrix.test-type }}
	fail_ci_if_error: false

	test-summary:
	runs-on: ubuntu-latest
	needs: [test]
	if: always()
	steps:
	- name: Download all test results
	uses: actions/download-artifact@v4
	with:
	pattern: test-results-*
	merge-multiple: true

	- name: Test Summary
	if: always()
	run: \|
	echo "## Test Results Summary" >> $GITHUB_STEP_SUMMARY
	echo "\| Test Type \| Status \|" >> $GITHUB_STEP_SUMMARY
	echo "\|-----------\|--------\|" >> $GITHUB_STEP_SUMMARY

	# Check each test result file and parse for failures
	for test_type in unit integration ollama_local slow-browser slow-other ghcp; do
	if [ -f "test-results-${test_type}.xml" ]; then
	failures=$(grep -oP 'failures="\K[0-9]+' "test-results-${test_type}.xml" \| head -1)
	errors=$(grep -oP 'errors="\K[0-9]+' "test-results-${test_type}.xml" \| head -1)

	if [ "${failures:-0}" -eq 0 ] && [ "${errors:-0}" -eq 0 ]; then
	status="✅ Passed"
	else
	status="❌ Failed"
	fi

	# Format test type name nicely
	case $test_type in
	unit) name="Unit Tests" ;;
	integration) name="Integration Tests" ;;
	ollama_local) name="Ollama Tests" ;;
	slow-browser) name="Slow Browser Tests" ;;
	slow-other) name="Slow Other Tests" ;;
	ghcp) name="GitHub Copilot Tests" ;;
	*) name="$test_type" ;;
	esac

	echo "\| ${name} \| ${status} \|" >> $GITHUB_STEP_SUMMARY
	fi
	done

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: construct training loop orchestrator #1998

Workflow file

feat: construct training loop orchestrator #1998

Uh oh!

Workflow file for this run