Batch Inference Completion-time Cron Job #2175
name: Batch Inference Completion-time Cron Job

on:
  schedule:
    # Runs every hour at minute 0
    - cron: "0 * * * *"
  workflow_dispatch: # Allow manual triggering

env:
  # Fixed input for all batch jobs
  BATCH_INPUT: |
    {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-4", "messages": [{"role": "user", "content": "What is the capital of France?"}], "max_tokens": 100}}
jobs:
  openai-batch:
    if: github.repository == 'tensorzero/tensorzero'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Install dependencies
        run: |
          pip install openai requests
      - name: Create batch input file
        run: |
          mkdir -p batch_inputs
          echo '${{ env.BATCH_INPUT }}' > batch_inputs/input.jsonl
      - name: OpenAI Batch Inference
        env:
          OPENAI_API_KEY: ${{ secrets.BATCH_CRON_OPENAI_API_KEY }}
        run: |
          python -c "
          import openai
          import json
          from datetime import datetime
          import secrets
          import string
          # Generate random string
          random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))
          client = openai.OpenAI()
          # Upload batch input file
          batch_input = {
              'custom_id': 'request-1',
              'method': 'POST',
              'url': '/v1/chat/completions',
              'body': {
                  'model': 'gpt-4',
                  'messages': [{
                      'role': 'user',
                      'content': f'My id is {random_str}. What is the capital of France?'
                  }],
                  'max_tokens': 100
              }
          }
          with open('batch_inputs/input.jsonl', 'w') as f:
              f.write(json.dumps(batch_input) + '\n')
          batch_input_file = client.files.create(
              file=open('batch_inputs/input.jsonl', 'rb'),
              purpose='batch'
          )
          # Create batch job
          batch_job = client.batches.create(
              input_file_id=batch_input_file.id,
              endpoint='/v1/chat/completions',
              completion_window='24h',
              metadata={'source': 'github-actions', 'timestamp': datetime.now().isoformat()}
          )
          print(f'OpenAI Batch Job Created: {batch_job.id}')
          print(f'Status: {batch_job.status}')
          "
      - name: Cleanup
        run: |
          rm -rf batch_inputs/
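
  # Stages a single Gemini request in GCS, then submits a Vertex AI batch
  # prediction job against gemini-2.0-flash-001.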
  google-vertex-batch:
    if: github.repository == 'tensorzero/tensorzero'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Install dependencies
        run: |
          pip install google-cloud-aiplatform google-cloud-storage
      - name: Google Vertex AI Batch Inference
        env:
          GOOGLE_APPLICATION_CREDENTIALS_JSON: ${{ secrets.GCP_JWT_KEY }}
          GOOGLE_CLOUD_PROJECT: tensorzero-public
          GOOGLE_CLOUD_REGION: us-east1
        run: |
          # Create service account key file
          echo '${{ secrets.GCP_JWT_KEY }}' > /tmp/gcp-key.json
          export GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-key.json
          python -c "
          from google.cloud import aiplatform
          from google.cloud import storage
          import json
          from datetime import datetime
          import uuid
          import secrets
          import string
          # Generate random string
          random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))
          # Initialize Vertex AI
          aiplatform.init(
              project='tensorzero-public',
              location='us-east1'
          )
          # Create batch prediction input
          batch_input = {
              'request': {
                  'contents': [{
                      'role': 'user',
                      'parts': [{
                          'text': f'My id is {random_str}. What is the capital of France?'
                      }]
                  }]
              }
          }
          # Upload to GCS (you'll need a bucket)
          storage_client = storage.Client()
          bucket_name = 'tensorzero-batch-tests-input'
          bucket = storage_client.bucket(bucket_name)
          # Create unique filename
          timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
          input_blob_name = f'batch-inputs/gemini-input-{timestamp}.jsonl'
          output_blob_name = f'batch-outputs/gemini-output-{timestamp}/'
          # Upload input file
          blob = bucket.blob(input_blob_name)
          blob.upload_from_string(json.dumps(batch_input))
          # Create batch prediction job
          job = aiplatform.BatchPredictionJob.submit(
              job_display_name=f'gemini-batch-{timestamp}',
              model_name='publishers/google/models/gemini-2.0-flash-001',
              instances_format='jsonl',
              predictions_format='jsonl',
              gcs_source=f'gs://{bucket_name}/{input_blob_name}',
              gcs_destination_prefix=f'gs://{bucket_name}/{output_blob_name}',
          )
          print(f'Vertex AI Batch Job Created: {job.name}')
          print(f'State: {job.state}')
          "
      - name: Cleanup
        run: |
          rm -f /tmp/gcp-key.json
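
  # Submits a single-request Message Batch through the Anthropic Batches API.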
  anthropic-batch:
    if: github.repository == 'tensorzero/tensorzero'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Install dependencies
        run: |
          pip install anthropic
      - name: Anthropic Batch Inference
        env:
          ANTHROPIC_API_KEY: ${{ secrets.BATCH_CRON_ANTHROPIC_API_KEY }}
        run: |
          python -c "
          import anthropic
          from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
          from anthropic.types.messages.batch_create_params import Request
          import json
          from datetime import datetime
          import secrets
          import string
          # Generate random string
          random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))
          client = anthropic.Anthropic()
          batch_job = client.messages.batches.create(
              requests=[
                  Request(
                      custom_id='my-first-request',
                      params=MessageCreateParamsNonStreaming(
                          model='claude-3-7-sonnet-20250219',
                          max_tokens=1024,
                          messages=[{
                              'role': 'user',
                              'content': f'My id is {random_str}. What is the capital of France?',
                          }]
                      )
                  ),
              ]
          )
          print(f'Anthropic Batch Job Created: {batch_job.id}')
          print(f'Processing Status: {batch_job.processing_status}')
          "
  log-completion:
    if: github.repository == 'tensorzero/tensorzero'
    needs: [openai-batch, google-vertex-batch, anthropic-batch]
    runs-on: ubuntu-latest
    steps:
      - name: Log completion
        run: |
          echo "Batch inference jobs submitted at $(date)"
          echo "All three providers have been triggered successfully"