# Batch Inference Completion-time Cron Job #2175

# Scheduled workflow that submits batch-inference requests on an hourly cron
# and can also be started manually from the Actions tab.
name: Batch Inference Completion-time Cron Job

on:
  schedule:
    # Runs every hour at minute 0
    - cron: "0 * * * *"
  workflow_dispatch:  # Allow manual triggering

# Least privilege: the jobs only need read access to check out the repository.
permissions:
  contents: read

env:
  # Sample JSONL request line (OpenAI batch format), exported to every job
  # as the BATCH_INPUT environment variable.
  BATCH_INPUT: |
    {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-4", "messages": [{"role": "user", "content": "What is the capital of France?"}], "max_tokens": 100}}

jobs:
openai-batch:
  # Submits a single-request batch job to the OpenAI Batch API.
  # Runs only in the tensorzero/tensorzero repository (skipped in forks).
  if: github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.9"

    - name: Install dependencies
      run: |
        pip install openai requests

    # The Python script below writes the JSONL file itself, so this step only
    # has to make sure the target directory exists.
    - name: Create batch input directory
      run: |
        mkdir -p batch_inputs

    - name: OpenAI Batch Inference
      env:
        # Read by the OpenAI client from the environment.
        OPENAI_API_KEY: ${{ secrets.BATCH_CRON_OPENAI_API_KEY }}
      run: |
        # Quoted heredoc: the shell performs no expansion inside the script.
        python - <<'PY'
        import json
        import secrets
        import string
        from datetime import datetime, timezone

        import openai

        # Random 10-character id embedded in the prompt so each run sends a
        # distinct request.
        random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))

        client = openai.OpenAI()

        # One request line in the Batch API JSONL format.
        batch_input = {
            'custom_id': 'request-1',
            'method': 'POST',
            'url': '/v1/chat/completions',
            'body': {
                'model': 'gpt-4',
                'messages': [{
                    'role': 'user',
                    'content': f'My id is {random_str}. What is the capital of France?'
                }],
                'max_tokens': 100
            }
        }

        input_path = 'batch_inputs/input.jsonl'
        with open(input_path, 'w') as f:
            f.write(json.dumps(batch_input) + '\n')

        # Upload the input file; the context manager closes the handle once
        # the upload call returns.
        with open(input_path, 'rb') as f:
            batch_input_file = client.files.create(file=f, purpose='batch')

        # Create the batch job; the timestamp is recorded explicitly in UTC.
        batch_job = client.batches.create(
            input_file_id=batch_input_file.id,
            endpoint='/v1/chat/completions',
            completion_window='24h',
            metadata={
                'source': 'github-actions',
                'timestamp': datetime.now(timezone.utc).isoformat(),
            },
        )

        print(f'OpenAI Batch Job Created: {batch_job.id}')
        print(f'Status: {batch_job.status}')
        PY

    - name: Cleanup
      # Remove the scratch directory even when an earlier step failed.
      if: always()
      run: |
        rm -rf batch_inputs/
google-vertex-batch:
  # Uploads a one-line JSONL input to GCS and submits a Vertex AI batch
  # prediction job that reads it.
  # Runs only in the tensorzero/tensorzero repository (skipped in forks).
  if: github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.9"

    - name: Install dependencies
      run: |
        pip install google-cloud-aiplatform google-cloud-storage

    - name: Google Vertex AI Batch Inference
      env:
        GOOGLE_APPLICATION_CREDENTIALS_JSON: ${{ secrets.GCP_JWT_KEY }}
        # Path of the key file written below and removed by the Cleanup step.
        GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcp-key.json
        GOOGLE_CLOUD_PROJECT: tensorzero-public
        GOOGLE_CLOUD_REGION: us-east1
      run: |
        # Write the service-account key from the environment variable rather
        # than splicing the secret into the script text, so quotes in the
        # secret cannot break or alter the command. The subshell keeps the
        # restrictive umask from leaking into the rest of the step.
        (umask 077 && printf '%s\n' "$GOOGLE_APPLICATION_CREDENTIALS_JSON" > "$GOOGLE_APPLICATION_CREDENTIALS")

        # Quoted heredoc: the shell performs no expansion inside the script.
        python - <<'PY'
        import json
        import os
        import secrets
        import string
        from datetime import datetime, timezone

        from google.cloud import aiplatform
        from google.cloud import storage

        # Random 10-character id embedded in the prompt so each run sends a
        # distinct request.
        random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))

        # Project and region come from the step's env block (single source
        # of truth instead of repeating the literals here).
        aiplatform.init(
            project=os.environ['GOOGLE_CLOUD_PROJECT'],
            location=os.environ['GOOGLE_CLOUD_REGION'],
        )

        # One batch prediction request.
        batch_input = {
            'request': {
                'contents': [{
                    'role': 'user',
                    'parts': [{
                        'text': f'My id is {random_str}. What is the capital of France?'
                    }]
                }]
            }
        }

        # Input and output both live in this bucket, which must already exist.
        storage_client = storage.Client()
        bucket_name = 'tensorzero-batch-tests-input'
        bucket = storage_client.bucket(bucket_name)

        # Second-resolution UTC timestamp keeps object and job names unique
        # across hourly runs.
        timestamp = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')
        input_blob_name = f'batch-inputs/gemini-input-{timestamp}.jsonl'
        output_blob_name = f'batch-outputs/gemini-output-{timestamp}/'

        # Upload the input file.
        blob = bucket.blob(input_blob_name)
        blob.upload_from_string(json.dumps(batch_input))

        # Submit the batch prediction job.
        job = aiplatform.BatchPredictionJob.submit(
            job_display_name=f'gemini-batch-{timestamp}',
            model_name='publishers/google/models/gemini-2.0-flash-001',
            instances_format='jsonl',
            predictions_format='jsonl',
            gcs_source=f'gs://{bucket_name}/{input_blob_name}',
            gcs_destination_prefix=f'gs://{bucket_name}/{output_blob_name}',
        )

        print(f'Vertex AI Batch Job Created: {job.name}')
        print(f'State: {job.state}')
        PY

    - name: Cleanup
      # Always delete the key file, including when the submit step failed.
      if: always()
      run: |
        rm -f /tmp/gcp-key.json
anthropic-batch:
  # Submits a single-request job to the Anthropic Message Batches API.
  # Runs only in the tensorzero/tensorzero repository (skipped in forks).
  if: github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.9"

    - name: Install dependencies
      run: |
        pip install anthropic

    - name: Anthropic Batch Inference
      env:
        # Read by the Anthropic client from the environment.
        ANTHROPIC_API_KEY: ${{ secrets.BATCH_CRON_ANTHROPIC_API_KEY }}
      run: |
        # Quoted heredoc: the shell performs no expansion inside the script.
        python - <<'PY'
        import secrets
        import string

        import anthropic
        from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
        from anthropic.types.messages.batch_create_params import Request

        # Random 10-character id embedded in the prompt so each run sends a
        # distinct request.
        random_str = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10))

        client = anthropic.Anthropic()

        # Create a batch containing exactly one message request.
        batch_job = client.messages.batches.create(
            requests=[
                Request(
                    custom_id='my-first-request',
                    params=MessageCreateParamsNonStreaming(
                        model='claude-3-7-sonnet-20250219',
                        max_tokens=1024,
                        messages=[{
                            'role': 'user',
                            'content': f'My id is {random_str}. What is the capital of France?',
                        }]
                    )
                ),
            ]
        )

        print(f'Anthropic Batch Job Created: {batch_job.id}')
        print(f'Processing Status: {batch_job.processing_status}')
        PY
log-completion:
  # Final job: waits on the three provider jobs listed under `needs` and
  # prints a summary to the run log.
  needs:
    - openai-batch
    - google-vertex-batch
    - anthropic-batch
  if: github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  steps:
    - name: Log completion
      run: |
        printf '%s\n' "Batch inference jobs submitted at $(date)"
        printf '%s\n' "All three providers have been triggered successfully"