diff --git a/deploy/example.config.ts b/deploy/example.config.ts index a530ab6..b05cb71 100644 --- a/deploy/example.config.ts +++ b/deploy/example.config.ts @@ -74,8 +74,9 @@ export const config: Config = { }, azure: { providerId: 'azure', - // NOTE: For now, you need to specify the family of models you want to use. - baseUrl: 'https://marcelo-0665-resource.openai.azure.com/openai/v1', + // NOTE: baseUrl should NOT include /openai or /anthropic - the provider adds these automatically + // Requests whose path starts with `v1/messages` are routed to the Anthropic API; everything else uses the OpenAI API + baseUrl: 'https://marcelo-0665-resource.openai.azure.com', injectCost: true, credentials: env.AZURE_API_KEY, }, diff --git a/gateway/src/handler.ts b/gateway/src/handler.ts index 14dd49a..0fc3b1c 100644 --- a/gateway/src/handler.ts +++ b/gateway/src/handler.ts @@ -9,7 +9,7 @@ import type { BaseAPI } from './api/base' import type { OtelSpan } from './otel' import { attributesFromRequest, attributesFromResponse, type GenAIAttributes } from './otel/attributes' import { AnthropicProvider } from './providers/anthropic' -import { AzureProvider } from './providers/azure' +import { createAzureProvider } from './providers/azure' import type { BaseProvider, ExtractedInfo, ProviderOptions } from './providers/base' import { BedrockProvider } from './providers/bedrock' import { GoogleVertexProvider } from './providers/google' @@ -63,7 +63,7 @@ export class RequestHandler { return match(options.providerProxy.providerId) .returnType() .with('openai', () => new OpenAIProvider(options)) - .with('azure', () => new AzureProvider(options)) + .with('azure', () => createAzureProvider(options)) .with('groq', () => new GroqProvider(options)) .with('google-vertex', () => new GoogleVertexProvider(options)) .with('anthropic', () => new AnthropicProvider(options)) diff --git a/gateway/src/providers/azure.ts b/gateway/src/providers/azure.ts index 5230e90..23eb6dc 100644 --- a/gateway/src/providers/azure.ts +++ 
b/gateway/src/providers/azure.ts @@ -1,5 +1,23 @@ +import { AnthropicProvider } from './anthropic' +import type { BaseProvider, ProviderOptions } from './base' import { OpenAIProvider } from './openai' -// TODO(Marcelo): The `AzureProvider` should be its own class, not a subclass of `OpenAIProvider`. -export class AzureProvider extends OpenAIProvider {} -// TODO(Marcelo): We should support Anthropic models as well. +export function createAzureProvider(options: ProviderOptions): BaseProvider { + // We assume that it's Anthropic if the path starts with `v1/messages`. Otherwise, it's OpenAI. + // This is not necessarily true, since Anthropic does support OpenAI-compatible endpoints. + const isAnthropic = options.restOfPath.startsWith('v1/messages') + + // We modify the `baseUrl` and not the `restOfPath` because the `restOfPath` is used to determine the API flavor. + // NOTE: Instead of modifying the `providerProxy` object, I think we should pass the `baseUrl` as a separate argument to the constructor. + const modifiedOptions: ProviderOptions = { + ...options, + providerProxy: { + ...options.providerProxy, + baseUrl: isAnthropic + ? `${options.providerProxy.baseUrl}/anthropic` + : `${options.providerProxy.baseUrl}/openai/v1`, + }, + } + + return isAnthropic ? 
new AnthropicProvider(modifiedOptions) : new OpenAIProvider(modifiedOptions) +} diff --git a/gateway/test/providers/azure.spec.ts b/gateway/test/providers/azure.spec.ts index 42f0afb..3b15e57 100644 --- a/gateway/test/providers/azure.spec.ts +++ b/gateway/test/providers/azure.spec.ts @@ -1,3 +1,4 @@ +import Anthropic from '@anthropic-ai/sdk' import OpenAI from 'openai' import { describe, expect } from 'vitest' import { deserializeRequest } from '../otel' @@ -37,4 +38,25 @@ describe('azure', () => { expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') }) + + test('anthropic', async ({ gateway }) => { + const { fetch, otelBatch } = gateway + + // The `authToken` is passed as `Authorization` header with the anthropic client. + const client = new Anthropic({ authToken: 'healthy', baseURL: 'https://example.com/azure', fetch }) + + const completion = await client.messages.create({ + model: 'claude-sonnet-4-20250514', + max_tokens: 1024, + top_p: 0.95, + top_k: 1, + temperature: 0.5, + stop_sequences: ['potato'], + system: 'You are a helpful assistant.', + messages: [{ role: 'user', content: 'What is the capital of France?' 
}], + }) + expect(completion).toMatchSnapshot('llm') + expect(otelBatch, 'otelBatch length not 1').toHaveLength(1) + expect(deserializeRequest(otelBatch[0]!)).toMatchSnapshot('span') + }) }) diff --git a/gateway/test/providers/azure.spec.ts.snap b/gateway/test/providers/azure.spec.ts.snap index f94c238..76a8879 100644 --- a/gateway/test/providers/azure.spec.ts.snap +++ b/gateway/test/providers/azure.spec.ts.snap @@ -1,5 +1,116 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html +exports[`azure > anthropic > llm 1`] = ` +{ + "content": [ + { + "text": "The capital of France is Paris.", + "type": "text", + }, + ], + "id": "msg_01Dk1uacDayZhr8zthkRovpM", + "model": "claude-sonnet-4-20250514", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0, + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "input_tokens": 20, + "output_tokens": 10, + "pydantic_ai_gateway": { + "cost_estimate": 0.00020999999999999998, + }, + "service_tier": "standard", + }, +} +`; + +exports[`azure > anthropic > span 1`] = ` +[ + { + "attributes": { + "gen_ai.input.messages": [ + { + "parts": [ + { + "content": "What is the capital of France?", + "type": "text", + }, + ], + "role": "user", + }, + ], + "gen_ai.operation.name": "chat", + "gen_ai.output.messages": [ + { + "finish_reason": "end_turn", + "parts": [ + { + "content": "The capital of France is Paris.", + "type": "text", + }, + ], + "role": "assistant", + }, + ], + "gen_ai.request.max_tokens": 1024, + "gen_ai.request.model": "claude-sonnet-4-20250514", + "gen_ai.request.seed": {}, + "gen_ai.request.stop_sequences": [ + "potato", + ], + "gen_ai.request.temperature": 0.5, + "gen_ai.request.top_k": 1, + "gen_ai.request.top_p": 0.95, + "gen_ai.response.finish_reasons": [ + "end_turn", + ], + "gen_ai.response.id": "msg_01Dk1uacDayZhr8zthkRovpM", + 
"gen_ai.response.model": "claude-sonnet-4-20250514", + "gen_ai.system": "anthropic", + "gen_ai.system_instructions": [ + { + "content": "You are a helpful assistant.", + "type": "text", + }, + ], + "gen_ai.usage.cache_audio_read_tokens": {}, + "gen_ai.usage.cache_read_tokens": 0, + "gen_ai.usage.cache_write_tokens": 0, + "gen_ai.usage.input_audio_tokens": {}, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.output_audio_tokens": {}, + "gen_ai.usage.output_tokens": 10, + "http.request.body.text": "{"model":"claude-sonnet-4-20250514","max_tokens":1024,"top_p":0.95,"top_k":1,"temperature":0.5,"stop_sequences":["potato"],"system":"You are a helpful assistant.","messages":[{"role":"user","content":"What is the capital of France?"}]}", + "http.response.body.text": "{"model":"claude-sonnet-4-20250514","id":"msg_01Dk1uacDayZhr8zthkRovpM","type":"message","role":"assistant","content":[{"type":"text","text":"The capital of France is Paris."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":10,"service_tier":"standard","pydantic_ai_gateway":{"cost_estimate":0.00020999999999999998}}}", + "http.response.status_code": 200, + "logfire.json_schema": 
"{"type":"object","properties":{"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.system":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.request.top_k":{"type":"number"},"gen_ai.request.top_p":{"type":"number"},"gen_ai.request.temperature":{"type":"number"},"gen_ai.request.stop_sequences":{},"gen_ai.request.seed":{},"gen_ai.response.finish_reasons":{},"gen_ai.response.id":{"type":"string"},"gen_ai.input.messages":{},"gen_ai.output.messages":{},"gen_ai.system_instructions":{},"http.response.status_code":{"type":"number"},"http.request.body.text":{"type":"string"},"http.response.body.text":{"type":"string"},"gen_ai.response.model":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"gen_ai.usage.input_audio_tokens":{},"gen_ai.usage.cache_audio_read_tokens":{},"gen_ai.usage.output_audio_tokens":{}}}", + "logfire.level_num": 9, + "logfire.msg": "chat claude-sonnet-4-20250514", + }, + "events": [], + "kind": 1, + "links": [], + "name": "chat claude-sonnet-4-20250514", + "parentSpanId": undefined, + "resource": { + "service.name": "PAIG", + "service.version": "test", + }, + "scope": "pydantic-ai-gateway", + "status": { + "code": 1, + }, + }, +] +`; + exports[`azure > chat > llm 1`] = ` { "choices": [ diff --git a/proxy-vcr/proxy_vcr/cassettes/azure-9752c34227c474614a0161c842bd67313be0932354998df27c07a1b9d8b29eaa.yaml b/proxy-vcr/proxy_vcr/cassettes/azure-9752c34227c474614a0161c842bd67313be0932354998df27c07a1b9d8b29eaa.yaml new file mode 100644 index 0000000..21fc5af --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/azure-9752c34227c474614a0161c842bd67313be0932354998df27c07a1b9d8b29eaa.yaml @@ -0,0 +1,40 @@ +interactions: +- request: + body: 
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"top_p":0.95,"top_k":1,"temperature":0.5,"stop_sequences":["potato"],"system":"You + are a helpful assistant.","messages":[{"role":"user","content":"What is the + capital of France?"}]}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '235' + content-type: + - application/json + host: + - marcelo-0665-resource.openai.azure.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://marcelo-0665-resource.openai.azure.com/anthropic/v1/messages + response: + body: + string: '{"error":{"code":"401","message":"Access denied due to invalid subscription + key or wrong API endpoint. Make sure to provide a valid key for an active + subscription and use a correct regional API endpoint for your resource."}}' + headers: + Content-Length: + - '224' + Content-Type: + - application/json + Date: + - Wed, 03 Dec 2025 12:57:22 GMT + apim-request-id: + - 04fea0f2-7241-45f9-9a1c-3fd7fe450be7 + status: + code: 401 + message: PermissionDenied +version: 1 diff --git a/proxy-vcr/proxy_vcr/main.py b/proxy-vcr/proxy_vcr/main.py index e2a1166..4e865ab 100644 --- a/proxy-vcr/proxy_vcr/main.py +++ b/proxy-vcr/proxy_vcr/main.py @@ -22,7 +22,7 @@ BEDROCK_BASE_URL = 'https://bedrock-runtime.us-east-1.amazonaws.com' GOOGLE_BASE_URL = 'https://aiplatform.googleapis.com' # The Azure URL is not a secret, we can commit it. -AZURE_BASE_URL = 'https://marcelo-0665-resource.openai.azure.com/openai/v1' +AZURE_BASE_URL = 'https://marcelo-0665-resource.openai.azure.com' HF_BASE_URL = 'https://router.huggingface.co/v1' current_file_dir = pathlib.Path(__file__).parent