-      <button disabled={isLoading}>
-        {isLoading ? 'Searching...' : 'Find Product'}
-      </button>
-      {response && <div>Result: {response}</div>}
+      {messages.map(m => (
+        <div key={m.id}>
+          {m.role === 'user' ? 'User: ' : 'AI: '}
+          {m.content}
+        </div>
+      ))}
+
+      {(status === 'submitted' || status === 'streaming') && (
+        <div>
+          {status === 'submitted' && <div>Loading...</div>}
+          <button onClick={stop}>Stop</button>
+        </div>
+      )}
+
+      {error && (
+        <div>
+          <div>An error occurred.</div>
+          <button onClick={() => reload()}>Retry</button>
+        </div>
+      )}
+
+
);
}
diff --git a/examples/next-openai/app/mcp/server/route.ts b/examples/next-openai/app/mcp/server/route.ts
new file mode 100644
index 000000000000..235528c4c186
--- /dev/null
+++ b/examples/next-openai/app/mcp/server/route.ts
@@ -0,0 +1,16 @@
+import { mcpApiHandler } from '@/util/mcp/handler';
+import { createServerResponseAdapter } from '@/util/mcp/server-response';
+import { NextRequest } from 'next/server';
+
+// This route (/mcp/server) serves the MCP server. It is called by the /mcp/chat route, which useChat uses to connect to the server and fetch its tools.
+const requestHandler = (req: NextRequest) => {
+ return createServerResponseAdapter(req.signal, res => {
+ mcpApiHandler(req, res);
+ });
+};
+
+export {
+ requestHandler as DELETE,
+ requestHandler as GET,
+ requestHandler as POST,
+};
diff --git a/examples/next-openai/app/use-chat-resilient-persistence/[id]/chat.tsx b/examples/next-openai/app/use-chat-resilient-persistence/[id]/chat.tsx
index 90428815a508..c589443d864f 100644
--- a/examples/next-openai/app/use-chat-resilient-persistence/[id]/chat.tsx
+++ b/examples/next-openai/app/use-chat-resilient-persistence/[id]/chat.tsx
@@ -1,19 +1,20 @@
'use client';
-import { createIdGenerator } from 'ai';
import { Message, useChat } from '@ai-sdk/react';
+import { createIdGenerator } from 'ai';
export default function Chat({
id,
initialMessages,
}: { id?: string | undefined; initialMessages?: Message[] } = {}) {
- const { input, status, handleInputChange, handleSubmit, messages } = useChat({
- api: '/api/use-chat-resilient-persistence',
- id, // use the provided chatId
- initialMessages, // initial messages if provided
- sendExtraMessageFields: true, // send id and createdAt for each message
- generateId: createIdGenerator({ prefix: 'msgc', size: 16 }), // id format for client-side messages
- });
+ const { input, status, handleInputChange, handleSubmit, messages, stop } =
+ useChat({
+ api: '/api/use-chat-resilient-persistence',
+ id, // use the provided chatId
+ initialMessages, // initial messages if provided
+ sendExtraMessageFields: true, // send id and createdAt for each message
+ generateId: createIdGenerator({ prefix: 'msgc', size: 16 }), // id format for client-side messages
+ });
return (
@@ -32,6 +33,15 @@ export default function Chat({
onChange={handleInputChange}
disabled={status !== 'ready'}
/>
+        {status === 'streaming' && (
+          <button type="button" onClick={stop}>
+            Stop
+          </button>
+        )}
);
diff --git a/examples/next-openai/app/use-chat-resume/[id]/page.tsx b/examples/next-openai/app/use-chat-resume/[id]/page.tsx
new file mode 100644
index 000000000000..85ac133795d7
--- /dev/null
+++ b/examples/next-openai/app/use-chat-resume/[id]/page.tsx
@@ -0,0 +1,14 @@
+import { loadChat } from '@/util/chat-store';
+import { Chat } from '../chat';
+
+export default async function Page({
+ params,
+}: {
+ params: Promise<{ id: string }>;
+}) {
+ const { id } = await params;
+
+ const messages = await loadChat(id);
+
+  return <Chat chatId={id} autoResume={true} initialMessages={messages} />;
+}
diff --git a/examples/next-openai/app/use-chat-resume/chat.tsx b/examples/next-openai/app/use-chat-resume/chat.tsx
new file mode 100644
index 000000000000..72be3a8f8ba0
--- /dev/null
+++ b/examples/next-openai/app/use-chat-resume/chat.tsx
@@ -0,0 +1,111 @@
+'use client';
+
+import { useChat } from '@ai-sdk/react';
+import { Message } from 'ai';
+import Link from 'next/link';
+import { useEffect } from 'react';
+
+export function Chat({
+ chatId,
+ autoResume,
+ initialMessages = [],
+}: {
+ chatId: string;
+ autoResume: boolean;
+ initialMessages: Message[];
+}) {
+ const {
+ error,
+ input,
+ status,
+ handleInputChange,
+ handleSubmit,
+ messages,
+ reload,
+ stop,
+ experimental_resume,
+ } = useChat({
+ id: chatId,
+ api: '/api/use-chat-resume',
+ initialMessages,
+ sendExtraMessageFields: true,
+ onError: error => {
+ console.error('Error streaming text:', error);
+ },
+ });
+
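+  // experimental_resume re-attaches the client to a response stream that is
+  // still running on the server (e.g. after a page reload), so it only needs
+  // to run once when the component mounts.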
+ useEffect(() => {
+ if (autoResume) {
+ experimental_resume();
+ }
+ // We want to disable the exhaustive deps rule here because we only want to run this effect once
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, []);
+
+ return (
+    <div>
+      <div>
+        <Link href={`/use-chat-resume/${chatId}`}>Chat Id: {chatId}</Link>
+      </div>
+
+      <div>Status: {status}</div>
+
+      {messages.map(message => (
+        <div key={message.id}>
+          <div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
+          <div>
+            <div>{message.id}</div>
+            {message.parts
+              .filter(part => part.type !== 'source')
+              .map((part, partIndex) => {
+                if (part.type === 'text') {
+                  return <div key={partIndex}>{part.text}</div>;
+                }
+              })}
+          </div>
+        </div>
+      ))}
+
+      {(status === 'submitted' || status === 'streaming') && (
+        <div>
+          {status === 'submitted' && <div>Loading...</div>}
+          <button onClick={stop}>Stop</button>
+        </div>
+      )}
+
+      {error && (
+        <div>
+          <div>An error occurred.</div>
+          <button onClick={() => reload()}>Retry</button>
+        </div>
+      )}
+
+      <form onSubmit={handleSubmit}>
+        <input value={input} onChange={handleInputChange} />
+      </form>
+    </div>
+ );
+}
diff --git a/examples/next-openai/app/use-chat-resume/page.tsx b/examples/next-openai/app/use-chat-resume/page.tsx
new file mode 100644
index 000000000000..dc60255f6769
--- /dev/null
+++ b/examples/next-openai/app/use-chat-resume/page.tsx
@@ -0,0 +1,8 @@
+import { Chat } from './chat';
+import { generateId } from 'ai';
+
+export default function Page() {
+ const chatId = generateId(32);
+
+  return <Chat chatId={chatId} autoResume={false} initialMessages={[]} />;
+}
diff --git a/examples/next-openai/package.json b/examples/next-openai/package.json
index 03979804c95e..79011c68d44f 100644
--- a/examples/next-openai/package.json
+++ b/examples/next-openai/package.json
@@ -9,22 +9,25 @@
"lint": "next lint"
},
"dependencies": {
- "@ai-sdk/anthropic": "1.2.4",
- "@ai-sdk/deepseek": "0.2.5",
- "@ai-sdk/fireworks": "0.2.5",
- "@ai-sdk/openai": "1.3.6",
- "@ai-sdk/google": "1.2.5",
- "@ai-sdk/google-vertex": "2.2.7",
- "@ai-sdk/perplexity": "1.1.3",
- "@ai-sdk/ui-utils": "1.2.4",
- "@ai-sdk/react": "1.2.5",
+ "@ai-sdk/anthropic": "1.2.11",
+ "@ai-sdk/deepseek": "0.2.14",
+ "@ai-sdk/fireworks": "0.2.14",
+ "@ai-sdk/openai": "1.3.22",
+ "@ai-sdk/google": "1.2.18",
+ "@ai-sdk/google-vertex": "2.2.22",
+ "@ai-sdk/perplexity": "1.1.9",
+ "@modelcontextprotocol/sdk": "1.10.2",
+ "@ai-sdk/ui-utils": "1.2.11",
+ "@ai-sdk/react": "1.2.12",
"@vercel/blob": "^0.26.0",
- "ai": "4.2.10",
+ "ai": "4.3.15",
"next": "latest",
"openai": "4.52.6",
"react": "^18",
"react-dom": "^18",
"react-markdown": "9.0.1",
+ "redis": "^4.7.0",
+ "resumable-stream": "^2.0.0",
"zod": "3.23.8"
},
"devDependencies": {
diff --git a/examples/next-openai/util/chat-store.ts b/examples/next-openai/util/chat-store.ts
index 6f58a23edcba..319ab3e299c4 100644
--- a/examples/next-openai/util/chat-store.ts
+++ b/examples/next-openai/util/chat-store.ts
@@ -1,5 +1,5 @@
import { generateId, Message } from 'ai';
-import { existsSync, mkdirSync } from 'fs';
+import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { readFile, writeFile } from 'fs/promises';
import path from 'path';
@@ -23,12 +23,58 @@ export async function saveChat({
await writeFile(getChatFile(id), JSON.stringify(messages, null, 2));
}
+export async function appendMessageToChat({
+ chatId,
+ message,
+}: {
+ chatId: string;
+ message: Message;
+}): Promise<void> {
+ const file = getChatFile(chatId);
+ const messages = await loadChat(chatId);
+ messages.push(message);
+ await writeFile(file, JSON.stringify(messages, null, 2));
+}
+
export async function loadChat(id: string): Promise<Message[]> {
return JSON.parse(await readFile(getChatFile(id), 'utf8'));
}
function getChatFile(id: string): string {
const chatDir = path.join(process.cwd(), '.chats');
+
+ if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
+
+ const chatFile = path.join(chatDir, `${id}.json`);
+
+ if (!existsSync(chatFile)) {
+ writeFileSync(chatFile, '[]');
+ }
+
+ return chatFile;
+}
+
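+// Stream IDs are stored per chat (one JSON file per chat under .streams) so that
+// an interrupted response stream can be found and resumed later.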
+export async function appendStreamId({
+ chatId,
+ streamId,
+}: {
+ chatId: string;
+ streamId: string;
+}) {
+ const file = getStreamsFile(chatId);
+ const streams = await loadStreams(chatId);
+ streams.push(streamId);
+ await writeFile(file, JSON.stringify(streams, null, 2));
+}
+
+export async function loadStreams(chatId: string): Promise<string[]> {
+ const file = getStreamsFile(chatId);
+ if (!existsSync(file)) return [];
+ return JSON.parse(await readFile(file, 'utf8'));
+}
+
+function getStreamsFile(chatId: string): string {
+ const chatDir = path.join(process.cwd(), '.streams');
if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
- return path.join(chatDir, `${id}.json`);
+ return path.join(chatDir, `${chatId}.json`);
}
diff --git a/examples/next-openai/util/mcp/handler.ts b/examples/next-openai/util/mcp/handler.ts
new file mode 100644
index 000000000000..acb732267077
--- /dev/null
+++ b/examples/next-openai/util/mcp/handler.ts
@@ -0,0 +1,102 @@
+import { ServerOptions } from '@modelcontextprotocol/sdk/server/index.js';
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
+import { ServerResponse } from 'http';
+import { NextRequest } from 'next/server';
+import { z } from 'zod';
+import { convertNextRequestToIncomingMessage } from './incoming-message';
+
+export const mcpApiHandler = initializeMcpApiHandler({
+ initializationCallback: server => {
+ server.tool(
+ 'calculateSum',
+ 'Returns the sum of N numbers',
+ {
+ values: z.array(z.number()),
+ },
+ async ({ values }: { values: number[] }) => ({
+ content: [
+ {
+ type: 'text',
+ text: `Sum: ${values.reduce((a: number, b: number) => a + b, 0)}`,
+ },
+ ],
+ }),
+ );
+ },
+ serverOptions: {
+ capabilities: {
+ tools: {},
+ },
+ },
+});
+
+function initializeMcpApiHandler({
+ initializationCallback,
+ serverOptions,
+}: {
+ initializationCallback: (server: McpServer) => void;
+ serverOptions?: ServerOptions;
+}) {
+ return async function mcpApiHandler(req: NextRequest, res: ServerResponse) {
+ const url = new URL(req.url || '', 'https://example.com');
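+    // The base URL is only a parsing fallback; routing below relies solely on
+    // the pathname.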
+
+ if (url.pathname === '/mcp/server') {
+ if (req.method === 'GET') {
+ console.log('Received GET MCP request');
+ res.writeHead(405, { 'Content-Type': 'application/json' }).end(
+ JSON.stringify({
+ jsonrpc: '2.0',
+ error: {
+ code: -32000,
+ message: 'Method not allowed.',
+ },
+ id: null,
+ }),
+ );
+ return;
+ }
+
+ if (req.method === 'DELETE') {
+ console.log('Received DELETE MCP request');
+ res.writeHead(405, { 'Content-Type': 'application/json' }).end(
+ JSON.stringify({
+ jsonrpc: '2.0',
+ error: {
+ code: -32000,
+ message: 'Method not allowed.',
+ },
+ id: null,
+ }),
+ );
+ return;
+ }
+
+ console.log('New MCP connection', req.url, req.method);
+
+ if (req.method === 'POST') {
+ /**
+ * In Stateless Mode, we create a new instance of transport and server for each request to ensure complete isolation. A single instance would cause request ID collisions when multiple clients connect concurrently.
+ */
+ const server = new McpServer(
+ {
+ name: 'MCP Next.js Server',
+ version: '0.1.0',
+ },
+ serverOptions,
+ );
+ const statelessTransport = new StreamableHTTPServerTransport({
+ sessionIdGenerator: undefined,
+ });
+ initializationCallback(server);
+ await server.connect(statelessTransport);
+
+ const incomingMessage = await convertNextRequestToIncomingMessage(req);
+ await statelessTransport.handleRequest(incomingMessage, res);
+ }
+ } else {
+ res.statusCode = 404;
+ res.end('Not found');
+ }
+ };
+}
diff --git a/examples/next-openai/util/mcp/incoming-message.ts b/examples/next-openai/util/mcp/incoming-message.ts
new file mode 100644
index 000000000000..7b05c84a8a03
--- /dev/null
+++ b/examples/next-openai/util/mcp/incoming-message.ts
@@ -0,0 +1,55 @@
+import { IncomingMessage } from 'http';
+import { Socket } from 'net';
+import { NextRequest } from 'next/server';
+import { Readable } from 'stream';
+
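+/**
+ * Bridges a Next.js (fetch-based) request to a Node.js IncomingMessage so it
+ * can be passed to APIs that expect Node HTTP primitives, such as the MCP
+ * StreamableHTTPServerTransport. The parsed body is replayed through a
+ * Readable stream whose methods are grafted onto the IncomingMessage.
+ */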
+export async function convertNextRequestToIncomingMessage(
+ request: NextRequest,
+): Promise<IncomingMessage> {
+ const method = request.method;
+ const url = request.url;
+ const headers = Object.fromEntries(request.headers);
+ const contentType = request.headers.get('content-type') || '';
+ const body = contentType.includes('application/json')
+ ? await request.json()
+ : await request.text();
+ const socket = new Socket();
+
+ // Create a readable stream that will be used as the base for IncomingMessage
+ const readable = new Readable();
+ readable._read = (): void => {}; // Required implementation
+
+ // Add the body content if provided
+ if (body) {
+ if (typeof body === 'string') {
+ readable.push(body);
+ } else if (Buffer.isBuffer(body)) {
+ readable.push(body);
+ } else {
+ // Ensure proper JSON-RPC format
+ const bodyString = JSON.stringify(body);
+ readable.push(bodyString);
+ }
+ readable.push(null); // Signal the end of the stream
+ } else {
+ readable.push(null); // Always end the stream even if no body
+ }
+
+ // Create the IncomingMessage instance
+ const req = new IncomingMessage(socket);
+
+ // Set the properties
+ req.method = method;
+ req.url = url;
+ req.headers = headers;
+
+ // Copy over the stream methods
+ req.push = readable.push.bind(readable);
+ req.read = readable.read.bind(readable);
+
+ // @ts-expect-error
+ req.on = readable.on.bind(readable);
+ req.pipe = readable.pipe.bind(readable);
+
+ return req;
+}
diff --git a/examples/next-openai/util/mcp/server-response.ts b/examples/next-openai/util/mcp/server-response.ts
new file mode 100644
index 000000000000..8e780acf24a2
--- /dev/null
+++ b/examples/next-openai/util/mcp/server-response.ts
@@ -0,0 +1,127 @@
+import { EventEmitter } from 'node:events';
+import { type ServerResponse } from 'node:http';
+
+type WriteheadArgs = {
+ statusCode: number;
+  headers?: Record<string, string>;
+};
+
+/**
+ * Anthropic's MCP API requires a ServerResponse object. This function
+ * creates a fake server response object that can be used to pass to the MCP API.
+ */
+export function createServerResponseAdapter(
+ signal: AbortSignal,
+  fn: (re: ServerResponse) => Promise<void> | void,
+): Promise<Response> {
+ let writeHeadResolver: (v: WriteheadArgs) => void;
+  const writeHeadPromise = new Promise<WriteheadArgs>(
+ async (resolve, _reject) => {
+ writeHeadResolver = resolve;
+ },
+ );
+
+  return new Promise<Response>(async (resolve, _reject) => {
+    let controller: ReadableStreamController<Uint8Array> | undefined;
+ let shouldClose = false;
+ let wroteHead = false;
+
+ const writeHead = (
+ statusCode: number,
+      headers?: Record<string, string>,
+ ) => {
+ if (typeof headers === 'string') {
+ throw new Error('Status message of writeHead not supported');
+ }
+
+ wroteHead = true;
+ writeHeadResolver({
+ statusCode,
+ headers,
+ });
+
+ return fakeServerResponse;
+ };
+
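+    // Chunks written before the Response's ReadableStream controller exists are
+    // buffered here and flushed into the stream once it starts.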
+ let bufferedData: Uint8Array[] = [];
+
+ const write = (
+ chunk: Buffer | string,
+ encoding?: BufferEncoding,
+ ): boolean => {
+ if (encoding) {
+ throw new Error('Encoding not supported');
+ }
+ if (chunk instanceof Buffer) {
+ throw new Error('Buffer not supported');
+ }
+ if (!wroteHead) {
+ writeHead(200);
+ }
+ if (!controller) {
+ bufferedData.push(new TextEncoder().encode(chunk as string));
+ return true;
+ }
+ controller.enqueue(new TextEncoder().encode(chunk as string));
+ return true;
+ };
+
+ const eventEmitter = new EventEmitter();
+
+ const fakeServerResponse = {
+ writeHead,
+ write,
+ end: (data?: Buffer | string) => {
+ if (data) {
+ write(data);
+ }
+
+ if (!controller) {
+ shouldClose = true;
+ return fakeServerResponse;
+ }
+ try {
+ controller.close();
+ } catch {
+ /* May be closed on tcp layer */
+ }
+ return fakeServerResponse;
+ },
+ on: (event: string, listener: (...args: any[]) => void) => {
+ eventEmitter.on(event, listener);
+ return fakeServerResponse;
+ },
+ flushHeaders: () => {
+ return fakeServerResponse;
+ },
+ };
+
+ signal.addEventListener('abort', () => {
+ eventEmitter.emit('close');
+ });
+
+ fn(fakeServerResponse as unknown as ServerResponse);
+
+ const head = await writeHeadPromise;
+
+ const response = new Response(
+ new ReadableStream({
+ start(c) {
+ controller = c;
+ for (const chunk of bufferedData) {
+ controller.enqueue(chunk);
+ }
+ if (shouldClose) {
+ controller.close();
+ }
+ },
+ }),
+ {
+ status: head.statusCode,
+ headers: head.headers,
+ },
+ );
+
+ resolve(response);
+ });
+}
diff --git a/examples/node-http-server/package.json b/examples/node-http-server/package.json
index 09ff6fecb6aa..8b4ac872aef6 100644
--- a/examples/node-http-server/package.json
+++ b/examples/node-http-server/package.json
@@ -3,8 +3,8 @@
"version": "0.0.0",
"private": true,
"dependencies": {
- "@ai-sdk/openai": "1.3.6",
- "ai": "4.2.10",
+ "@ai-sdk/openai": "1.3.22",
+ "ai": "4.3.15",
"dotenv": "16.4.5",
"zod": "3.23.8",
"zod-to-json-schema": "3.23.5"
diff --git a/examples/nuxt-openai/README.md b/examples/nuxt-openai/README.md
index cbadb669da44..f52d610e7cd8 100644
--- a/examples/nuxt-openai/README.md
+++ b/examples/nuxt-openai/README.md
@@ -1,6 +1,6 @@
# AI SDK, Nuxt and OpenAI Chat Example
-This example shows how to use the [AI SDK](https://sdk.vercel.ai/docs) with [Nuxt](https://nuxt.com/), and [OpenAI](https://openai.com) to create a ChatGPT-like AI-powered streaming chat bot.
+This example shows how to use the [AI SDK](https://ai-sdk.dev/docs) with [Nuxt](https://nuxt.com/), and [OpenAI](https://openai.com) to create a ChatGPT-like AI-powered streaming chat bot.
## Deploy your own
@@ -10,10 +10,10 @@ Deploy the example using [Vercel](https://vercel.com?utm_source=github&utm_mediu
## How to use
-Execute `nuxi` to bootstrap the example:
+Execute `create-nuxt` to bootstrap the example:
```bash
-npx nuxi@latest init -t github:vercel/ai/examples/nuxt-openai nuxt-openai
+npx create-nuxt -t github:vercel/ai/examples/nuxt-openai nuxt-openai
```
To run the example locally you need to:
@@ -41,7 +41,7 @@ You can use different providers, such as `vercel` by modifying your `nuxt.config
To learn more about OpenAI, Nuxt, and the AI SDK take a look at the following resources:
-- [AI SDK docs](https://sdk.vercel.ai/docs) - learn mode about the AI SDK
-- [Vercel AI Playground](https://play.vercel.ai) - compare and tune 20+ AI models side-by-side
+- [AI SDK docs](https://ai-sdk.dev/docs) - learn more about the AI SDK
+- [Vercel AI Playground](https://ai-sdk.dev/playground) - compare and tune 20+ AI models side-by-side
- [OpenAI Documentation](https://platform.openai.com/docs) - learn about OpenAI features and API.
- [Nuxt Documentation](https://nuxt.com/docs) - learn about Nuxt features and API.
diff --git a/examples/nuxt-openai/package.json b/examples/nuxt-openai/package.json
index 429177683a28..3df68226cd83 100644
--- a/examples/nuxt-openai/package.json
+++ b/examples/nuxt-openai/package.json
@@ -9,9 +9,9 @@
"postinstall": "nuxt prepare"
},
"dependencies": {
- "@ai-sdk/vue": "1.2.4",
- "@ai-sdk/openai": "1.3.6",
- "ai": "4.2.10",
+ "@ai-sdk/vue": "1.2.12",
+ "@ai-sdk/openai": "1.3.22",
+ "ai": "4.3.15",
"zod": "3.23.8"
},
"devDependencies": {
diff --git a/examples/solidstart-openai/README.md b/examples/solidstart-openai/README.md
index 427d8f67d37d..ded7348ad888 100644
--- a/examples/solidstart-openai/README.md
+++ b/examples/solidstart-openai/README.md
@@ -1,6 +1,6 @@
# AI SDK, Solid.js, SolidStart and OpenAI Chat Example
-This example shows how to use the [AI SDK](https://sdk.vercel.ai/docs) with [Solid](https://solidjs.com/), [SolidStart](https://start.solidjs.com), and [OpenAI](https://openai.com) to create a ChatGPT-like AI-powered streaming chat bot.
+This example shows how to use the [AI SDK](https://ai-sdk.dev/docs) with [Solid](https://solidjs.com/), [SolidStart](https://start.solidjs.com), and [OpenAI](https://openai.com) to create a ChatGPT-like AI-powered streaming chat bot.
## Deploy your own
@@ -41,7 +41,7 @@ By default, `npm run build` will generate a Node app that you can run with `npm
To learn more about OpenAI, Nuxt, and the AI SDK take a look at the following resources:
-- [AI SDK docs](https://sdk.vercel.ai/docs) - learn mode about the AI SDK
-- [Vercel AI Playground](https://play.vercel.ai) - compare and tune 20+ AI models side-by-side
+- [AI SDK docs](https://ai-sdk.dev/docs) - learn more about the AI SDK
+- [Vercel AI Playground](https://ai-sdk.dev/playground) - compare and tune 20+ AI models side-by-side
- [OpenAI Documentation](https://platform.openai.com/docs) - learn about OpenAI features and API.
- [SolidStart Documentation](https://start.solidjs.com) - learn about SolidStart.
diff --git a/examples/solidstart-openai/package.json b/examples/solidstart-openai/package.json
index f364905d9791..2d8b1e0c9bae 100644
--- a/examples/solidstart-openai/package.json
+++ b/examples/solidstart-openai/package.json
@@ -14,13 +14,13 @@
"vinxi": "^0.4.3"
},
"dependencies": {
- "@ai-sdk/openai": "1.3.6",
- "@ai-sdk/solid": "1.2.6",
- "@ai-sdk/ui-utils": "1.2.4",
+ "@ai-sdk/openai": "1.3.22",
+ "@ai-sdk/solid": "1.2.13",
+ "@ai-sdk/ui-utils": "1.2.11",
"@solidjs/meta": "0.29.4",
"@solidjs/router": "^0.15.1",
"@solidjs/start": "^1.0.10",
- "ai": "4.2.10",
+ "ai": "4.3.15",
"solid-js": "^1.9.3",
"zod": "^3.23.8"
},
diff --git a/examples/sveltekit-openai/package.json b/examples/sveltekit-openai/package.json
index 71a50d46cd59..570b69f82ee4 100644
--- a/examples/sveltekit-openai/package.json
+++ b/examples/sveltekit-openai/package.json
@@ -16,16 +16,16 @@
},
"type": "module",
"devDependencies": {
- "@ai-sdk/openai": "1.3.6",
- "@ai-sdk/provider-utils": "2.2.3",
- "@ai-sdk/svelte": "2.1.5",
- "@ai-sdk/ui-utils": "1.2.4",
+ "@ai-sdk/openai": "1.3.22",
+ "@ai-sdk/provider-utils": "2.2.8",
+ "@ai-sdk/svelte": "2.1.12",
+ "@ai-sdk/ui-utils": "1.2.11",
"@eslint/compat": "^1.2.5",
"@eslint/js": "^9.18.0",
"@sveltejs/adapter-vercel": "^5.5.2",
"@sveltejs/kit": "^2.16.0",
"@sveltejs/vite-plugin-svelte": "^5.0.0",
- "ai": "4.2.10",
+ "ai": "4.3.15",
"autoprefixer": "^10.4.20",
"bits-ui": "^1.3.9",
"clsx": "^2.1.1",
diff --git a/package.json b/package.json
index c336659cec05..4af3945a2a93 100644
--- a/package.json
+++ b/package.json
@@ -5,14 +5,14 @@
"build": "turbo build",
"changeset": "changeset",
"clean": "turbo clean",
- "dev": "turbo dev --no-cache --concurrency 16 --continue",
+ "dev": "turbo dev --no-cache --concurrency 16 --continue",
"lint": "turbo lint",
"prepare": "husky install",
"prettier-check": "prettier --check \"**/*.{js,ts,tsx,md,mdx,svelte}\"",
"type-check": "turbo type-check",
"prettier-fix": "prettier --write \"**/*.{js,ts,tsx,md,mdx,svelte}\"",
"publint": "turbo publint",
- "test": "turbo test",
+ "test": "turbo test --concurrency 16",
"ci:release": "turbo clean && turbo build && changeset publish",
"ci:version": "changeset version && node .github/scripts/cleanup-examples-changesets.mjs && pnpm install --no-frozen-lockfile",
"clean-examples": "node .github/scripts/cleanup-examples-changesets.mjs && pnpm install --no-frozen-lockfile"
@@ -43,7 +43,7 @@
"engines": {
"node": "^18.0.0 || ^20.0.0 || ^22.0.0"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/ai/.changeset/quick-toys-study.md b/packages/ai/.changeset/quick-toys-study.md
new file mode 100644
index 000000000000..99827ec61f6c
--- /dev/null
+++ b/packages/ai/.changeset/quick-toys-study.md
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+fix (ai/mcp-stdio): make `createChildProcess` synchronous to prevent spawn race condition
diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index b5e5cdf62b93..114c23f965d5 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -1,5 +1,140 @@
# ai
+## 4.3.15
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/react@1.2.12
+ - @ai-sdk/ui-utils@1.2.11
+
+## 4.3.14
+
+### Patch Changes
+
+- a295521: feat(message-validator): include more details in error messages
+
+## 4.3.13
+
+### Patch Changes
+
+- Updated dependencies [6c59ae7]
+ - @ai-sdk/ui-utils@1.2.10
+ - @ai-sdk/react@1.2.11
+
+## 4.3.12
+
+### Patch Changes
+
+- 1ed3755: fix (ai): don't publish mcp-stdio TypeScript files
+- 46cb332: chore (ai/mcp): add `assertCapability` method to experimental MCP client
+
+## 4.3.11
+
+### Patch Changes
+
+- Updated dependencies [77b2097]
+- Updated dependencies [62181ef]
+ - @ai-sdk/react@1.2.10
+ - @ai-sdk/ui-utils@1.2.9
+
+## 4.3.10
+
+### Patch Changes
+
+- 0432959: feat (ai): add experimental prepareStep callback to generateText
+
+## 4.3.9
+
+### Patch Changes
+
+- b69a253: fix(utils/detect-mimetype): add support for detecting id3 tags
+
+## 4.3.8
+
+### Patch Changes
+
+- 6e8a73b: feat(providers/fal): add transcribe
+
+## 4.3.7
+
+### Patch Changes
+
+- f4f3945: fix (ai/core): refactor `toResponseMessages` to filter out empty string/content
+
+## 4.3.6
+
+### Patch Changes
+
+- beef951: feat: add speech with experimental_generateSpeech
+- bd41167: fix(ai/core): properly handle custom separator in provider registry
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+ - @ai-sdk/ui-utils@1.2.8
+ - @ai-sdk/react@1.2.9
+
+## 4.3.5
+
+### Patch Changes
+
+- 452bf12: fix (ai/mcp): better support for zero-argument MCP tools
+
+## 4.3.4
+
+### Patch Changes
+
+- 013faa8: core (ai): change transcription model mimeType to mediaType
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+ - @ai-sdk/ui-utils@1.2.7
+ - @ai-sdk/react@1.2.8
+
+## 4.3.3
+
+### Patch Changes
+
+- 3e88f4d: fix (ai/mcp): prevent mutation of customEnv
+- c21fa6d: feat: add transcription with experimental_transcribe
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/react@1.2.7
+ - @ai-sdk/ui-utils@1.2.6
+
+## 4.3.2
+
+### Patch Changes
+
+- 665a567: fix (core): improve error handling in streamText's consumeStream method
+
+## 4.3.1
+
+### Patch Changes
+
+- 3d1bd38: feat(smooth-stream): chunking callbacks
+
+## 4.3.0
+
+### Minor Changes
+
+- 772a2d7: feat (core): Add finishReason field to NoObjectGeneratedError
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/react@1.2.6
+ - @ai-sdk/ui-utils@1.2.5
+
+## 4.2.11
+
+### Patch Changes
+
+- c45d100: fix (core): send buffered text in smooth stream when stream parts change
+
## 4.2.10
### Patch Changes
diff --git a/packages/ai/README.md b/packages/ai/README.md
index 5df326033311..3a219de7f67f 100644
--- a/packages/ai/README.md
+++ b/packages/ai/README.md
@@ -2,9 +2,9 @@
# AI SDK
-The [AI SDK](https://sdk.vercel.ai/docs) is a TypeScript toolkit designed to help you build AI-powered applications using popular frameworks like Next.js, React, Svelte, Vue and runtimes like Node.js.
+The [AI SDK](https://ai-sdk.dev/docs) is a TypeScript toolkit designed to help you build AI-powered applications using popular frameworks like Next.js, React, Svelte, Vue and runtimes like Node.js.
-To learn more about how to use the AI SDK, check out our [API Reference](https://sdk.vercel.ai/docs/reference) and [Documentation](https://sdk.vercel.ai/docs).
+To learn more about how to use the AI SDK, check out our [API Reference](https://ai-sdk.dev/docs/reference) and [Documentation](https://ai-sdk.dev/docs).
## Installation
@@ -18,7 +18,7 @@ npm install ai
### AI SDK Core
-The [AI SDK Core](https://sdk.vercel.ai/docs/ai-sdk-core/overview) module provides a unified API to interact with model providers like [OpenAI](https://sdk.vercel.ai/providers/ai-sdk-providers/openai), [Anthropic](https://sdk.vercel.ai/providers/ai-sdk-providers/anthropic), [Google](https://sdk.vercel.ai/providers/ai-sdk-providers/google-generative-ai), and more.
+The [AI SDK Core](https://ai-sdk.dev/docs/ai-sdk-core/overview) module provides a unified API to interact with model providers like [OpenAI](https://ai-sdk.dev/providers/ai-sdk-providers/openai), [Anthropic](https://ai-sdk.dev/providers/ai-sdk-providers/anthropic), [Google](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai), and more.
You will then install the model provider of your choice.
@@ -43,7 +43,7 @@ console.log(text);
### AI SDK UI
-The [AI SDK UI](https://sdk.vercel.ai/docs/ai-sdk-ui/overview) module provides a set of hooks that help you build chatbots and generative user interfaces. These hooks are framework agnostic, so they can be used in Next.js, React, Svelte, and Vue.
+The [AI SDK UI](https://ai-sdk.dev/docs/ai-sdk-ui/overview) module provides a set of hooks that help you build chatbots and generative user interfaces. These hooks are framework agnostic, so they can be used in Next.js, React, Svelte, and Vue.
You need to install the package for your framework:
diff --git a/packages/ai/core/generate-image/generate-image.ts b/packages/ai/core/generate-image/generate-image.ts
index cf06d8f7d498..ae0a79a550ec 100644
--- a/packages/ai/core/generate-image/generate-image.ts
+++ b/packages/ai/core/generate-image/generate-image.ts
@@ -8,7 +8,10 @@ import { prepareRetries } from '../prompt/prepare-retries';
import { ImageGenerationWarning } from '../types/image-model';
import { ImageModelResponseMetadata } from '../types/image-model-response-metadata';
import { GenerateImageResult } from './generate-image-result';
-import { detectImageMimeType } from '../util/detect-image-mimetype';
+import {
+ detectMimeType,
+ imageMimeTypeSignatures,
+} from '../util/detect-mimetype';
/**
Generates images using an image model.
@@ -146,7 +149,11 @@ Only applicable for HTTP-based providers.
image =>
new DefaultGeneratedFile({
data: image,
- mimeType: detectImageMimeType(image) ?? 'image/png',
+ mimeType:
+ detectMimeType({
+ data: image,
+ signatures: imageMimeTypeSignatures,
+ }) ?? 'image/png',
}),
),
);
diff --git a/packages/ai/core/generate-object/generate-object.test.ts b/packages/ai/core/generate-object/generate-object.test.ts
index 0686c05c4494..80747e4f286c 100644
--- a/packages/ai/core/generate-object/generate-object.test.ts
+++ b/packages/ai/core/generate-object/generate-object.test.ts
@@ -801,6 +801,7 @@ describe('output = "object"', () => {
promptTokens: 10,
totalTokens: 30,
},
+ finishReason: 'stop',
});
}
diff --git a/packages/ai/core/generate-object/generate-object.ts b/packages/ai/core/generate-object/generate-object.ts
index 6284a12d8262..1a2e279d3726 100644
--- a/packages/ai/core/generate-object/generate-object.ts
+++ b/packages/ai/core/generate-object/generate-object.ts
@@ -556,6 +556,7 @@ export async function generateObject({
'No object generated: the model did not return a response.',
response: responseData,
usage: calculateLanguageModelUsage(result.usage),
+ finishReason: result.finishReason,
});
}
@@ -681,6 +682,7 @@ export async function generateObject({
message: 'No object generated: the tool was not called.',
response: responseData,
usage: calculateLanguageModelUsage(result.usage),
+ finishReason: result.finishReason,
});
}
@@ -751,6 +753,7 @@ export async function generateObject({
text: result,
response,
usage: calculateLanguageModelUsage(usage),
+ finishReason: finishReason,
});
}
@@ -770,6 +773,7 @@ export async function generateObject({
text: result,
response,
usage: calculateLanguageModelUsage(usage),
+ finishReason: finishReason,
});
}
diff --git a/packages/ai/core/generate-object/output-strategy.ts b/packages/ai/core/generate-object/output-strategy.ts
index 07c9d70ecd38..6da624bd331c 100644
--- a/packages/ai/core/generate-object/output-strategy.ts
+++ b/packages/ai/core/generate-object/output-strategy.ts
@@ -16,7 +16,11 @@ import {
createAsyncIterableStream,
} from '../util/async-iterable-stream';
import { ObjectStreamPart } from './stream-object-result';
-import { LanguageModelResponseMetadata, LanguageModelUsage } from '../types';
+import {
+ FinishReason,
+ LanguageModelResponseMetadata,
+ LanguageModelUsage,
+} from '../types';
export interface OutputStrategy {
readonly type: 'object' | 'array' | 'enum' | 'no-schema';
@@ -64,6 +68,7 @@ const noSchemaOutputStrategy: OutputStrategy = {
text: string;
response: LanguageModelResponseMetadata;
usage: LanguageModelUsage;
+ finishReason: FinishReason;
},
): ValidationResult {
return value === undefined
@@ -74,6 +79,7 @@ const noSchemaOutputStrategy: OutputStrategy = {
text: context.text,
response: context.response,
usage: context.usage,
+ finishReason: context.finishReason,
}),
}
: { success: true, value };
diff --git a/packages/ai/core/generate-object/stream-object.test.ts b/packages/ai/core/generate-object/stream-object.test.ts
index a645b1943159..0414a6d26c29 100644
--- a/packages/ai/core/generate-object/stream-object.test.ts
+++ b/packages/ai/core/generate-object/stream-object.test.ts
@@ -1311,6 +1311,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
@@ -1354,6 +1355,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
@@ -1403,6 +1405,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
@@ -1446,6 +1449,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
@@ -1488,6 +1492,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
@@ -1530,6 +1535,7 @@ describe('streamObject', () => {
modelId: 'model-1',
},
usage: { completionTokens: 10, promptTokens: 3, totalTokens: 13 },
+ finishReason: 'stop',
});
}
});
diff --git a/packages/ai/core/generate-object/stream-object.ts b/packages/ai/core/generate-object/stream-object.ts
index 0cc5607bcf59..9f79cd1c3184 100644
--- a/packages/ai/core/generate-object/stream-object.ts
+++ b/packages/ai/core/generate-object/stream-object.ts
@@ -898,6 +898,7 @@ class DefaultStreamObjectResult
text: accumulatedText,
response,
usage,
+ finishReason: finishReason,
});
self.objectPromise.reject(error);
}
diff --git a/packages/ai/core/generate-speech/generate-speech-result.ts b/packages/ai/core/generate-speech/generate-speech-result.ts
new file mode 100644
index 000000000000..48dc49417e64
--- /dev/null
+++ b/packages/ai/core/generate-speech/generate-speech-result.ts
@@ -0,0 +1,30 @@
+import { JSONValue } from '@ai-sdk/provider';
+import { SpeechModelResponseMetadata } from '../types/speech-model-response-metadata';
+import { SpeechWarning } from '../types';
+import { GeneratedAudioFile } from './generated-audio-file';
+
+/**
+The result of a `generateSpeech` call.
+It contains the audio data and additional information.
+ */
+export interface SpeechResult {
+ /**
+ * The audio data as a base64 encoded string or binary data.
+ */
+ readonly audio: GeneratedAudioFile;
+
+ /**
+ Warnings for the call, e.g. unsupported settings.
+ */
+  readonly warnings: Array<SpeechWarning>;
+
+ /**
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
+ */
+  readonly responses: Array<SpeechModelResponseMetadata>;
+
+ /**
+ Provider metadata from the provider.
+ */
+  readonly providerMetadata: Record<string, Record<string, JSONValue>>;
+}
diff --git a/packages/ai/core/generate-speech/generate-speech.test.ts b/packages/ai/core/generate-speech/generate-speech.test.ts
new file mode 100644
index 000000000000..ca18938f737b
--- /dev/null
+++ b/packages/ai/core/generate-speech/generate-speech.test.ts
@@ -0,0 +1,215 @@
+import {
+ JSONValue,
+ SpeechModelV1,
+ SpeechModelV1CallWarning,
+} from '@ai-sdk/provider';
+import { MockSpeechModelV1 } from '../test/mock-speech-model-v1';
+import { generateSpeech } from './generate-speech';
+import {
+ GeneratedAudioFile,
+ DefaultGeneratedAudioFile,
+} from './generated-audio-file';
+
+const audio = new Uint8Array([1, 2, 3, 4]); // Sample audio data
+const testDate = new Date(2024, 0, 1);
+const mockFile = new DefaultGeneratedAudioFile({
+ data: audio,
+ mimeType: 'audio/mp3',
+});
+
+const sampleText = 'This is a sample text to convert to speech.';
+
+const createMockResponse = (options: {
+ audio: GeneratedAudioFile;
+ warnings?: SpeechModelV1CallWarning[];
+ timestamp?: Date;
+ modelId?: string;
+  headers?: Record<string, string>;
+  providerMetadata?: Record<string, Record<string, JSONValue>>;
+}) => ({
+ audio: options.audio.uint8Array,
+ warnings: options.warnings ?? [],
+ response: {
+ timestamp: options.timestamp ?? new Date(),
+ modelId: options.modelId ?? 'test-model-id',
+ headers: options.headers ?? {},
+ },
+ providerMetadata: options.providerMetadata ?? {},
+});
+
+describe('generateSpeech', () => {
+ it('should send args to doGenerate', async () => {
+ const abortController = new AbortController();
+ const abortSignal = abortController.signal;
+
+    let capturedArgs!: Parameters<SpeechModelV1['doGenerate']>[0];
+
+ await generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async args => {
+ capturedArgs = args;
+ return createMockResponse({
+ audio: mockFile,
+ });
+ },
+ }),
+ text: sampleText,
+ voice: 'test-voice',
+ headers: { 'custom-request-header': 'request-header-value' },
+ abortSignal,
+ });
+
+ expect(capturedArgs).toStrictEqual({
+ text: sampleText,
+ voice: 'test-voice',
+ headers: { 'custom-request-header': 'request-header-value' },
+ abortSignal,
+ providerOptions: {},
+ outputFormat: undefined,
+ instructions: undefined,
+ speed: undefined,
+ });
+ });
+
+ it('should return warnings', async () => {
+ const result = await generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ audio: mockFile,
+ warnings: [
+ {
+ type: 'other',
+ message: 'Setting is not supported',
+ },
+ ],
+ providerMetadata: {
+ 'test-provider': {
+ 'test-key': 'test-value',
+ },
+ },
+ }),
+ }),
+ text: sampleText,
+ });
+
+ expect(result.warnings).toStrictEqual([
+ {
+ type: 'other',
+ message: 'Setting is not supported',
+ },
+ ]);
+ });
+
+ it('should return the audio data', async () => {
+ const result = await generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ audio: mockFile,
+ }),
+ }),
+ text: sampleText,
+ });
+
+ expect(result).toEqual({
+ audio: mockFile,
+ warnings: [],
+ responses: [
+ {
+ timestamp: expect.any(Date),
+ modelId: 'test-model-id',
+ headers: {},
+ },
+ ],
+ providerMetadata: {},
+ });
+ });
+
+ describe('error handling', () => {
+ it('should throw NoSpeechGeneratedError when no audio is returned', async () => {
+ await expect(
+ generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ audio: new DefaultGeneratedAudioFile({
+ data: new Uint8Array(),
+ mimeType: 'audio/mp3',
+ }),
+ timestamp: testDate,
+ }),
+ }),
+ text: sampleText,
+ }),
+ ).rejects.toMatchObject({
+ name: 'AI_NoSpeechGeneratedError',
+ message: 'No speech audio generated.',
+ responses: [
+ {
+ timestamp: testDate,
+ modelId: expect.any(String),
+ },
+ ],
+ });
+ });
+
+ it('should include response headers in error when no audio generated', async () => {
+ await expect(
+ generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ audio: new DefaultGeneratedAudioFile({
+ data: new Uint8Array(),
+ mimeType: 'audio/mp3',
+ }),
+ timestamp: testDate,
+ headers: {
+ 'custom-response-header': 'response-header-value',
+ },
+ }),
+ }),
+ text: sampleText,
+ }),
+ ).rejects.toMatchObject({
+ name: 'AI_NoSpeechGeneratedError',
+ message: 'No speech audio generated.',
+ responses: [
+ {
+ timestamp: testDate,
+ modelId: expect.any(String),
+ headers: {
+ 'custom-response-header': 'response-header-value',
+ },
+ },
+ ],
+ });
+ });
+ });
+
+ it('should return response metadata', async () => {
+ const testHeaders = { 'x-test': 'value' };
+
+ const result = await generateSpeech({
+ model: new MockSpeechModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ audio: mockFile,
+ timestamp: testDate,
+ modelId: 'test-model',
+ headers: testHeaders,
+ }),
+ }),
+ text: sampleText,
+ });
+
+ expect(result.responses).toStrictEqual([
+ {
+ timestamp: testDate,
+ modelId: 'test-model',
+ headers: testHeaders,
+ },
+ ]);
+ });
+});
diff --git a/packages/ai/core/generate-speech/generate-speech.ts b/packages/ai/core/generate-speech/generate-speech.ts
new file mode 100644
index 000000000000..2f6f2f35a0df
--- /dev/null
+++ b/packages/ai/core/generate-speech/generate-speech.ts
@@ -0,0 +1,159 @@
+import { JSONValue, SpeechModelV1 } from '@ai-sdk/provider';
+import { NoSpeechGeneratedError } from '../../errors/no-speech-generated-error';
+import { prepareRetries } from '../prompt/prepare-retries';
+import { ProviderOptions } from '../types/provider-metadata';
+import { SpeechWarning } from '../types/speech-model';
+import { SpeechModelResponseMetadata } from '../types/speech-model-response-metadata';
+import { SpeechResult } from './generate-speech-result';
+import {
+ audioMimeTypeSignatures,
+ detectMimeType,
+} from '../util/detect-mimetype';
+import {
+ DefaultGeneratedAudioFile,
+ GeneratedAudioFile,
+} from './generated-audio-file';
+
+/**
+Generates speech audio using a speech model.
+
+@param model - The speech model to use.
+@param text - The text to convert to speech.
+@param voice - The voice to use for speech generation.
+@param outputFormat - The output format to use for speech generation e.g. "mp3", "wav", etc.
+@param instructions - Instructions for the speech generation e.g. "Speak in a slow and steady tone".
+@param speed - The speed of the speech generation.
+@param providerOptions - Additional provider-specific options that are passed through to the provider
+as body parameters.
+@param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
+@param abortSignal - An optional abort signal that can be used to cancel the call.
+@param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
+
+@returns A result object that contains the generated audio data.
+ */
+export async function generateSpeech({
+ model,
+ text,
+ voice,
+ outputFormat,
+ instructions,
+ speed,
+ providerOptions = {},
+ maxRetries: maxRetriesArg,
+ abortSignal,
+ headers,
+}: {
+ /**
+The speech model to use.
+ */
+ model: SpeechModelV1;
+
+ /**
+The text to convert to speech.
+ */
+ text: string;
+
+ /**
+The voice to use for speech generation.
+ */
+ voice?: string;
+
+ /**
+ * The desired output format for the audio e.g. "mp3", "wav", etc.
+ */
+ outputFormat?: 'mp3' | 'wav' | (string & {});
+
+ /**
+ Instructions for the speech generation e.g. "Speak in a slow and steady tone".
+ */
+ instructions?: string;
+
+ /**
+ The speed of the speech generation.
+ */
+ speed?: number;
+
+ /**
+Additional provider-specific options that are passed through to the provider
+as body parameters.
+
+The outer record is keyed by the provider name, and the inner
+record is keyed by the provider-specific metadata key.
+```ts
+{
+ "openai": {}
+}
+```
+ */
+ providerOptions?: ProviderOptions;
+
+ /**
+Maximum number of retries per speech model call. Set to 0 to disable retries.
+
+@default 2
+ */
+ maxRetries?: number;
+
+ /**
+Abort signal.
+ */
+ abortSignal?: AbortSignal;
+
+ /**
+Additional headers to include in the request.
+Only applicable for HTTP-based providers.
+ */
+  headers?: Record<string, string>;
+}): Promise<SpeechResult> {
+ const { retry } = prepareRetries({ maxRetries: maxRetriesArg });
+
+ const result = await retry(() =>
+ model.doGenerate({
+ text,
+ voice,
+ outputFormat,
+ instructions,
+ speed,
+ abortSignal,
+ headers,
+ providerOptions,
+ }),
+ );
+
+ if (!result.audio || result.audio.length === 0) {
+ throw new NoSpeechGeneratedError({ responses: [result.response] });
+ }
+
+ return new DefaultSpeechResult({
+ audio: new DefaultGeneratedAudioFile({
+ data: result.audio,
+ mimeType:
+ detectMimeType({
+ data: result.audio,
+ signatures: audioMimeTypeSignatures,
+ }) ?? 'audio/mp3',
+ }),
+ warnings: result.warnings,
+ responses: [result.response],
+ providerMetadata: result.providerMetadata,
+ });
+}
+
+class DefaultSpeechResult implements SpeechResult {
+ readonly audio: GeneratedAudioFile;
+  readonly warnings: Array<SpeechWarning>;
+  readonly responses: Array<SpeechModelResponseMetadata>;
+  readonly providerMetadata: Record<string, Record<string, JSONValue>>;
+
+ constructor(options: {
+ audio: GeneratedAudioFile;
+    warnings: Array<SpeechWarning>;
+    responses: Array<SpeechModelResponseMetadata>;
+    providerMetadata: Record<string, Record<string, JSONValue>> | undefined;
+ }) {
+ this.audio = options.audio;
+ this.warnings = options.warnings;
+ this.responses = options.responses;
+ this.providerMetadata = options.providerMetadata ?? {};
+ }
+}
diff --git a/packages/ai/core/generate-speech/generated-audio-file.ts b/packages/ai/core/generate-speech/generated-audio-file.ts
new file mode 100644
index 000000000000..065caf446c3f
--- /dev/null
+++ b/packages/ai/core/generate-speech/generated-audio-file.ts
@@ -0,0 +1,64 @@
+import {
+ GeneratedFile,
+ DefaultGeneratedFile,
+} from '../generate-text/generated-file';
+
+/**
+ * A generated audio file.
+ */
+export interface GeneratedAudioFile extends GeneratedFile {
+ /**
+ * Audio format of the file (e.g., 'mp3', 'wav', etc.)
+ */
+ readonly format: string;
+}
+
+export class DefaultGeneratedAudioFile
+ extends DefaultGeneratedFile
+ implements GeneratedAudioFile
+{
+ readonly format: string;
+
+ constructor({
+ data,
+ mimeType,
+ }: {
+ data: string | Uint8Array;
+ mimeType: string;
+ }) {
+ super({ data, mimeType });
+ let format = 'mp3';
+
+ // If format is not provided, try to determine it from the mimeType
+ if (mimeType) {
+ const mimeTypeParts = mimeType.split('/');
+
+ if (mimeTypeParts.length === 2) {
+ // Handle special cases for audio formats
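+        // 'audio/mpeg' keeps the default 'mp3' instead of becoming 'mpeg'.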
+ if (mimeType !== 'audio/mpeg') {
+ format = mimeTypeParts[1];
+ }
+ }
+ }
+
+ if (!format) {
+ throw new Error(
+ 'Audio format must be provided or determinable from mimeType',
+ );
+ }
+
+ this.format = format;
+ }
+}
+
+export class DefaultGeneratedAudioFileWithType extends DefaultGeneratedAudioFile {
+ readonly type = 'audio';
+
+ constructor(options: {
+ data: string | Uint8Array;
+ mimeType: string;
+ format: string;
+ }) {
+ super(options);
+ }
+}
diff --git a/packages/ai/core/generate-speech/index.ts b/packages/ai/core/generate-speech/index.ts
new file mode 100644
index 000000000000..21d906f91cd8
--- /dev/null
+++ b/packages/ai/core/generate-speech/index.ts
@@ -0,0 +1,3 @@
+export { generateSpeech as experimental_generateSpeech } from './generate-speech';
+export type { SpeechResult as Experimental_SpeechResult } from './generate-speech-result';
+export type { GeneratedAudioFile } from './generated-audio-file';
diff --git a/packages/ai/core/generate-text/__snapshots__/generate-text.test.ts.snap b/packages/ai/core/generate-text/__snapshots__/generate-text.test.ts.snap
index 7a5e1fe83f5c..1cf354e75777 100644
--- a/packages/ai/core/generate-text/__snapshots__/generate-text.test.ts.snap
+++ b/packages/ai/core/generate-text/__snapshots__/generate-text.test.ts.snap
@@ -19,10 +19,6 @@ exports[`options.maxSteps > 2 steps: initial, tool-result > onStepFinish should
"messages": [
{
"content": [
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -101,10 +97,6 @@ exports[`options.maxSteps > 2 steps: initial, tool-result > onStepFinish should
"messages": [
{
"content": [
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -162,10 +154,6 @@ exports[`options.maxSteps > 2 steps: initial, tool-result > result.response.mess
[
{
"content": [
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -223,9 +211,155 @@ exports[`options.maxSteps > 2 steps: initial, tool-result > result.steps should
{
"content": [
{
- "text": "",
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "id": "msg-0",
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "id": "msg-1",
+ "role": "tool",
+ },
+ ],
+ "modelId": "test-response-model-id",
+ "timestamp": 1970-01-01T00:00:00.000Z,
+ },
+ "sources": [],
+ "stepType": "initial",
+ "text": "",
+ "toolCalls": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "toolResults": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "usage": {
+ "completionTokens": 5,
+ "promptTokens": 10,
+ "totalTokens": 15,
+ },
+ "warnings": undefined,
+ },
+ {
+ "experimental_providerMetadata": undefined,
+ "files": [],
+ "finishReason": "stop",
+ "isContinued": false,
+ "logprobs": undefined,
+ "providerMetadata": undefined,
+ "reasoning": undefined,
+ "reasoningDetails": [],
+ "request": {},
+ "response": {
+ "body": undefined,
+ "headers": {
+ "custom-response-header": "response-header-value",
+ },
+ "id": "test-id-2-from-model",
+ "messages": [
+ {
+ "content": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "id": "msg-0",
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "id": "msg-1",
+ "role": "tool",
+ },
+ {
+ "content": [
+ {
+ "text": "Hello, world!",
"type": "text",
},
+ ],
+ "id": "msg-2",
+ "role": "assistant",
+ },
+ ],
+ "modelId": "test-response-model-id",
+ "timestamp": 1970-01-01T00:00:10.000Z,
+ },
+ "sources": [],
+ "stepType": "tool-result",
+ "text": "Hello, world!",
+ "toolCalls": [],
+ "toolResults": [],
+ "usage": {
+ "completionTokens": 20,
+ "promptTokens": 10,
+ "totalTokens": 30,
+ },
+ "warnings": undefined,
+ },
+]
+`;
+
+exports[`options.maxSteps > 2 steps: initial, tool-result with prepareStep > onStepFinish should be called for each step 1`] = `
+[
+ {
+ "experimental_providerMetadata": undefined,
+ "files": [],
+ "finishReason": "tool-calls",
+ "isContinued": false,
+ "logprobs": undefined,
+ "providerMetadata": undefined,
+ "reasoning": undefined,
+ "reasoningDetails": [],
+ "request": {},
+ "response": {
+ "body": undefined,
+ "headers": undefined,
+ "id": "test-id-1-from-model",
+ "messages": [
+ {
+ "content": [
{
"args": {
"value": "value",
@@ -305,9 +439,196 @@ exports[`options.maxSteps > 2 steps: initial, tool-result > result.steps should
{
"content": [
{
- "text": "",
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "id": "msg-0",
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "id": "msg-1",
+ "role": "tool",
+ },
+ {
+ "content": [
+ {
+ "text": "Hello, world!",
"type": "text",
},
+ ],
+ "id": "msg-2",
+ "role": "assistant",
+ },
+ ],
+ "modelId": "test-response-model-id",
+ "timestamp": 1970-01-01T00:00:10.000Z,
+ },
+ "sources": [],
+ "stepType": "tool-result",
+ "text": "Hello, world!",
+ "toolCalls": [],
+ "toolResults": [],
+ "usage": {
+ "completionTokens": 20,
+ "promptTokens": 10,
+ "totalTokens": 30,
+ },
+ "warnings": undefined,
+ },
+]
+`;
+
+exports[`options.maxSteps > 2 steps: initial, tool-result with prepareStep > result.response.messages should contain response messages from all steps 1`] = `
+[
+ {
+ "content": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "id": "msg-0",
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "id": "msg-1",
+ "role": "tool",
+ },
+ {
+ "content": [
+ {
+ "text": "Hello, world!",
+ "type": "text",
+ },
+ ],
+ "id": "msg-2",
+ "role": "assistant",
+ },
+]
+`;
+
+exports[`options.maxSteps > 2 steps: initial, tool-result with prepareStep > result.steps should contain all steps 1`] = `
+[
+ {
+ "experimental_providerMetadata": undefined,
+ "files": [],
+ "finishReason": "tool-calls",
+ "isContinued": false,
+ "logprobs": undefined,
+ "providerMetadata": undefined,
+ "reasoning": undefined,
+ "reasoningDetails": [],
+ "request": {},
+ "response": {
+ "body": undefined,
+ "headers": undefined,
+ "id": "test-id-1-from-model",
+ "messages": [
+ {
+ "content": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "id": "msg-0",
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "id": "msg-1",
+ "role": "tool",
+ },
+ ],
+ "modelId": "test-response-model-id",
+ "timestamp": 1970-01-01T00:00:00.000Z,
+ },
+ "sources": [],
+ "stepType": "initial",
+ "text": "",
+ "toolCalls": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-call",
+ },
+ ],
+ "toolResults": [
+ {
+ "args": {
+ "value": "value",
+ },
+ "result": "result1",
+ "toolCallId": "call-1",
+ "toolName": "tool1",
+ "type": "tool-result",
+ },
+ ],
+ "usage": {
+ "completionTokens": 5,
+ "promptTokens": 10,
+ "totalTokens": 15,
+ },
+ "warnings": undefined,
+ },
+ {
+ "experimental_providerMetadata": undefined,
+ "files": [],
+ "finishReason": "stop",
+ "isContinued": false,
+ "logprobs": undefined,
+ "providerMetadata": undefined,
+ "reasoning": undefined,
+ "reasoningDetails": [],
+ "request": {},
+ "response": {
+ "body": undefined,
+ "headers": {
+ "custom-response-header": "response-header-value",
+ },
+ "id": "test-id-2-from-model",
+ "messages": [
+ {
+ "content": [
{
"args": {
"value": "value",
@@ -1193,7 +1514,7 @@ exports[`result.steps > should contain files 1`] = `
"type": "file",
},
{
- "text": "",
+ "text": "Hello, world!",
"type": "text",
},
],
@@ -1206,7 +1527,7 @@ exports[`result.steps > should contain files 1`] = `
},
"sources": [],
"stepType": "initial",
- "text": "",
+ "text": "Hello, world!",
"toolCalls": [],
"toolResults": [],
"usage": {
@@ -1239,7 +1560,7 @@ exports[`result.steps > should contain sources 1`] = `
{
"content": [
{
- "text": "",
+ "text": "Hello, world!",
"type": "text",
},
],
@@ -1275,7 +1596,7 @@ exports[`result.steps > should contain sources 1`] = `
},
],
"stepType": "initial",
- "text": "",
+ "text": "Hello, world!",
"toolCalls": [],
"toolResults": [],
"usage": {
diff --git a/packages/ai/core/generate-text/__snapshots__/stream-text.test.ts.snap b/packages/ai/core/generate-text/__snapshots__/stream-text.test.ts.snap
index 03ced770601a..b382b4c0782a 100644
--- a/packages/ai/core/generate-text/__snapshots__/stream-text.test.ts.snap
+++ b/packages/ai/core/generate-text/__snapshots__/stream-text.test.ts.snap
@@ -106,10 +106,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > callbac
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -177,10 +173,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > callbac
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -262,10 +254,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > callbac
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -358,10 +346,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > callbac
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -443,10 +427,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > callbac
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -751,10 +731,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > value p
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -821,10 +797,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > value p
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
@@ -906,10 +878,6 @@ exports[`streamText > options.maxSteps > 2 steps: initial, tool-result > value p
"text": "thinking",
"type": "reasoning",
},
- {
- "text": "",
- "type": "text",
- },
{
"args": {
"value": "value",
diff --git a/packages/ai/core/generate-text/generate-text.test.ts b/packages/ai/core/generate-text/generate-text.test.ts
index c946446270a9..63d1c2c0e2fa 100644
--- a/packages/ai/core/generate-text/generate-text.test.ts
+++ b/packages/ai/core/generate-text/generate-text.test.ts
@@ -37,6 +37,7 @@ const modelWithSources = new MockLanguageModelV1({
providerMetadata: { provider: { custom: 'value2' } },
},
],
+ text: 'Hello, world!',
}),
});
@@ -53,6 +54,7 @@ const modelWithFiles = new MockLanguageModelV1({
mimeType: 'image/jpeg',
},
],
+ text: 'Hello, world!',
}),
});
@@ -688,6 +690,209 @@ describe('options.maxSteps', () => {
});
});
+ describe('2 steps: initial, tool-result with prepareStep', () => {
+ let result: GenerateTextResult;
+ let onStepFinishResults: StepResult[];
+
+ beforeEach(async () => {
+ onStepFinishResults = [];
+
+ let responseCount = 0;
+
+ const trueModel = new MockLanguageModelV1({
+ doGenerate: async ({ prompt, mode }) => {
+ switch (responseCount++) {
+ case 0:
+ expect(mode).toStrictEqual({
+ type: 'regular',
+ toolChoice: { type: 'tool', toolName: 'tool1' },
+ tools: [
+ {
+ type: 'function',
+ name: 'tool1',
+ description: undefined,
+ parameters: {
+ $schema: 'http://json-schema.org/draft-07/schema#',
+ additionalProperties: false,
+ properties: { value: { type: 'string' } },
+ required: ['value'],
+ type: 'object',
+ },
+ },
+ ],
+ });
+
+ expect(prompt).toStrictEqual([
+ {
+ role: 'user',
+ content: [{ type: 'text', text: 'test-input' }],
+ providerMetadata: undefined,
+ },
+ ]);
+
+ return {
+ ...dummyResponseValues,
+ toolCalls: [
+ {
+ toolCallType: 'function',
+ toolCallId: 'call-1',
+ toolName: 'tool1',
+ args: `{ "value": "value" }`,
+ },
+ ],
+ toolResults: [
+ {
+ toolCallId: 'call-1',
+ toolName: 'tool1',
+ args: { value: 'value' },
+ result: 'result1',
+ },
+ ],
+ finishReason: 'tool-calls',
+ usage: { completionTokens: 5, promptTokens: 10 },
+ response: {
+ id: 'test-id-1-from-model',
+ timestamp: new Date(0),
+ modelId: 'test-response-model-id',
+ },
+ };
+ case 1:
+ expect(mode).toStrictEqual({
+ type: 'regular',
+ toolChoice: { type: 'auto' },
+ tools: [],
+ });
+
+ expect(prompt).toStrictEqual([
+ {
+ role: 'user',
+ content: [{ type: 'text', text: 'test-input' }],
+ providerMetadata: undefined,
+ },
+ {
+ role: 'assistant',
+ content: [
+ {
+ type: 'tool-call',
+ toolCallId: 'call-1',
+ toolName: 'tool1',
+ args: { value: 'value' },
+ providerMetadata: undefined,
+ },
+ ],
+ providerMetadata: undefined,
+ },
+ {
+ role: 'tool',
+ content: [
+ {
+ type: 'tool-result',
+ toolCallId: 'call-1',
+ toolName: 'tool1',
+ result: 'result1',
+ content: undefined,
+ isError: undefined,
+ providerMetadata: undefined,
+ },
+ ],
+ providerMetadata: undefined,
+ },
+ ]);
+ return {
+ ...dummyResponseValues,
+ text: 'Hello, world!',
+ response: {
+ id: 'test-id-2-from-model',
+ timestamp: new Date(10000),
+ modelId: 'test-response-model-id',
+ },
+ rawResponse: {
+ headers: {
+ 'custom-response-header': 'response-header-value',
+ },
+ },
+ };
+ default:
+ throw new Error(`Unexpected response count: ${responseCount}`);
+ }
+ },
+ });
+
+ result = await generateText({
+ model: modelWithFiles,
+ tools: {
+ tool1: tool({
+ parameters: z.object({ value: z.string() }),
+ execute: async (args, options) => {
+ expect(args).toStrictEqual({ value: 'value' });
+ expect(options.messages).toStrictEqual([
+ { role: 'user', content: 'test-input' },
+ ]);
+ return 'result1';
+ },
+ }),
+ },
+ prompt: 'test-input',
+ maxSteps: 3,
+ onStepFinish: async event => {
+ onStepFinishResults.push(event);
+ },
+ experimental_prepareStep: async ({ model, stepNumber, steps }) => {
+ expect(model).toStrictEqual(modelWithFiles);
+
+ if (stepNumber === 0) {
+ expect(steps).toStrictEqual([]);
+ return {
+ model: trueModel,
+ toolChoice: {
+ type: 'tool',
+ toolName: 'tool1' as const,
+ },
+ };
+ }
+
+ if (stepNumber === 1) {
+ expect(steps.length).toStrictEqual(1);
+ return { model: trueModel, experimental_activeTools: [] };
+ }
+ },
+ experimental_generateMessageId: mockId({ prefix: 'msg' }),
+ });
+ });
+
+ it('result.text should return text from last step', async () => {
+ assert.deepStrictEqual(result.text, 'Hello, world!');
+ });
+
+ it('result.toolCalls should return empty tool calls from last step', async () => {
+ assert.deepStrictEqual(result.toolCalls, []);
+ });
+
+ it('result.toolResults should return empty tool results from last step', async () => {
+ assert.deepStrictEqual(result.toolResults, []);
+ });
+
+ it('result.response.messages should contain response messages from all steps', () => {
+ expect(result.response.messages).toMatchSnapshot();
+ });
+
+ it('result.usage should sum token usage', () => {
+ assert.deepStrictEqual(result.usage, {
+ completionTokens: 25,
+ promptTokens: 20,
+ totalTokens: 45,
+ });
+ });
+
+ it('result.steps should contain all steps', () => {
+ expect(result.steps).toMatchSnapshot();
+ });
+
+ it('onStepFinish should be called for each step', () => {
+ expect(onStepFinishResults).toMatchSnapshot();
+ });
+ });
+
describe('4 steps: initial, continue, continue, continue', () => {
let result: GenerateTextResult;
let onStepFinishResults: StepResult[];
diff --git a/packages/ai/core/generate-text/generate-text.ts b/packages/ai/core/generate-text/generate-text.ts
index 58b82664b47b..03fa7b2806e5 100644
--- a/packages/ai/core/generate-text/generate-text.ts
+++ b/packages/ai/core/generate-text/generate-text.ts
@@ -125,6 +125,7 @@ export async function generateText<
experimental_providerMetadata,
providerOptions = experimental_providerMetadata,
experimental_activeTools: activeTools,
+ experimental_prepareStep: prepareStep,
experimental_repairToolCall: repairToolCall,
_internal: {
generateId = originalGenerateId,
@@ -198,6 +199,32 @@ Optional specification for parsing structured outputs from the LLM response.
*/
experimental_output?: Output;
+ /**
+Optional function that you can use to provide different settings for a step.
+
+@param options - The options for the step.
+@param options.steps - The steps that have been executed so far.
+@param options.stepNumber - The number of the step that is being executed.
+@param options.maxSteps - The maximum number of steps.
+@param options.model - The model that is being used.
+
+@returns An object that contains the settings for the step.
+If you return undefined, or leave individual settings undefined, the settings from the outer level will be used.
+ */
+ experimental_prepareStep?: (options: {
+ steps: Array<StepResult<NoInfer<TOOLS>>>;
+ stepNumber: number;
+ maxSteps: number;
+ model: LanguageModel;
+ }) => PromiseLike<
+ | {
+ model?: LanguageModel;
+ toolChoice?: ToolChoice<NoInfer<TOOLS>>;
+ experimental_activeTools?: Array<keyof NoInfer<TOOLS>>;
+ }
+ | undefined
+ >;
+
/**
A function that attempts to repair a tool call that failed to parse.
*/
@@ -254,6 +281,9 @@ A function that attempts to repair a tool call that failed to parse.
telemetry,
}),
...baseTelemetryAttributes,
+ // model:
+ 'ai.model.provider': model.provider,
+ 'ai.model.id': model.modelId,
// specific settings that only make sense on the outer level:
'ai.prompt': {
input: () => JSON.stringify({ system, prompt, messages }),
@@ -263,11 +293,6 @@ A function that attempts to repair a tool call that failed to parse.
}),
tracer,
fn: async span => {
- const mode = {
- type: 'regular' as const,
- ...prepareToolsAndToolChoice({ tools, toolChoice, activeTools }),
- };
-
const callSettings = prepareCallSettings(settings);
let currentModelResponse: Awaited<
@@ -298,16 +323,37 @@ A function that attempts to repair a tool call that failed to parse.
...responseMessages,
];
+ const prepareStepResult = await prepareStep?.({
+ model,
+ steps,
+ maxSteps,
+ stepNumber: stepCount,
+ });
+
+ const stepToolChoice = prepareStepResult?.toolChoice ?? toolChoice;
+ const stepActiveTools =
+ prepareStepResult?.experimental_activeTools ?? activeTools;
+ const stepModel = prepareStepResult?.model ?? model;
+
const promptMessages = await convertToLanguageModelPrompt({
prompt: {
type: promptFormat,
system: initialPrompt.system,
messages: stepInputMessages,
},
- modelSupportsImageUrls: model.supportsImageUrls,
- modelSupportsUrl: model.supportsUrl?.bind(model), // support 'this' context
+ modelSupportsImageUrls: stepModel.supportsImageUrls,
+ modelSupportsUrl: stepModel.supportsUrl?.bind(stepModel), // support 'this' context
});
+ const mode = {
+ type: 'regular' as const,
+ ...prepareToolsAndToolChoice({
+ tools,
+ toolChoice: stepToolChoice,
+ activeTools: stepActiveTools,
+ }),
+ };
+
currentModelResponse = await retry(() =>
recordSpan({
name: 'ai.generateText.doGenerate',
@@ -319,6 +365,10 @@ A function that attempts to repair a tool call that failed to parse.
telemetry,
}),
...baseTelemetryAttributes,
+ // model:
+ 'ai.model.provider': stepModel.provider,
+ 'ai.model.id': stepModel.modelId,
+ // prompt:
'ai.prompt.format': { input: () => promptFormat },
'ai.prompt.messages': {
input: () => JSON.stringify(promptMessages),
@@ -335,8 +385,8 @@ A function that attempts to repair a tool call that failed to parse.
},
// standardized gen-ai llm span attributes:
- 'gen_ai.system': model.provider,
- 'gen_ai.request.model': model.modelId,
+ 'gen_ai.system': stepModel.provider,
+ 'gen_ai.request.model': stepModel.modelId,
'gen_ai.request.frequency_penalty': settings.frequencyPenalty,
'gen_ai.request.max_tokens': settings.maxTokens,
'gen_ai.request.presence_penalty': settings.presencePenalty,
@@ -348,7 +398,7 @@ A function that attempts to repair a tool call that failed to parse.
}),
tracer,
fn: async span => {
- const result = await model.doGenerate({
+ const result = await stepModel.doGenerate({
mode,
...callSettings,
inputFormat: promptFormat,
@@ -363,7 +413,7 @@ A function that attempts to repair a tool call that failed to parse.
const responseData = {
id: result.response?.id ?? generateId(),
timestamp: result.response?.timestamp ?? currentDate(),
- modelId: result.response?.modelId ?? model.modelId,
+ modelId: result.response?.modelId ?? stepModel.modelId,
};
// Add response information to the span:
@@ -576,7 +626,11 @@ A function that attempts to repair a tool call that failed to parse.
return output.parseOutput(
{ text },
- { response: currentModelResponse.response, usage },
+ {
+ response: currentModelResponse.response,
+ usage,
+ finishReason: currentModelResponse.finishReason,
+ },
);
},
toolCalls: currentToolCalls,
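As a sketch of how the new `experimental_prepareStep` option is intended to be used from application code (not part of this diff): the provider, model ids, and weather tool below are placeholders, and returning `undefined` from the callback keeps the outer-level settings.

```ts
import { openai } from '@ai-sdk/openai';
import { generateText, tool } from 'ai';
import { z } from 'zod';

// Illustrative tool definition; any tool works here.
const weather = tool({
  description: 'Get the weather for a city',
  parameters: z.object({ city: z.string() }),
  execute: async ({ city }) => `Sunny in ${city}`,
});

const { text, steps } = await generateText({
  model: openai('gpt-4o-mini'),
  tools: { weather },
  maxSteps: 3,
  prompt: 'What is the weather in London?',
  // Per-step overrides; returning undefined falls back to the outer settings.
  experimental_prepareStep: async ({ stepNumber }) => {
    if (stepNumber === 0) {
      // Force the first step to call the weather tool, optionally on a different model.
      return {
        model: openai('gpt-4o'),
        toolChoice: { type: 'tool' as const, toolName: 'weather' as const },
      };
    }
    // Later steps: no active tools, so the model answers in plain text.
    return { experimental_activeTools: [] };
  },
});
```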
diff --git a/packages/ai/core/generate-text/index.ts b/packages/ai/core/generate-text/index.ts
index 8ca67561e4c9..ab53b626fc47 100644
--- a/packages/ai/core/generate-text/index.ts
+++ b/packages/ai/core/generate-text/index.ts
@@ -6,7 +6,7 @@ export type {
GeneratedFile,
} from './generated-file';
export * as Output from './output';
-export { smoothStream } from './smooth-stream';
+export { smoothStream, type ChunkDetector } from './smooth-stream';
export type { StepResult } from './step-result';
export { streamText } from './stream-text';
export type {
diff --git a/packages/ai/core/generate-text/output.test.ts b/packages/ai/core/generate-text/output.test.ts
index adde6bb5b610..d0a839036008 100644
--- a/packages/ai/core/generate-text/output.test.ts
+++ b/packages/ai/core/generate-text/output.test.ts
@@ -2,6 +2,7 @@ import { fail } from 'assert';
import { z } from 'zod';
import { verifyNoObjectGeneratedError } from '../../errors/no-object-generated-error';
import { object } from './output';
+import { FinishReason } from '../types';
const context = {
response: {
@@ -14,6 +15,7 @@ const context = {
completionTokens: 2,
totalTokens: 3,
},
+ finishReason: 'length' as FinishReason,
};
describe('Output.object', () => {
@@ -37,6 +39,7 @@ describe('Output.object', () => {
message: 'No object generated: could not parse the response.',
response: context.response,
usage: context.usage,
+ finishReason: context.finishReason,
});
}
});
@@ -50,6 +53,7 @@ describe('Output.object', () => {
message: 'No object generated: response did not match schema.',
response: context.response,
usage: context.usage,
+ finishReason: context.finishReason,
});
}
});
diff --git a/packages/ai/core/generate-text/output.ts b/packages/ai/core/generate-text/output.ts
index 7d54fccbbb11..faa632a6f7a2 100644
--- a/packages/ai/core/generate-text/output.ts
+++ b/packages/ai/core/generate-text/output.ts
@@ -9,6 +9,7 @@ import { z } from 'zod';
import { NoObjectGeneratedError } from '../../errors';
import { injectJsonInstruction } from '../generate-object/inject-json-instruction';
import {
+ FinishReason,
LanguageModel,
LanguageModelV1CallOptions,
} from '../types/language-model';
@@ -33,6 +34,7 @@ export interface Output {
context: {
response: LanguageModelResponseMetadata;
usage: LanguageModelUsage;
+ finishReason: FinishReason;
},
): OUTPUT;
}
@@ -108,6 +110,7 @@ export const object = ({
context: {
response: LanguageModelResponseMetadata;
usage: LanguageModelUsage;
+ finishReason: FinishReason;
},
) {
const parseResult = safeParseJSON({ text });
@@ -119,6 +122,7 @@ export const object = ({
text,
response: context.response,
usage: context.usage,
+ finishReason: context.finishReason,
});
}
@@ -134,6 +138,7 @@ export const object = ({
text,
response: context.response,
usage: context.usage,
+ finishReason: context.finishReason,
});
}
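To show what the added `finishReason` surfaces to callers, here is a rough sketch (assuming the OpenAI provider; the model id and schema are placeholders) of catching `NoObjectGeneratedError` from `generateText` with `experimental_output`:

```ts
import { openai } from '@ai-sdk/openai';
import { generateText, NoObjectGeneratedError, Output } from 'ai';
import { z } from 'zod';

try {
  const { experimental_output } = await generateText({
    model: openai('gpt-4o-mini'),
    experimental_output: Output.object({
      schema: z.object({ name: z.string(), age: z.number() }),
    }),
    prompt: 'Generate an example person.',
  });
  console.log(experimental_output);
} catch (error) {
  if (NoObjectGeneratedError.isInstance(error)) {
    // finishReason is now included, e.g. 'length' when the output was cut off.
    console.log(error.finishReason, error.text, error.usage);
  }
}
```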
diff --git a/packages/ai/core/generate-text/smooth-stream.test.ts b/packages/ai/core/generate-text/smooth-stream.test.ts
index 6b01f7565ab8..b2bc8257fe2a 100644
--- a/packages/ai/core/generate-text/smooth-stream.test.ts
+++ b/packages/ai/core/generate-text/smooth-stream.test.ts
@@ -23,6 +23,24 @@ describe('smoothStream', () => {
return Promise.resolve();
}
+ describe('throws error if chunking option is invalid', async () => {
+ it('throws error if chunking strategy is invalid', async () => {
+ expect(() => {
+ smoothStream({
+ chunking: 'foo' as any,
+ });
+ }).toThrowError();
+ });
+
+ it('throws error if chunking option is null', async () => {
+ expect(() => {
+ smoothStream({
+ chunking: null as any,
+ });
+ }).toThrowError();
+ });
+ });
+
describe('word chunking', () => {
it('should combine partial words', async () => {
const stream = convertArrayToReadableStream([
@@ -173,6 +191,200 @@ describe('smoothStream', () => {
},
]);
});
+
+ it('should send remaining text buffer before tool call starts', async () => {
+ const stream = convertArrayToReadableStream([
+ { type: 'text-delta', textDelta: 'I will check the' },
+ { type: 'text-delta', textDelta: ' weather in Lon' },
+ { type: 'text-delta', textDelta: 'don.' },
+ { type: 'tool-call', name: 'weather', args: { city: 'London' } },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({
+ delayInMs: 10,
+ _internal: { delay },
+ })({ tools: {} }),
+ );
+
+ await consumeStream(stream);
+
+ expect(events).toMatchInlineSnapshot(`
+ [
+ "delay 10",
+ {
+ "textDelta": "I ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "will ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "check ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "the ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "weather ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "in ",
+ "type": "text-delta",
+ },
+ {
+ "textDelta": "London.",
+ "type": "text-delta",
+ },
+ {
+ "args": {
+ "city": "London",
+ },
+ "name": "weather",
+ "type": "tool-call",
+ },
+ {
+ "type": "step-finish",
+ },
+ {
+ "type": "finish",
+ },
+ ]
+ `);
+ });
+
+ it('should send remaining text buffer before tool call starts and tool call streaming is enabled', async () => {
+ const stream = convertArrayToReadableStream([
+ { type: 'text-delta', textDelta: 'I will check the' },
+ { type: 'text-delta', textDelta: ' weather in Lon' },
+ { type: 'text-delta', textDelta: 'don.' },
+ {
+ type: 'tool-call-streaming-start',
+ name: 'weather',
+ args: { city: 'London' },
+ },
+ { type: 'tool-call-delta', name: 'weather', args: { city: 'London' } },
+ { type: 'tool-call', name: 'weather', args: { city: 'London' } },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({
+ delayInMs: 10,
+ _internal: { delay },
+ })({ tools: {} }),
+ );
+
+ await consumeStream(stream);
+
+ expect(events).toMatchInlineSnapshot(`
+ [
+ "delay 10",
+ {
+ "textDelta": "I ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "will ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "check ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "the ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "weather ",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "in ",
+ "type": "text-delta",
+ },
+ {
+ "textDelta": "London.",
+ "type": "text-delta",
+ },
+ {
+ "args": {
+ "city": "London",
+ },
+ "name": "weather",
+ "type": "tool-call-streaming-start",
+ },
+ {
+ "args": {
+ "city": "London",
+ },
+ "name": "weather",
+ "type": "tool-call-delta",
+ },
+ {
+ "args": {
+ "city": "London",
+ },
+ "name": "weather",
+ "type": "tool-call",
+ },
+ {
+ "type": "step-finish",
+ },
+ {
+ "type": "finish",
+ },
+ ]
+ `);
+ });
+
+ it(`doesn't return chunks with just spaces`, async () => {
+ const stream = convertArrayToReadableStream([
+ { type: 'text-delta', textDelta: ' ' },
+ { type: 'text-delta', textDelta: ' ' },
+ { type: 'text-delta', textDelta: ' ' },
+ { type: 'text-delta', textDelta: 'foo' },
+
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({
+ delayInMs: 10,
+ _internal: { delay },
+ })({ tools: {} }),
+ );
+
+ await consumeStream(stream);
+
+ expect(events).toMatchInlineSnapshot(`
+ [
+ {
+ "textDelta": " foo",
+ "type": "text-delta",
+ },
+ {
+ "type": "step-finish",
+ },
+ {
+ "type": "finish",
+ },
+ ]
+ `);
+ });
});
describe('line chunking', () => {
@@ -263,6 +475,42 @@ describe('smoothStream', () => {
});
describe('custom chunking', () => {
+ it(`should return correct result for regexes that don't match from the exact start onwards`, async () => {
+ const stream = convertArrayToReadableStream([
+ { textDelta: 'Hello_, world!', type: 'text-delta' },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({
+ chunking: /_/,
+ delayInMs: 10,
+ _internal: { delay },
+ })({ tools: {} }),
+ );
+
+ await consumeStream(stream);
+
+ expect(events).toMatchInlineSnapshot(`
+ [
+ "delay 10",
+ {
+ "textDelta": "Hello_",
+ "type": "text-delta",
+ },
+ {
+ "textDelta": ", world!",
+ "type": "text-delta",
+ },
+ {
+ "type": "step-finish",
+ },
+ {
+ "type": "finish",
+ },
+ ]
+ `);
+ });
+
it('should support custom chunking regexps (character-level)', async () => {
const stream = convertArrayToReadableStream([
{ textDelta: 'Hello, world!', type: 'text-delta' },
@@ -311,6 +559,87 @@ describe('smoothStream', () => {
});
});
+ describe('custom callback chunking', () => {
+ it('should support custom chunking callback', async () => {
+ const stream = convertArrayToReadableStream([
+ { textDelta: 'He_llo, ', type: 'text-delta' },
+ { textDelta: 'w_orld!', type: 'text-delta' },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({
+ chunking: buffer => /[^_]*_/.exec(buffer)?.[0],
+ _internal: { delay },
+ })({ tools: {} }),
+ );
+
+ await consumeStream(stream);
+
+ expect(events).toMatchInlineSnapshot(`
+ [
+ "delay 10",
+ {
+ "textDelta": "He_",
+ "type": "text-delta",
+ },
+ "delay 10",
+ {
+ "textDelta": "llo, w_",
+ "type": "text-delta",
+ },
+ {
+ "textDelta": "orld!",
+ "type": "text-delta",
+ },
+ {
+ "type": "step-finish",
+ },
+ {
+ "type": "finish",
+ },
+ ]
+ `);
+ });
+
+ describe('throws errors if the chunking function returns invalid matches', async () => {
+ it('throws empty match error', async () => {
+ const stream = convertArrayToReadableStream([
+ { textDelta: 'Hello, world!', type: 'text-delta' },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({ chunking: () => '', _internal: { delay } })({
+ tools: {},
+ }),
+ );
+
+ await expect(
+ consumeStream(stream),
+ ).rejects.toThrowErrorMatchingInlineSnapshot(
+ `[Error: Chunking function must return a non-empty string.]`,
+ );
+ });
+
+ it('throws match prefix error', async () => {
+ const stream = convertArrayToReadableStream([
+ { textDelta: 'Hello, world!', type: 'text-delta' },
+ { type: 'step-finish' },
+ { type: 'finish' },
+ ]).pipeThrough(
+ smoothStream({ chunking: () => 'world', _internal: { delay } })({
+ tools: {},
+ }),
+ );
+
+ await expect(
+ consumeStream(stream),
+ ).rejects.toThrowErrorMatchingInlineSnapshot(
+ `[Error: Chunking function must return a match that is a prefix of the buffer. Received: "world" expected to start with "Hello, world!"]`,
+ );
+ });
+ });
+ });
+
describe('delay', () => {
it('should default to 10ms', async () => {
const stream = convertArrayToReadableStream([
diff --git a/packages/ai/core/generate-text/smooth-stream.ts b/packages/ai/core/generate-text/smooth-stream.ts
index 2f0ef085aee4..86be60ca70ca 100644
--- a/packages/ai/core/generate-text/smooth-stream.ts
+++ b/packages/ai/core/generate-text/smooth-stream.ts
@@ -1,13 +1,22 @@
-import { InvalidArgumentError } from '@ai-sdk/provider';
import { delay as originalDelay } from '@ai-sdk/provider-utils';
import { TextStreamPart } from './stream-text-result';
import { ToolSet } from './tool-set';
+import { InvalidArgumentError } from '@ai-sdk/provider';
const CHUNKING_REGEXPS = {
- word: /\s*\S+\s+/m,
- line: /[^\n]*\n/m,
+ word: /\S+\s+/m,
+ line: /\n+/m,
};
+/**
+ * Detects the first chunk in a buffer.
+ *
+ * @param buffer - The buffer to detect the first chunk in.
+ *
+ * @returns The first detected chunk, or `undefined` if no chunk was detected.
+ */
+export type ChunkDetector = (buffer: string) => string | undefined | null;
+
/**
* Smooths text streaming output.
*
@@ -22,7 +31,7 @@ export function smoothStream<TOOLS extends ToolSet>({
_internal: { delay = originalDelay } = {},
}: {
delayInMs?: number | null;
- chunking?: 'word' | 'line' | RegExp;
+ chunking?: 'word' | 'line' | RegExp | ChunkDetector;
/**
* Internal. For test use only. May change without notice.
*/
@@ -32,21 +41,56 @@ export function smoothStream({
} = {}): (options: {
tools: TOOLS;
}) => TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>> {
- const chunkingRegexp =
- typeof chunking === 'string' ? CHUNKING_REGEXPS[chunking] : chunking;
+ let detectChunk: ChunkDetector;
- if (chunkingRegexp == null) {
- throw new InvalidArgumentError({
- argument: 'chunking',
- message: `Chunking must be "word" or "line" or a RegExp. Received: ${chunking}`,
- });
+ if (typeof chunking === 'function') {
+ detectChunk = buffer => {
+ const match = chunking(buffer);
+
+ if (match == null) {
+ return null;
+ }
+
+ if (!match.length) {
+ throw new Error(`Chunking function must return a non-empty string.`);
+ }
+
+ if (!buffer.startsWith(match)) {
+ throw new Error(
+ `Chunking function must return a match that is a prefix of the buffer. Received: "${match}" expected to start with "${buffer}"`,
+ );
+ }
+
+ return match;
+ };
+ } else {
+ const chunkingRegex =
+ typeof chunking === 'string' ? CHUNKING_REGEXPS[chunking] : chunking;
+
+ if (chunkingRegex == null) {
+ throw new InvalidArgumentError({
+ argument: 'chunking',
+ message: `Chunking must be "word" or "line" or a RegExp. Received: ${chunking}`,
+ });
+ }
+
+ detectChunk = buffer => {
+ const match = chunkingRegex.exec(buffer);
+
+ if (!match) {
+ return null;
+ }
+
+ return buffer.slice(0, match.index) + match?.[0];
+ };
}
return () => {
let buffer = '';
+
return new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
async transform(chunk, controller) {
- if (chunk.type === 'step-finish') {
+ if (chunk.type !== 'text-delta') {
if (buffer.length > 0) {
controller.enqueue({ type: 'text-delta', textDelta: buffer });
buffer = '';
@@ -56,18 +100,13 @@ export function smoothStream({
return;
}
- if (chunk.type !== 'text-delta') {
- controller.enqueue(chunk);
- return;
- }
-
buffer += chunk.textDelta;
let match;
- while ((match = chunkingRegexp.exec(buffer)) != null) {
- const chunk = match[0];
- controller.enqueue({ type: 'text-delta', textDelta: chunk });
- buffer = buffer.slice(chunk.length);
+
+ while ((match = detectChunk(buffer)) != null) {
+ controller.enqueue({ type: 'text-delta', textDelta: match });
+ buffer = buffer.slice(match.length);
await delay(delayInMs);
}
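A minimal sketch of the new `ChunkDetector` callback in use with `streamText` (provider, model id, regex, and delay are illustrative). The callback must return a non-empty prefix of the buffer, or `null`/`undefined` to wait for more text:

```ts
import { openai } from '@ai-sdk/openai';
import { smoothStream, streamText } from 'ai';

const result = streamText({
  model: openai('gpt-4o-mini'),
  prompt: 'Tell me a short story.',
  experimental_transform: smoothStream({
    delayInMs: 20,
    // Emit one sentence at a time; null buffers until a full sentence has arrived.
    chunking: buffer => /[^.!?]*[.!?]\s*/.exec(buffer)?.[0] ?? null,
  }),
});

for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
```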
diff --git a/packages/ai/core/generate-text/stream-text-result.ts b/packages/ai/core/generate-text/stream-text-result.ts
index 3dde6d3b765e..181518dac5d2 100644
--- a/packages/ai/core/generate-text/stream-text-result.ts
+++ b/packages/ai/core/generate-text/stream-text-result.ts
@@ -61,6 +61,10 @@ export type DataStreamOptions = {
experimental_sendStart?: boolean;
};
+export type ConsumeStreamOptions = {
+ onError?: (error: unknown) => void;
+};
+
/**
A result object for accessing different stream types and additional information.
*/
@@ -203,8 +207,10 @@ Consumes the stream without processing the parts.
This is useful to force the stream to finish.
It effectively removes the backpressure and allows the stream to finish,
triggering the `onFinish` callback and the promise resolution.
+
+If an error occurs, it is passed to the optional `onError` callback.
*/
- consumeStream(): Promise<void>;
+ consumeStream(options?: ConsumeStreamOptions): Promise<void>;
/**
Converts the result to a data stream.
diff --git a/packages/ai/core/generate-text/stream-text.test.ts b/packages/ai/core/generate-text/stream-text.test.ts
index bc31157575d3..395be5d96fd1 100644
--- a/packages/ai/core/generate-text/stream-text.test.ts
+++ b/packages/ai/core/generate-text/stream-text.test.ts
@@ -1542,6 +1542,92 @@ describe('streamText', () => {
});
});
+ describe('result.consumeStream', () => {
+ it('should ignore AbortError during stream consumption', async () => {
+ const result = streamText({
+ model: createTestModel({
+ stream: new ReadableStream({
+ start(controller) {
+ controller.enqueue({ type: 'text-delta', textDelta: 'Hello' });
+ queueMicrotask(() => {
+ controller.error(
+ Object.assign(new Error('Stream aborted'), {
+ name: 'AbortError',
+ }),
+ );
+ });
+ },
+ }),
+ }),
+ prompt: 'test-input',
+ });
+
+ await expect(result.consumeStream()).resolves.not.toThrow();
+ });
+
+ it('should ignore ResponseAborted error during stream consumption', async () => {
+ const result = streamText({
+ model: createTestModel({
+ stream: new ReadableStream({
+ start(controller) {
+ controller.enqueue({ type: 'text-delta', textDelta: 'Hello' });
+ queueMicrotask(() => {
+ controller.error(
+ Object.assign(new Error('Response aborted'), {
+ name: 'ResponseAborted',
+ }),
+ );
+ });
+ },
+ }),
+ }),
+ prompt: 'test-input',
+ });
+
+ await expect(result.consumeStream()).resolves.not.toThrow();
+ });
+
+ it('should ignore any errors during stream consumption', async () => {
+ const result = streamText({
+ model: createTestModel({
+ stream: new ReadableStream({
+ start(controller) {
+ controller.enqueue({ type: 'text-delta', textDelta: 'Hello' });
+ queueMicrotask(() => {
+ controller.error(Object.assign(new Error('Some error')));
+ });
+ },
+ }),
+ }),
+ prompt: 'test-input',
+ });
+
+ await expect(result.consumeStream()).resolves.not.toThrow();
+ });
+
+ it('should call the onError callback with the error', async () => {
+ const onErrorCallback = vi.fn();
+ const result = streamText({
+ model: createTestModel({
+ stream: new ReadableStream({
+ start(controller) {
+ controller.enqueue({ type: 'text-delta', textDelta: 'Hello' });
+ queueMicrotask(() => {
+ controller.error(Object.assign(new Error('Some error')));
+ });
+ },
+ }),
+ }),
+ prompt: 'test-input',
+ });
+
+ await expect(
+ result.consumeStream({ onError: onErrorCallback }),
+ ).resolves.not.toThrow();
+ expect(onErrorCallback).toHaveBeenCalledWith(new Error('Some error'));
+ });
+ });
+
describe('multiple stream consumption', () => {
it('should support text stream, ai stream, full stream on single result object', async () => {
const result = streamText({
diff --git a/packages/ai/core/generate-text/stream-text.ts b/packages/ai/core/generate-text/stream-text.ts
index b7d5c3a54efb..202524907906 100644
--- a/packages/ai/core/generate-text/stream-text.ts
+++ b/packages/ai/core/generate-text/stream-text.ts
@@ -8,6 +8,7 @@ import { InvalidStreamPartError } from '../../errors/invalid-stream-part-error';
import { NoOutputSpecifiedError } from '../../errors/no-output-specified-error';
import { StreamData } from '../../streams/stream-data';
import { asArray } from '../../util/as-array';
+import { consumeStream } from '../../util/consume-stream';
import { DelayedPromise } from '../../util/delayed-promise';
import { DataStreamWriter } from '../data-stream/data-stream-writer';
import { CallSettings } from '../prompt/call-settings';
@@ -53,6 +54,7 @@ import {
} from './run-tools-transformation';
import { ResponseMessage, StepResult } from './step-result';
import {
+ ConsumeStreamOptions,
DataStreamOptions,
StreamTextResult,
TextStreamPart,
@@ -1591,10 +1593,14 @@ However, the LLM results are expected to be small enough to not cause issues.
);
}
- async consumeStream(): Promise<void> {
- const stream = this.fullStream;
- for await (const part of stream) {
- // no op
+ async consumeStream(options?: ConsumeStreamOptions): Promise<void> {
+ try {
+ await consumeStream({
+ stream: this.fullStream,
+ onError: options?.onError,
+ });
+ } catch (error) {
+ options?.onError?.(error);
}
}
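For context, a hedged sketch of where `consumeStream` is typically called: draining the stream in the background of a route handler so `onFinish` still runs if the client disconnects. With this change, stream errors can be observed via `onError` instead of being swallowed silently. The route shape and model id are assumptions, not part of this diff.

```ts
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';

export async function POST(req: Request) {
  const { messages } = await req.json();

  const result = streamText({
    model: openai('gpt-4o-mini'),
    messages,
    onFinish: async ({ text }) => {
      // persist the final text here, even if the client aborted the response
    },
  });

  // Remove backpressure; errors are reported instead of thrown (no await needed).
  result.consumeStream({
    onError: error => {
      console.log('Error during background stream consumption:', error);
    },
  });

  return result.toDataStreamResponse();
}
```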
diff --git a/packages/ai/core/generate-text/to-response-messages.test.ts b/packages/ai/core/generate-text/to-response-messages.test.ts
index 0d74122b21c5..1e5cf4b38039 100644
--- a/packages/ai/core/generate-text/to-response-messages.test.ts
+++ b/packages/ai/core/generate-text/to-response-messages.test.ts
@@ -1,8 +1,8 @@
import { z } from 'zod';
import { mockValues } from '../test/mock-values';
import { tool } from '../tool';
-import { toResponseMessages } from './to-response-messages';
import { DefaultGeneratedFile } from './generated-file';
+import { toResponseMessages } from './to-response-messages';
describe('toResponseMessages', () => {
it('should return an assistant message with text when no tool calls or results', () => {
@@ -138,7 +138,13 @@ describe('toResponseMessages', () => {
const result = toResponseMessages({
text: undefined,
files: [],
- reasoning: [],
+ reasoning: [
+ {
+ type: 'text',
+ text: 'Thinking text',
+ signature: 'sig',
+ },
+ ],
tools: {},
toolCalls: [],
toolResults: [],
@@ -149,7 +155,9 @@ describe('toResponseMessages', () => {
expect(result).toEqual([
{
role: 'assistant',
- content: [{ type: 'text', text: '' }],
+ content: [
+ { type: 'reasoning', text: 'Thinking text', signature: 'sig' },
+ ],
id: 'msg-123',
},
]);
@@ -392,4 +400,59 @@ describe('toResponseMessages', () => {
},
]);
});
+
+ it('should not append text parts if text is empty string', () => {
+ const result = toResponseMessages({
+ text: '',
+ files: [],
+ reasoning: [],
+ tools: {
+ testTool: {
+ description: 'A test tool',
+ parameters: z.object({}),
+ },
+ },
+ toolCalls: [
+ {
+ type: 'tool-call',
+ toolCallId: '123',
+ toolName: 'testTool',
+ args: {},
+ },
+ ],
+ toolResults: [],
+ messageId: 'msg-123',
+ generateMessageId: mockValues('msg-345'),
+ });
+
+ expect(result).toEqual([
+ {
+ role: 'assistant',
+ id: 'msg-123',
+ content: [
+ {
+ type: 'tool-call',
+ toolCallId: '123',
+ toolName: 'testTool',
+ args: {},
+ },
+ ],
+ },
+ ]);
+ });
+
+ it('should not append assistant message if there is no content', () => {
+ const result = toResponseMessages({
+ text: '',
+ files: [],
+ reasoning: [],
+ tools: {},
+ toolCalls: [],
+ toolResults: [],
+ messageId: 'msg-123',
+ generateMessageId: mockValues('msg-345'),
+ });
+
+ expect(result).toEqual([]);
+ });
});
diff --git a/packages/ai/core/generate-text/to-response-messages.ts b/packages/ai/core/generate-text/to-response-messages.ts
index 1cb3fe8c834d..b87dee220496 100644
--- a/packages/ai/core/generate-text/to-response-messages.ts
+++ b/packages/ai/core/generate-text/to-response-messages.ts
@@ -30,25 +30,45 @@ export function toResponseMessages({
}): Array<ResponseMessage> {
const responseMessages: Array<ResponseMessage> = [];
- responseMessages.push({
- role: 'assistant',
- content: [
+ const content = [];
+
+ // TODO language model v2: switch to order response content (instead of type-based ordering)
+
+ if (reasoning.length > 0) {
+ content.push(
...reasoning.map(part =>
part.type === 'text'
? { ...part, type: 'reasoning' as const }
: { ...part, type: 'redacted-reasoning' as const },
),
- // TODO language model v2: switch to order response content (instead of type-based ordering)
+ );
+ }
+
+ if (files.length > 0) {
+ content.push(
...files.map(file => ({
type: 'file' as const,
data: file.base64,
mimeType: file.mimeType,
})),
- { type: 'text' as const, text },
- ...toolCalls,
- ],
- id: messageId,
- });
+ );
+ }
+
+ if (text.length > 0) {
+ content.push({ type: 'text' as const, text });
+ }
+
+ if (toolCalls.length > 0) {
+ content.push(...toolCalls);
+ }
+
+ if (content.length > 0) {
+ responseMessages.push({
+ role: 'assistant',
+ content,
+ id: messageId,
+ });
+ }
if (toolResults.length > 0) {
responseMessages.push({
diff --git a/packages/ai/core/index.ts b/packages/ai/core/index.ts
index d23acab74658..7f5d49cc07b7 100644
--- a/packages/ai/core/index.ts
+++ b/packages/ai/core/index.ts
@@ -37,6 +37,8 @@ export * from './embed';
export * from './generate-image';
export * from './generate-object';
export * from './generate-text';
+export * from './generate-speech';
+export * from './transcribe';
export * from './middleware';
export * from './prompt';
export * from './registry';
diff --git a/packages/ai/core/prompt/__snapshots__/standardize-prompt.test.ts.snap b/packages/ai/core/prompt/__snapshots__/standardize-prompt.test.ts.snap
new file mode 100644
index 000000000000..19e6be54a72f
--- /dev/null
+++ b/packages/ai/core/prompt/__snapshots__/standardize-prompt.test.ts.snap
@@ -0,0 +1,233 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`should detect mixed core messages and simple messages as valid messages 1`] = `
+[AI_InvalidPromptError: Invalid prompt: message must be a CoreMessage or a UI message
+Validation error: Type validation failed: Value: [{"role":"system","content":"System prompt","providerOptions":{"provider":"test"}},{"role":"user","content":"Hello"}].
+Error message: [
+ {
+ "code": "invalid_union",
+ "unionErrors": [
+ {
+ "issues": [
+ {
+ "code": "invalid_type",
+ "expected": "object",
+ "received": "string",
+ "path": [
+ 0,
+ "providerOptions",
+ "provider"
+ ],
+ "message": "Expected object, received string"
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "user",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"user\\""
+ },
+ {
+ "code": "invalid_type",
+ "expected": "object",
+ "received": "string",
+ "path": [
+ 0,
+ "providerOptions",
+ "provider"
+ ],
+ "message": "Expected object, received string"
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "assistant",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"assistant\\""
+ },
+ {
+ "code": "invalid_type",
+ "expected": "object",
+ "received": "string",
+ "path": [
+ 0,
+ "providerOptions",
+ "provider"
+ ],
+ "message": "Expected object, received string"
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "tool",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"tool\\""
+ },
+ {
+ "code": "invalid_type",
+ "expected": "array",
+ "received": "string",
+ "path": [
+ 0,
+ "content"
+ ],
+ "message": "Expected array, received string"
+ },
+ {
+ "code": "invalid_type",
+ "expected": "object",
+ "received": "string",
+ "path": [
+ 0,
+ "providerOptions",
+ "provider"
+ ],
+ "message": "Expected object, received string"
+ }
+ ],
+ "name": "ZodError"
+ }
+ ],
+ "path": [
+ 0
+ ],
+ "message": "Invalid input"
+ }
+]]
+`;
+
+exports[`should throw InvalidPromptError when system message has parts 1`] = `
+[AI_InvalidPromptError: Invalid prompt: message must be a CoreMessage or a UI message
+Validation error: Type validation failed: Value: [{"role":"system","content":[{"type":"text","text":"test"}]}].
+Error message: [
+ {
+ "code": "invalid_union",
+ "unionErrors": [
+ {
+ "issues": [
+ {
+ "code": "invalid_type",
+ "expected": "string",
+ "received": "array",
+ "path": [
+ 0,
+ "content"
+ ],
+ "message": "Expected string, received array"
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "user",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"user\\""
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "assistant",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"assistant\\""
+ }
+ ],
+ "name": "ZodError"
+ },
+ {
+ "issues": [
+ {
+ "received": "system",
+ "code": "invalid_literal",
+ "expected": "tool",
+ "path": [
+ 0,
+ "role"
+ ],
+ "message": "Invalid literal value, expected \\"tool\\""
+ },
+ {
+ "received": "text",
+ "code": "invalid_literal",
+ "expected": "tool-result",
+ "path": [
+ 0,
+ "content",
+ 0,
+ "type"
+ ],
+ "message": "Invalid literal value, expected \\"tool-result\\""
+ },
+ {
+ "code": "invalid_type",
+ "expected": "string",
+ "received": "undefined",
+ "path": [
+ 0,
+ "content",
+ 0,
+ "toolCallId"
+ ],
+ "message": "Required"
+ },
+ {
+ "code": "invalid_type",
+ "expected": "string",
+ "received": "undefined",
+ "path": [
+ 0,
+ "content",
+ 0,
+ "toolName"
+ ],
+ "message": "Required"
+ }
+ ],
+ "name": "ZodError"
+ }
+ ],
+ "path": [
+ 0
+ ],
+ "message": "Invalid input"
+ }
+]]
+`;
diff --git a/packages/ai/core/prompt/convert-to-language-model-prompt.ts b/packages/ai/core/prompt/convert-to-language-model-prompt.ts
index 61c9240362aa..e6dcf8000fef 100644
--- a/packages/ai/core/prompt/convert-to-language-model-prompt.ts
+++ b/packages/ai/core/prompt/convert-to-language-model-prompt.ts
@@ -7,7 +7,10 @@ import {
} from '@ai-sdk/provider';
import { download } from '../../util/download';
import { CoreMessage } from '../prompt/message';
-import { detectImageMimeType } from '../util/detect-image-mimetype';
+import {
+ detectMimeType,
+ imageMimeTypeSignatures,
+} from '../util/detect-mimetype';
import { FilePart, ImagePart, TextPart } from './content-part';
import {
convertDataContentToBase64String,
@@ -341,7 +344,11 @@ function convertPartToLanguageModelPart(
// When detection fails, use provided mimetype.
if (normalizedData instanceof Uint8Array) {
- mimeType = detectImageMimeType(normalizedData) ?? mimeType;
+ mimeType =
+ detectMimeType({
+ data: normalizedData,
+ signatures: imageMimeTypeSignatures,
+ }) ?? mimeType;
}
return {
type: 'image',
diff --git a/packages/ai/core/prompt/detect-prompt-type.test.ts b/packages/ai/core/prompt/detect-prompt-type.test.ts
deleted file mode 100644
index d0aca919722c..000000000000
--- a/packages/ai/core/prompt/detect-prompt-type.test.ts
+++ /dev/null
@@ -1,118 +0,0 @@
-import { Message } from '@ai-sdk/ui-utils';
-import { detectPromptType } from './detect-prompt-type';
-import type { CoreMessage } from './message';
-
-it('should return "other" for invalid inputs', () => {
- expect(detectPromptType(null as any)).toBe('other');
- expect(detectPromptType(undefined as any)).toBe('other');
- expect(detectPromptType('not an array' as any)).toBe('other');
-});
-
-it('should return "messages" for empty arrays', () => {
- expect(detectPromptType([])).toBe('messages');
-});
-
-it('should detect UI messages with data role', () => {
- const messages: Omit<Message, 'id'>[] = [
- {
- role: 'data',
- content: 'some data',
- },
- ];
- expect(detectPromptType(messages)).toBe('ui-messages');
-});
-
-it('should detect UI messages with toolInvocations', () => {
- const messages: Omit<Message, 'id'>[] = [
- {
- role: 'assistant',
- content: 'Hello',
- toolInvocations: [
- {
- state: 'result',
- toolCallId: '1',
- toolName: 'test',
- args: '{}',
- result: 'result',
- },
- ],
- },
- ];
- expect(detectPromptType(messages)).toBe('ui-messages');
-});
-
-it('should detect UI messages with experimental_attachments', () => {
- const messages: Omit<Message, 'id'>[] = [
- {
- role: 'user',
- content: 'Check this file',
- experimental_attachments: [{ contentType: 'image/png', url: 'test.png' }],
- },
- ];
- expect(detectPromptType(messages)).toBe('ui-messages');
-});
-
-it('should detect core messages with array content', () => {
- const messages: CoreMessage[] = [
- {
- role: 'user',
- content: [{ type: 'text', text: 'Hello' }],
- },
- ];
- expect(detectPromptType(messages)).toBe('messages');
-});
-
-it('should detect core messages with providerOptions', () => {
- const messages: CoreMessage[] = [
- {
- role: 'system',
- content: 'System prompt',
- providerOptions: { provider: { test: 'value' } },
- },
- ];
- expect(detectPromptType(messages)).toBe('messages');
-});
-
-it('should detect simple valid messages', () => {
- const messages = [
- {
- role: 'system',
- content: 'You are a helpful assistant',
- },
- {
- role: 'user',
- content: 'Hello',
- },
- {
- role: 'assistant',
- content: 'Hi there!',
- },
- ];
- expect(detectPromptType(messages)).toBe('messages');
-});
-
-it('should detect mixed core messages and simple messages as valid messages', () => {
- const messages = [
- {
- role: 'system',
- content: 'System prompt',
- providerOptions: { provider: 'test' },
- },
- {
- role: 'user',
- content: 'Hello',
- },
- ];
- expect(detectPromptType(messages)).toBe('messages');
-});
-
-it('should detect UI messages with parts array', () => {
- const messages: Omit<Message, 'id'>[] = [
- {
- role: 'assistant',
- content: 'Hello',
- parts: [{ type: 'text', text: 'Hello' }],
- },
- ];
- expect(detectPromptType(messages)).toBe('ui-messages');
-});
diff --git a/packages/ai/core/prompt/detect-prompt-type.ts b/packages/ai/core/prompt/detect-prompt-type.ts
deleted file mode 100644
index 8186f1c731ad..000000000000
--- a/packages/ai/core/prompt/detect-prompt-type.ts
+++ /dev/null
@@ -1,61 +0,0 @@
-export function detectPromptType(
- prompt: Array,
-): 'ui-messages' | 'messages' | 'other' {
- if (!Array.isArray(prompt)) {
- return 'other';
- }
-
- if (prompt.length === 0) {
- return 'messages';
- }
-
- const characteristics = prompt.map(detectSingleMessageCharacteristics);
-
- if (characteristics.some(c => c === 'has-ui-specific-parts')) {
- return 'ui-messages';
- } else if (
- characteristics.every(
- c => c === 'has-core-specific-parts' || c === 'message',
- )
- ) {
- return 'messages';
- } else {
- return 'other';
- }
-}
-
-function detectSingleMessageCharacteristics(
- message: any,
-): 'has-ui-specific-parts' | 'has-core-specific-parts' | 'message' | 'other' {
- if (
- typeof message === 'object' &&
- message !== null &&
- (message.role === 'function' || // UI-only role
- message.role === 'data' || // UI-only role
- 'toolInvocations' in message || // UI-specific field
- 'parts' in message || // UI-specific field
- 'experimental_attachments' in message)
- ) {
- return 'has-ui-specific-parts';
- } else if (
- typeof message === 'object' &&
- message !== null &&
- 'content' in message &&
- (Array.isArray(message.content) || // Core messages can have array content
- 'experimental_providerMetadata' in message ||
- 'providerOptions' in message)
- ) {
- return 'has-core-specific-parts';
- } else if (
- typeof message === 'object' &&
- message !== null &&
- 'role' in message &&
- 'content' in message &&
- typeof message.content === 'string' &&
- ['system', 'user', 'assistant', 'tool'].includes(message.role)
- ) {
- return 'message';
- } else {
- return 'other';
- }
-}
diff --git a/packages/ai/core/prompt/standardize-prompt.test.ts b/packages/ai/core/prompt/standardize-prompt.test.ts
index 62122caf6d09..dca45c816aaa 100644
--- a/packages/ai/core/prompt/standardize-prompt.test.ts
+++ b/packages/ai/core/prompt/standardize-prompt.test.ts
@@ -1,31 +1,384 @@
-import { InvalidPromptError } from '@ai-sdk/provider';
+import { Message } from '@ai-sdk/ui-utils';
import { standardizePrompt } from './standardize-prompt';
+import { CoreMessage } from './message';
-describe('message prompt', () => {
- it('should throw InvalidPromptError when system message has parts', () => {
- expect(() => {
- standardizePrompt({
- prompt: {
- messages: [
+it('should throw InvalidPromptError when system message has parts', () => {
+ expect(() => {
+ standardizePrompt({
+ prompt: {
+ messages: [
+ {
+ role: 'system',
+ content: [{ type: 'text', text: 'test' }] as any,
+ },
+ ],
+ },
+ tools: undefined,
+ });
+ }).toThrowErrorMatchingSnapshot();
+});
+
+it('should throw InvalidPromptError when messages array is empty', () => {
+ expect(() => {
+ standardizePrompt({
+ prompt: {
+ messages: [],
+ },
+ tools: undefined,
+ });
+ }).toThrowErrorMatchingInlineSnapshot(
+ `[AI_InvalidPromptError: Invalid prompt: messages must not be empty]`,
+ );
+});
+
+it('should throw error for invalid inputs', () => {
+ expect(() =>
+ standardizePrompt({
+ prompt: {
+ messages: null as any,
+ },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingInlineSnapshot(
+ `[AI_InvalidPromptError: Invalid prompt: prompt or messages must be defined]`,
+ );
+
+ expect(() =>
+ standardizePrompt({
+ prompt: {
+ messages: undefined as any,
+ },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingInlineSnapshot(
+ `[AI_InvalidPromptError: Invalid prompt: prompt or messages must be defined]`,
+ );
+
+ expect(() =>
+ standardizePrompt({
+ prompt: {
+ messages: 'not an array' as any,
+ },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingInlineSnapshot(`
+ [AI_InvalidPromptError: Invalid prompt: messages must be an array of CoreMessage or UIMessage
+ Received non-array value: "not an array"]
+ `);
+
+ expect(() =>
+ standardizePrompt({
+ prompt: {
+ messages: [
+ {
+ role: 'system',
+ },
+ ] as any,
+ },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingInlineSnapshot(`
+ [AI_InvalidPromptError: Invalid prompt: messages must be an array of CoreMessage or UIMessage
+ Received message of type: "other" at index 0
+ messages[0]: {"role":"system"}]
+ `);
+});
+
+it('should return empty array for empty arrays', () => {
+ expect(() =>
+ standardizePrompt({
+ prompt: {
+ messages: [],
+ },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingInlineSnapshot(
+ `[AI_InvalidPromptError: Invalid prompt: messages must not be empty]`,
+ );
+});
+
+it('should filter UI messages with data role', () => {
+ const messages: Omit<Message, 'id'>[] = [
+ {
+ role: 'data',
+ content: 'some data',
+ },
+ {
+ role: 'user',
+ content: 'some user content',
+ },
+ ];
+
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": "some user content",
+ "role": "user",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect UI messages with toolInvocations', () => {
+ const messages: Omit<Message, 'id'>[] = [
+ {
+ role: 'assistant',
+ content: 'Hello',
+ toolInvocations: [
+ {
+ state: 'result',
+ toolCallId: '1',
+ toolName: 'test',
+ args: '{}',
+ result: 'result',
+ },
+ ],
+ },
+ ];
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": [
+ {
+ "text": "Hello",
+ "type": "text",
+ },
+ {
+ "args": "{}",
+ "toolCallId": "1",
+ "toolName": "test",
+ "type": "tool-call",
+ },
+ ],
+ "role": "assistant",
+ },
+ {
+ "content": [
+ {
+ "result": "result",
+ "toolCallId": "1",
+ "toolName": "test",
+ "type": "tool-result",
+ },
+ ],
+ "role": "tool",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect UI messages with experimental_attachments', () => {
+ const messages: Omit<Message, 'id'>[] = [
+ {
+ role: 'user',
+ content: 'Check this file',
+ experimental_attachments: [
+ { contentType: 'image/png', url: 'https://test.com' },
+ ],
+ },
+ ];
+
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": [
+ {
+ "text": "Check this file",
+ "type": "text",
+ },
+ {
+ "image": "https://test.com/",
+ "type": "image",
+ },
+ ],
+ "role": "user",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect core messages with array content', () => {
+ const messages: CoreMessage[] = [
+ {
+ role: 'user',
+ content: [{ type: 'text', text: 'Hello' }],
+ },
+ ];
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": [
+ {
+ "text": "Hello",
+ "type": "text",
+ },
+ ],
+ "role": "user",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect core messages with providerOptions', () => {
+ const messages: CoreMessage[] = [
+ {
+ role: 'system',
+ content: 'System prompt',
+ providerOptions: { provider: { test: 'value' } },
+ },
+ ];
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": "System prompt",
+ "providerOptions": {
+ "provider": {
+ "test": "value",
+ },
+ },
+ "role": "system",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect simple valid messages', () => {
+ const messages: CoreMessage[] = [
+ {
+ role: 'system',
+ content: 'You are a helpful assistant',
+ },
+ {
+ role: 'user',
+ content: 'Hello',
+ },
+ {
+ role: 'assistant',
+ content: 'Hi there!',
+ },
+ ];
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": "You are a helpful assistant",
+ "role": "system",
+ },
+ {
+ "content": "Hello",
+ "role": "user",
+ },
+ {
+ "content": "Hi there!",
+ "role": "assistant",
+ },
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
+});
+
+it('should detect mixed core messages and simple messages as valid messages', () => {
+ const messages: any[] = [
+ {
+ role: 'system',
+ content: 'System prompt',
+ providerOptions: { provider: 'test' },
+ },
+ {
+ role: 'user',
+ content: 'Hello',
+ },
+ ];
+
+ expect(() =>
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toThrowErrorMatchingSnapshot();
+});
+
+it('should detect UI messages with parts array', () => {
+ const messages: Omit<Message, 'id'>[] = [
+ {
+ role: 'assistant',
+ content: 'Hello',
+ parts: [{ type: 'text', text: 'Hello' }],
+ },
+ ];
+
+ expect(
+ standardizePrompt({
+ prompt: { messages },
+ tools: undefined,
+ }),
+ ).toMatchInlineSnapshot(`
+ {
+ "messages": [
+ {
+ "content": [
{
- role: 'system',
- content: [{ type: 'text', text: 'test' }] as any,
+ "text": "Hello",
+ "type": "text",
},
],
+ "role": "assistant",
},
- tools: undefined,
- });
- }).toThrow(InvalidPromptError);
- });
-
- it('should throw InvalidPromptError when messages array is empty', () => {
- expect(() => {
- standardizePrompt({
- prompt: {
- messages: [],
- },
- tools: undefined,
- });
- }).toThrow(InvalidPromptError);
- });
+ ],
+ "system": undefined,
+ "type": "messages",
+ }
+ `);
});
diff --git a/packages/ai/core/prompt/standardize-prompt.ts b/packages/ai/core/prompt/standardize-prompt.ts
index 8e1c68aefe24..982f061d8a71 100644
--- a/packages/ai/core/prompt/standardize-prompt.ts
+++ b/packages/ai/core/prompt/standardize-prompt.ts
@@ -4,7 +4,6 @@ import { Message } from '@ai-sdk/ui-utils';
import { z } from 'zod';
import { ToolSet } from '../generate-text/tool-set';
import { convertToCoreMessages } from './convert-to-core-messages';
-import { detectPromptType } from './detect-prompt-type';
import { CoreMessage, coreMessageSchema } from './message';
import { Prompt } from './prompt';
@@ -81,13 +80,6 @@ export function standardizePrompt({
if (prompt.messages != null) {
const promptType = detectPromptType(prompt.messages);
- if (promptType === 'other') {
- throw new InvalidPromptError({
- prompt,
- message: 'messages must be an array of CoreMessage or UIMessage',
- });
- }
-
const messages: CoreMessage[] =
promptType === 'ui-messages'
? convertToCoreMessages(prompt.messages as Omit<Message, 'id'>[], {
@@ -110,7 +102,10 @@ export function standardizePrompt({
if (!validationResult.success) {
throw new InvalidPromptError({
prompt,
- message: 'messages must be an array of CoreMessage or UIMessage',
+ message: [
+ 'message must be a CoreMessage or a UI message',
+ `Validation error: ${validationResult.error.message}`,
+ ].join('\n'),
cause: validationResult.error,
});
}
@@ -124,3 +119,82 @@ export function standardizePrompt({
throw new Error('unreachable');
}
+
+function detectPromptType(
+ prompt: Array<any>,
+): 'ui-messages' | 'messages' | 'other' {
+ if (!Array.isArray(prompt)) {
+ throw new InvalidPromptError({
+ prompt,
+ message: [
+ 'messages must be an array of CoreMessage or UIMessage',
+ `Received non-array value: ${JSON.stringify(prompt)}`,
+ ].join('\n'),
+ cause: prompt,
+ });
+ }
+
+ if (prompt.length === 0) {
+ return 'messages';
+ }
+
+ const characteristics = prompt.map(detectSingleMessageCharacteristics);
+
+ if (characteristics.some(c => c === 'has-ui-specific-parts')) {
+ return 'ui-messages';
+ }
+
+ const nonMessageIndex = characteristics.findIndex(
+ c => c !== 'has-core-specific-parts' && c !== 'message',
+ );
+
+ if (nonMessageIndex === -1) {
+ return 'messages';
+ }
+
+ throw new InvalidPromptError({
+ prompt,
+ message: [
+ 'messages must be an array of CoreMessage or UIMessage',
+ `Received message of type: "${characteristics[nonMessageIndex]}" at index ${nonMessageIndex}`,
+ `messages[${nonMessageIndex}]: ${JSON.stringify(prompt[nonMessageIndex])}`,
+ ].join('\n'),
+ cause: prompt,
+ });
+}
+
+function detectSingleMessageCharacteristics(
+ message: any,
+): 'has-ui-specific-parts' | 'has-core-specific-parts' | 'message' | 'other' {
+ if (
+ typeof message === 'object' &&
+ message !== null &&
+ (message.role === 'function' || // UI-only role
+ message.role === 'data' || // UI-only role
+ 'toolInvocations' in message || // UI-specific field
+ 'parts' in message || // UI-specific field
+ 'experimental_attachments' in message)
+ ) {
+ return 'has-ui-specific-parts';
+ } else if (
+ typeof message === 'object' &&
+ message !== null &&
+ 'content' in message &&
+ (Array.isArray(message.content) || // Core messages can have array content
+ 'experimental_providerMetadata' in message ||
+ 'providerOptions' in message)
+ ) {
+ return 'has-core-specific-parts';
+ } else if (
+ typeof message === 'object' &&
+ message !== null &&
+ 'role' in message &&
+ 'content' in message &&
+ typeof message.content === 'string' &&
+ ['system', 'user', 'assistant', 'tool'].includes(message.role)
+ ) {
+ return 'message';
+ } else {
+ return 'other';
+ }
+}
diff --git a/packages/ai/core/registry/provider-registry.test.ts b/packages/ai/core/registry/provider-registry.test.ts
index 58ad82693c11..f1f27fdf19f4 100644
--- a/packages/ai/core/registry/provider-registry.test.ts
+++ b/packages/ai/core/registry/provider-registry.test.ts
@@ -97,6 +97,27 @@ describe('languageModel', () => {
expect(modelRegistry.languageModel('provider|model')).toEqual(model);
});
+
+ it('should support custom separator with multiple characters', () => {
+ const model = new MockLanguageModelV1();
+
+ const modelRegistry = createProviderRegistry(
+ {
+ provider: {
+ languageModel: id => {
+ expect(id).toEqual('model');
+ return model;
+ },
+ textEmbeddingModel: () => {
+ return null as any;
+ },
+ },
+ },
+ { separator: ' > ' },
+ );
+
+ expect(modelRegistry.languageModel('provider > model')).toEqual(model);
+ });
});
describe('textEmbeddingModel', () => {
diff --git a/packages/ai/core/registry/provider-registry.ts b/packages/ai/core/registry/provider-registry.ts
index 9a6bc74fe9bf..e9585d42c1fc 100644
--- a/packages/ai/core/registry/provider-registry.ts
+++ b/packages/ai/core/registry/provider-registry.ts
@@ -126,7 +126,7 @@ class DefaultProviderRegistry<
});
}
- return [id.slice(0, index), id.slice(index + 1)];
+ return [id.slice(0, index), id.slice(index + this.separator.length)];
}
languageModel(
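A short sketch of the behavior the slice fix and new test cover: with a multi-character separator, the full separator is now skipped when splitting the model id (providers and model id below are placeholders).

```ts
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, generateText } from 'ai';

const registry = createProviderRegistry(
  { openai, anthropic },
  { separator: ' > ' }, // default separator is ':'
);

// Before the fix, only the first character of the separator was skipped,
// so the provider would have received '> gpt-4o-mini' as the model id.
const { text } = await generateText({
  model: registry.languageModel('openai > gpt-4o-mini'),
  prompt: 'Hello!',
});
```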
diff --git a/packages/ai/core/test/mock-speech-model-v1.ts b/packages/ai/core/test/mock-speech-model-v1.ts
new file mode 100644
index 000000000000..bf34eff6006a
--- /dev/null
+++ b/packages/ai/core/test/mock-speech-model-v1.ts
@@ -0,0 +1,24 @@
+import { SpeechModelV1 } from '@ai-sdk/provider';
+import { notImplemented } from './not-implemented';
+
+export class MockSpeechModelV1 implements SpeechModelV1 {
+ readonly specificationVersion = 'v1';
+ readonly provider: SpeechModelV1['provider'];
+ readonly modelId: SpeechModelV1['modelId'];
+
+ doGenerate: SpeechModelV1['doGenerate'];
+
+ constructor({
+ provider = 'mock-provider',
+ modelId = 'mock-model-id',
+ doGenerate = notImplemented,
+ }: {
+ provider?: SpeechModelV1['provider'];
+ modelId?: SpeechModelV1['modelId'];
+ doGenerate?: SpeechModelV1['doGenerate'];
+ } = {}) {
+ this.provider = provider;
+ this.modelId = modelId;
+ this.doGenerate = doGenerate;
+ }
+}
diff --git a/packages/ai/core/test/mock-transcription-model-v1.ts b/packages/ai/core/test/mock-transcription-model-v1.ts
new file mode 100644
index 000000000000..87d6007793bc
--- /dev/null
+++ b/packages/ai/core/test/mock-transcription-model-v1.ts
@@ -0,0 +1,24 @@
+import { TranscriptionModelV1 } from '@ai-sdk/provider';
+import { notImplemented } from './not-implemented';
+
+export class MockTranscriptionModelV1 implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+ readonly provider: TranscriptionModelV1['provider'];
+ readonly modelId: TranscriptionModelV1['modelId'];
+
+ doGenerate: TranscriptionModelV1['doGenerate'];
+
+ constructor({
+ provider = 'mock-provider',
+ modelId = 'mock-model-id',
+ doGenerate = notImplemented,
+ }: {
+ provider?: TranscriptionModelV1['provider'];
+ modelId?: TranscriptionModelV1['modelId'];
+ doGenerate?: TranscriptionModelV1['doGenerate'];
+ } = {}) {
+ this.provider = provider;
+ this.modelId = modelId;
+ this.doGenerate = doGenerate;
+ }
+}
diff --git a/packages/ai/core/tool/mcp/mcp-client.test.ts b/packages/ai/core/tool/mcp/mcp-client.test.ts
index 66fb8a4e5436..5505cec6eedd 100644
--- a/packages/ai/core/tool/mcp/mcp-client.test.ts
+++ b/packages/ai/core/tool/mcp/mcp-client.test.ts
@@ -276,4 +276,25 @@ describe('MCPClient', () => {
createMCPClient({ transport: invalidTransport }),
).rejects.toThrow();
});
+
+ it('should support zero-argument tools', async () => {
+ client = await createMCPClient({
+ transport: { type: 'sse', url: 'https://example.com/sse' },
+ });
+ const tools = await client.tools();
+ const tool = tools['mock-tool-no-args'];
+ expect(tool).toHaveProperty('parameters');
+ expect(tool.parameters).toMatchObject({
+ jsonSchema: {
+ type: 'object',
+ properties: {},
+ additionalProperties: false,
+ },
+ });
+
+ const result = await tool.execute({}, { messages: [], toolCallId: '1' });
+ expect(result).toEqual({
+ content: [{ type: 'text', text: 'Mock tool call result' }],
+ });
+ });
});
diff --git a/packages/ai/core/tool/mcp/mcp-client.ts b/packages/ai/core/tool/mcp/mcp-client.ts
index 1b860b7b55b3..cf2822ad4074 100644
--- a/packages/ai/core/tool/mcp/mcp-client.ts
+++ b/packages/ai/core/tool/mcp/mcp-client.ts
@@ -61,9 +61,13 @@ export async function createMCPClient(
* Tool parameters are automatically inferred from the server's JSON schema
* if not explicitly provided in the tools configuration
*
+ * This client is meant to communicate with a single server. To communicate with and fetch tools from multiple servers, create a separate client instance per server.
+ *
* Not supported:
* - Client options (e.g. sampling, roots) as they are not needed for tool conversion
* - Accepting notifications
+ * - Session management (when passing a sessionId to an instance of the Streamable HTTP transport)
+ * - Resumable SSE streams
*/
class MCPClient {
private transport: MCPTransport;
@@ -163,6 +167,25 @@ class MCPClient {
this.onClose();
}
+ private assertCapability(method: string): void {
+ switch (method) {
+ case 'initialize':
+ break;
+ case 'tools/list':
+ case 'tools/call':
+ if (!this.serverCapabilities.tools) {
+ throw new MCPClientError({
+ message: `Server does not support tools`,
+ });
+ }
+ break;
+ default:
+ throw new MCPClientError({
+ message: `Unsupported method: ${method}`,
+ });
+ }
+ }
+
private async request>({
request,
resultSchema,
@@ -181,6 +204,8 @@ class MCPClient {
);
}
+ this.assertCapability(request.method);
+
const signal = options?.signal;
signal?.throwIfAborted();
@@ -214,7 +239,7 @@ class MCPClient {
resolve(result);
} catch (error) {
const parseError = new MCPClientError({
- message: 'Failed to parse server initialization result',
+ message: 'Failed to parse server response',
cause: error,
});
reject(parseError);
@@ -235,12 +260,6 @@ class MCPClient {
params?: PaginatedRequest['params'];
options?: RequestOptions;
} = {}): Promise {
- if (!this.serverCapabilities.tools) {
- throw new MCPClientError({
- message: `Server does not support tools`,
- });
- }
-
try {
return this.request({
request: { method: 'tools/list', params },
@@ -261,12 +280,6 @@ class MCPClient {
    args: Record<string, unknown>;
options?: ToolExecutionOptions;
}): Promise {
- if (!this.serverCapabilities.tools) {
- throw new MCPClientError({
- message: `Server does not support tools`,
- });
- }
-
try {
return this.request({
request: { method: 'tools/call', params: { name, arguments: args } },
@@ -309,7 +322,11 @@ class MCPClient {
const parameters =
schemas === 'automatic'
- ? jsonSchema(inputSchema as JSONSchema7)
+ ? jsonSchema({
+ ...inputSchema,
+ properties: inputSchema.properties ?? {},
+ additionalProperties: false,
+ } as JSONSchema7)
: schemas[name].parameters;
const self = this;
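
A usage sketch for the client documented above (not part of the diff; the server URL, model, and prompt are placeholders). With the schema change, zero-argument tools are exposed with an empty object schema and `additionalProperties: false`, so they validate against `{}`:

```ts
import {
  experimental_createMCPClient as createMCPClient,
  generateText,
} from 'ai';
import { openai } from '@ai-sdk/openai';

// One client per server: the client does not multiplex across servers.
const mcpClient = await createMCPClient({
  transport: { type: 'sse', url: 'https://example.com/sse' },
});

try {
  // Zero-argument tools now resolve to { type: 'object', properties: {}, additionalProperties: false }.
  const tools = await mcpClient.tools();

  const { text } = await generateText({
    model: openai('gpt-4o'),
    tools,
    prompt: 'Use the available tools to answer.',
  });

  console.log(text);
} finally {
  await mcpClient.close();
}
```
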
diff --git a/packages/ai/core/tool/mcp/mock-mcp-transport.ts b/packages/ai/core/tool/mcp/mock-mcp-transport.ts
index f6e48097d077..86bdf3f0a6f3 100644
--- a/packages/ai/core/tool/mcp/mock-mcp-transport.ts
+++ b/packages/ai/core/tool/mcp/mock-mcp-transport.ts
@@ -14,6 +14,13 @@ const DEFAULT_TOOLS: MCPTool[] = [
},
},
},
+ {
+ name: 'mock-tool-no-args',
+ description: 'A mock tool for testing',
+ inputSchema: {
+ type: 'object',
+ },
+ },
];
export class MockMCPTransport implements MCPTransport {
@@ -75,6 +82,17 @@ export class MockMCPTransport implements MCPTransport {
if (message.method === 'tools/list') {
await delay(10);
+ if (this.tools.length === 0) {
+ this.onmessage?.({
+ jsonrpc: '2.0',
+ id: message.id,
+ error: {
+ code: -32000,
+ message: 'Method not supported',
+ },
+ });
+ return;
+ }
this.onmessage?.({
jsonrpc: '2.0',
id: message.id,
diff --git a/packages/ai/core/transcribe/index.ts b/packages/ai/core/transcribe/index.ts
new file mode 100644
index 000000000000..596a3d8b7af7
--- /dev/null
+++ b/packages/ai/core/transcribe/index.ts
@@ -0,0 +1,2 @@
+export { transcribe as experimental_transcribe } from './transcribe';
+export type { TranscriptionResult as Experimental_TranscriptionResult } from './transcribe-result';
diff --git a/packages/ai/core/transcribe/transcribe-result.ts b/packages/ai/core/transcribe/transcribe-result.ts
new file mode 100644
index 000000000000..ed5a34d2289f
--- /dev/null
+++ b/packages/ai/core/transcribe/transcribe-result.ts
@@ -0,0 +1,60 @@
+import { JSONValue } from '@ai-sdk/provider';
+import { TranscriptionWarning } from '../types/transcription-model';
+import { TranscriptionModelResponseMetadata } from '../types/transcription-model-response-metadata';
+
+/**
+The result of a `transcribe` call.
+It contains the transcript and additional information.
+ */
+export interface TranscriptionResult {
+ /**
+ * The complete transcribed text from the audio.
+ */
+ readonly text: string;
+
+ /**
+ * Array of transcript segments with timing information.
+ * Each segment represents a portion of the transcribed text with start and end times.
+ */
+ readonly segments: Array<{
+ /**
+ * The text content of this segment.
+ */
+ readonly text: string;
+ /**
+ * The start time of this segment in seconds.
+ */
+ readonly startSecond: number;
+ /**
+ * The end time of this segment in seconds.
+ */
+ readonly endSecond: number;
+ }>;
+
+ /**
+ * The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
+ * May be undefined if the language couldn't be detected.
+ */
+ readonly language: string | undefined;
+
+ /**
+ * The total duration of the audio file in seconds.
+ * May be undefined if the duration couldn't be determined.
+ */
+ readonly durationInSeconds: number | undefined;
+
+ /**
+ Warnings for the call, e.g. unsupported settings.
+ */
+ readonly warnings: Array<TranscriptionWarning>;
+
+ /**
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
+ */
+ readonly responses: Array<TranscriptionModelResponseMetadata>;
+
+ /**
+ Provider metadata from the provider.
+ */
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
+}
diff --git a/packages/ai/core/transcribe/transcribe.test.ts b/packages/ai/core/transcribe/transcribe.test.ts
new file mode 100644
index 000000000000..91d843178031
--- /dev/null
+++ b/packages/ai/core/transcribe/transcribe.test.ts
@@ -0,0 +1,229 @@
+import {
+ JSONValue,
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import { MockTranscriptionModelV1 } from '../test/mock-transcription-model-v1';
+import { transcribe } from './transcribe';
+
+const audioData = new Uint8Array([1, 2, 3, 4]); // Sample audio data
+const testDate = new Date(2024, 0, 1);
+
+const sampleTranscript = {
+ text: 'This is a sample transcript.',
+ segments: [
+ {
+ startSecond: 0,
+ endSecond: 2.5,
+ text: 'This is a',
+ },
+ {
+ startSecond: 2.5,
+ endSecond: 4.0,
+ text: 'sample transcript.',
+ },
+ ],
+ language: 'en',
+ durationInSeconds: 4.0,
+};
+
+const createMockResponse = (options: {
+ text: string;
+ segments: Array<{
+ text: string;
+ startSecond: number;
+ endSecond: number;
+ }>;
+ language?: string;
+ durationInSeconds?: number;
+ warnings?: TranscriptionModelV1CallWarning[];
+ timestamp?: Date;
+ modelId?: string;
+ headers?: Record<string, string>;
+ providerMetadata?: Record<string, Record<string, JSONValue>>;
+}) => ({
+ text: options.text,
+ segments: options.segments,
+ language: options.language,
+ durationInSeconds: options.durationInSeconds,
+ warnings: options.warnings ?? [],
+ response: {
+ timestamp: options.timestamp ?? new Date(),
+ modelId: options.modelId ?? 'test-model-id',
+ headers: options.headers ?? {},
+ },
+ providerMetadata: options.providerMetadata ?? {},
+});
+
+describe('transcribe', () => {
+ it('should send args to doGenerate', async () => {
+ const abortController = new AbortController();
+ const abortSignal = abortController.signal;
+
+ let capturedArgs!: Parameters<TranscriptionModelV1['doGenerate']>[0];
+
+ await transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async args => {
+ capturedArgs = args;
+ return createMockResponse({
+ ...sampleTranscript,
+ });
+ },
+ }),
+ audio: audioData,
+ headers: { 'custom-request-header': 'request-header-value' },
+ abortSignal,
+ });
+
+ expect(capturedArgs).toStrictEqual({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: { 'custom-request-header': 'request-header-value' },
+ abortSignal,
+ providerOptions: {},
+ });
+ });
+
+ it('should return warnings', async () => {
+ const result = await transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ ...sampleTranscript,
+ warnings: [
+ {
+ type: 'other',
+ message: 'Setting is not supported',
+ },
+ ],
+ providerMetadata: {
+ 'test-provider': {
+ 'test-key': 'test-value',
+ },
+ },
+ }),
+ }),
+ audio: audioData,
+ });
+
+ expect(result.warnings).toStrictEqual([
+ {
+ type: 'other',
+ message: 'Setting is not supported',
+ },
+ ]);
+ });
+
+ it('should return the transcript', async () => {
+ const result = await transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ ...sampleTranscript,
+ }),
+ }),
+ audio: audioData,
+ });
+
+ expect(result).toEqual({
+ ...sampleTranscript,
+ warnings: [],
+ responses: [
+ {
+ timestamp: expect.any(Date),
+ modelId: 'test-model-id',
+ headers: {},
+ },
+ ],
+ providerMetadata: {},
+ });
+ });
+
+ describe('error handling', () => {
+ it('should throw NoTranscriptGeneratedError when no transcript is returned', async () => {
+ await expect(
+ transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ text: '',
+ segments: [],
+ language: 'en',
+ durationInSeconds: 0,
+ timestamp: testDate,
+ }),
+ }),
+ audio: audioData,
+ }),
+ ).rejects.toMatchObject({
+ name: 'AI_NoTranscriptGeneratedError',
+ message: 'No transcript generated.',
+ responses: [
+ {
+ timestamp: testDate,
+ modelId: expect.any(String),
+ },
+ ],
+ });
+ });
+
+ it('should include response headers in error when no transcript generated', async () => {
+ await expect(
+ transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ text: '',
+ segments: [],
+ language: 'en',
+ durationInSeconds: 0,
+ timestamp: testDate,
+ headers: {
+ 'custom-response-header': 'response-header-value',
+ },
+ }),
+ }),
+ audio: audioData,
+ }),
+ ).rejects.toMatchObject({
+ name: 'AI_NoTranscriptGeneratedError',
+ message: 'No transcript generated.',
+ responses: [
+ {
+ timestamp: testDate,
+ modelId: expect.any(String),
+ headers: {
+ 'custom-response-header': 'response-header-value',
+ },
+ },
+ ],
+ });
+ });
+ });
+
+ it('should return response metadata', async () => {
+ const testHeaders = { 'x-test': 'value' };
+
+ const result = await transcribe({
+ model: new MockTranscriptionModelV1({
+ doGenerate: async () =>
+ createMockResponse({
+ ...sampleTranscript,
+ timestamp: testDate,
+ modelId: 'test-model',
+ headers: testHeaders,
+ }),
+ }),
+ audio: audioData,
+ });
+
+ expect(result.responses).toStrictEqual([
+ {
+ timestamp: testDate,
+ modelId: 'test-model',
+ headers: testHeaders,
+ },
+ ]);
+ });
+});
diff --git a/packages/ai/core/transcribe/transcribe.ts b/packages/ai/core/transcribe/transcribe.ts
new file mode 100644
index 000000000000..1c7029102fc2
--- /dev/null
+++ b/packages/ai/core/transcribe/transcribe.ts
@@ -0,0 +1,150 @@
+import { JSONValue, TranscriptionModelV1 } from '@ai-sdk/provider';
+import { NoTranscriptGeneratedError } from '../../errors/no-transcript-generated-error';
+import { download } from '../../util/download';
+import { DataContent } from '../prompt';
+import { convertDataContentToUint8Array } from '../prompt/data-content';
+import { prepareRetries } from '../prompt/prepare-retries';
+import { ProviderOptions } from '../types/provider-metadata';
+import { TranscriptionWarning } from '../types/transcription-model';
+import { TranscriptionModelResponseMetadata } from '../types/transcription-model-response-metadata';
+import {
+ audioMimeTypeSignatures,
+ detectMimeType,
+} from '../util/detect-mimetype';
+import { TranscriptionResult } from './transcribe-result';
+
+/**
+Generates transcripts using a transcription model.
+
+@param model - The transcription model to use.
+@param audio - The audio data to transcribe as DataContent (string | Uint8Array | ArrayBuffer | Buffer) or a URL.
+@param providerOptions - Additional provider-specific options that are passed through to the provider
+as body parameters.
+@param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
+@param abortSignal - An optional abort signal that can be used to cancel the call.
+@param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
+
+@returns A result object that contains the generated transcript.
+ */
+export async function transcribe({
+ model,
+ audio,
+ providerOptions = {},
+ maxRetries: maxRetriesArg,
+ abortSignal,
+ headers,
+}: {
+ /**
+The transcription model to use.
+ */
+ model: TranscriptionModelV1;
+
+ /**
+The audio data to transcribe.
+ */
+ audio: DataContent | URL;
+
+ /**
+Additional provider-specific options that are passed through to the provider
+as body parameters.
+
+The outer record is keyed by the provider name, and the inner
+record is keyed by the provider-specific metadata key.
+```ts
+{
+ "openai": {
+ "temperature": 0
+ }
+}
+```
+ */
+ providerOptions?: ProviderOptions;
+
+ /**
+Maximum number of retries per transcription model call. Set to 0 to disable retries.
+
+@default 2
+ */
+ maxRetries?: number;
+
+ /**
+Abort signal.
+ */
+ abortSignal?: AbortSignal;
+
+ /**
+Additional headers to include in the request.
+Only applicable for HTTP-based providers.
+ */
+ headers?: Record<string, string>;
+}): Promise<TranscriptionResult> {
+ const { retry } = prepareRetries({ maxRetries: maxRetriesArg });
+ const audioData =
+ audio instanceof URL
+ ? (await download({ url: audio })).data
+ : convertDataContentToUint8Array(audio);
+
+ const result = await retry(() =>
+ model.doGenerate({
+ audio: audioData,
+ abortSignal,
+ headers,
+ providerOptions,
+ mediaType:
+ detectMimeType({
+ data: audioData,
+ signatures: audioMimeTypeSignatures,
+ }) ?? 'audio/wav',
+ }),
+ );
+
+ if (!result.text) {
+ throw new NoTranscriptGeneratedError({ responses: [result.response] });
+ }
+
+ return new DefaultTranscriptionResult({
+ text: result.text,
+ segments: result.segments,
+ language: result.language,
+ durationInSeconds: result.durationInSeconds,
+ warnings: result.warnings,
+ responses: [result.response],
+ providerMetadata: result.providerMetadata,
+ });
+}
+
+class DefaultTranscriptionResult implements TranscriptionResult {
+ readonly text: string;
+ readonly segments: Array<{
+ text: string;
+ startSecond: number;
+ endSecond: number;
+ }>;
+ readonly language: string | undefined;
+ readonly durationInSeconds: number | undefined;
+ readonly warnings: Array<TranscriptionWarning>;
+ readonly responses: Array<TranscriptionModelResponseMetadata>;
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
+
+ constructor(options: {
+ text: string;
+ segments: Array<{
+ text: string;
+ startSecond: number;
+ endSecond: number;
+ }>;
+ language: string | undefined;
+ durationInSeconds: number | undefined;
+ warnings: Array<TranscriptionWarning>;
+ responses: Array<TranscriptionModelResponseMetadata>;
+ providerMetadata: Record<string, Record<string, JSONValue>> | undefined;
+ }) {
+ this.text = options.text;
+ this.segments = options.segments;
+ this.language = options.language;
+ this.durationInSeconds = options.durationInSeconds;
+ this.warnings = options.warnings;
+ this.responses = options.responses;
+ this.providerMetadata = options.providerMetadata ?? {};
+ }
+}
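
A usage sketch for the new `experimental_transcribe` export (not part of the diff). It assumes a provider that exposes a transcription model, e.g. the OpenAI provider's `openai.transcription(...)`; the file path and model id are placeholders:

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'node:fs/promises';

// Audio can be DataContent (Uint8Array, ArrayBuffer, Buffer, base64 string) or a URL.
const result = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('./recording.wav'),
});

console.log(result.text);
console.log(result.segments); // [{ text, startSecond, endSecond }, ...]
console.log(result.language, result.durationInSeconds);
console.log(result.warnings, result.responses);
```
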
diff --git a/packages/ai/core/types/index.ts b/packages/ai/core/types/index.ts
index 0aea0780458a..733fa9b5bee3 100644
--- a/packages/ai/core/types/index.ts
+++ b/packages/ai/core/types/index.ts
@@ -21,3 +21,10 @@ export type { LanguageModelResponseMetadata } from './language-model-response-me
export type { Provider } from './provider';
export type { ProviderMetadata } from './provider-metadata';
export type { EmbeddingModelUsage, LanguageModelUsage } from './usage';
+export type {
+ TranscriptionModel,
+ TranscriptionWarning,
+} from './transcription-model';
+export type { TranscriptionModelResponseMetadata } from './transcription-model-response-metadata';
+export type { SpeechModel, SpeechWarning } from './speech-model';
+export type { SpeechModelResponseMetadata } from './speech-model-response-metadata';
diff --git a/packages/ai/core/types/speech-model-response-metadata.ts b/packages/ai/core/types/speech-model-response-metadata.ts
new file mode 100644
index 000000000000..6c908c6da2fb
--- /dev/null
+++ b/packages/ai/core/types/speech-model-response-metadata.ts
@@ -0,0 +1,16 @@
+export type SpeechModelResponseMetadata = {
+ /**
+Timestamp for the start of the generated response.
+ */
+ timestamp: Date;
+
+ /**
+The ID of the response model that was used to generate the response.
+ */
+ modelId: string;
+
+ /**
+Response headers.
+ */
+ headers?: Record<string, string>;
+};
diff --git a/packages/ai/core/types/speech-model.ts b/packages/ai/core/types/speech-model.ts
new file mode 100644
index 000000000000..92359fa2f66c
--- /dev/null
+++ b/packages/ai/core/types/speech-model.ts
@@ -0,0 +1,12 @@
+import { SpeechModelV1, SpeechModelV1CallWarning } from '@ai-sdk/provider';
+
+/**
+Speech model that is used by the AI SDK Core functions.
+ */
+export type SpeechModel = SpeechModelV1;
+
+/**
+Warning from the model provider for this call. The call will proceed, but e.g.
+some settings might not be supported, which can lead to suboptimal results.
+ */
+export type SpeechWarning = SpeechModelV1CallWarning;
diff --git a/packages/ai/core/types/transcription-model-response-metadata.ts b/packages/ai/core/types/transcription-model-response-metadata.ts
new file mode 100644
index 000000000000..9fcf62f3fcd6
--- /dev/null
+++ b/packages/ai/core/types/transcription-model-response-metadata.ts
@@ -0,0 +1,16 @@
+export type TranscriptionModelResponseMetadata = {
+ /**
+Timestamp for the start of the generated response.
+ */
+ timestamp: Date;
+
+ /**
+The ID of the response model that was used to generate the response.
+ */
+ modelId: string;
+
+ /**
+Response headers.
+ */
+ headers?: Record<string, string>;
+};
diff --git a/packages/ai/core/types/transcription-model.ts b/packages/ai/core/types/transcription-model.ts
new file mode 100644
index 000000000000..d2eba1244fd0
--- /dev/null
+++ b/packages/ai/core/types/transcription-model.ts
@@ -0,0 +1,15 @@
+import {
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+
+/**
+Transcription model that is used by the AI SDK Core functions.
+ */
+export type TranscriptionModel = TranscriptionModelV1;
+
+/**
+Warning from the model provider for this call. The call will proceed, but e.g.
+some settings might not be supported, which can lead to suboptimal results.
+ */
+export type TranscriptionWarning = TranscriptionModelV1CallWarning;
diff --git a/packages/ai/core/util/detect-image-mimetype.test.ts b/packages/ai/core/util/detect-image-mimetype.test.ts
deleted file mode 100644
index cf5cfa61988e..000000000000
--- a/packages/ai/core/util/detect-image-mimetype.test.ts
+++ /dev/null
@@ -1,138 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { detectImageMimeType } from './detect-image-mimetype';
-
-describe('detectImageMimeType', () => {
- describe('GIF', () => {
- it('should detect GIF from bytes', () => {
- const gifBytes = new Uint8Array([0x47, 0x49, 0x46, 0xff, 0xff]);
- expect(detectImageMimeType(gifBytes)).toBe('image/gif');
- });
-
- it('should detect GIF from base64', () => {
- const gifBase64 = 'R0lGabc123'; // Base64 string starting with GIF signature
- expect(detectImageMimeType(gifBase64)).toBe('image/gif');
- });
- });
-
- describe('PNG', () => {
- it('should detect PNG from bytes', () => {
- const pngBytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0xff, 0xff]);
- expect(detectImageMimeType(pngBytes)).toBe('image/png');
- });
-
- it('should detect PNG from base64', () => {
- const pngBase64 = 'iVBORwabc123'; // Base64 string starting with PNG signature
- expect(detectImageMimeType(pngBase64)).toBe('image/png');
- });
- });
-
- describe('JPEG', () => {
- it('should detect JPEG from bytes', () => {
- const jpegBytes = new Uint8Array([0xff, 0xd8, 0xff, 0xff]);
- expect(detectImageMimeType(jpegBytes)).toBe('image/jpeg');
- });
-
- it('should detect JPEG from base64', () => {
- const jpegBase64 = '/9j/abc123'; // Base64 string starting with JPEG signature
- expect(detectImageMimeType(jpegBase64)).toBe('image/jpeg');
- });
- });
-
- describe('WebP', () => {
- it('should detect WebP from bytes', () => {
- const webpBytes = new Uint8Array([0x52, 0x49, 0x46, 0x46, 0xff, 0xff]);
- expect(detectImageMimeType(webpBytes)).toBe('image/webp');
- });
-
- it('should detect WebP from base64', () => {
- const webpBase64 = 'UklGRgabc123'; // Base64 string starting with WebP signature
- expect(detectImageMimeType(webpBase64)).toBe('image/webp');
- });
- });
-
- describe('BMP', () => {
- it('should detect BMP from bytes', () => {
- const bmpBytes = new Uint8Array([0x42, 0x4d, 0xff, 0xff]);
- expect(detectImageMimeType(bmpBytes)).toBe('image/bmp');
- });
-
- it('should detect BMP from base64', () => {
- const bmpBase64 = 'Qkabc123'; // Base64 string starting with BMP signature
- expect(detectImageMimeType(bmpBase64)).toBe('image/bmp');
- });
- });
-
- describe('TIFF', () => {
- it('should detect TIFF (little endian) from bytes', () => {
- const tiffLEBytes = new Uint8Array([0x49, 0x49, 0x2a, 0x00, 0xff]);
- expect(detectImageMimeType(tiffLEBytes)).toBe('image/tiff');
- });
-
- it('should detect TIFF (little endian) from base64', () => {
- const tiffLEBase64 = 'SUkqAAabc123'; // Base64 string starting with TIFF LE signature
- expect(detectImageMimeType(tiffLEBase64)).toBe('image/tiff');
- });
-
- it('should detect TIFF (big endian) from bytes', () => {
- const tiffBEBytes = new Uint8Array([0x4d, 0x4d, 0x00, 0x2a, 0xff]);
- expect(detectImageMimeType(tiffBEBytes)).toBe('image/tiff');
- });
-
- it('should detect TIFF (big endian) from base64', () => {
- const tiffBEBase64 = 'TU0AKgabc123'; // Base64 string starting with TIFF BE signature
- expect(detectImageMimeType(tiffBEBase64)).toBe('image/tiff');
- });
- });
-
- describe('AVIF', () => {
- it('should detect AVIF from bytes', () => {
- const avifBytes = new Uint8Array([
- 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66,
- 0xff,
- ]);
- expect(detectImageMimeType(avifBytes)).toBe('image/avif');
- });
-
- it('should detect AVIF from base64', () => {
- const avifBase64 = 'AAAAIGZ0eXBhdmlmabc123'; // Base64 string starting with AVIF signature
- expect(detectImageMimeType(avifBase64)).toBe('image/avif');
- });
- });
-
- describe('HEIC', () => {
- it('should detect HEIC from bytes', () => {
- const heicBytes = new Uint8Array([
- 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63,
- 0xff,
- ]);
- expect(detectImageMimeType(heicBytes)).toBe('image/heic');
- });
-
- it('should detect HEIC from base64', () => {
- const heicBase64 = 'AAAAIGZ0eXBoZWljabc123'; // Base64 string starting with HEIC signature
- expect(detectImageMimeType(heicBase64)).toBe('image/heic');
- });
- });
-
- describe('error cases', () => {
- it('should return undefined for unknown image formats', () => {
- const unknownBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03]);
- expect(detectImageMimeType(unknownBytes)).toBeUndefined();
- });
-
- it('should return undefined for empty arrays', () => {
- const emptyBytes = new Uint8Array([]);
- expect(detectImageMimeType(emptyBytes)).toBeUndefined();
- });
-
- it('should return undefined for arrays shorter than signature length', () => {
- const shortBytes = new Uint8Array([0x89, 0x50]); // Incomplete PNG signature
- expect(detectImageMimeType(shortBytes)).toBeUndefined();
- });
-
- it('should return undefined for invalid base64 strings', () => {
- const invalidBase64 = 'invalid123';
- expect(detectImageMimeType(invalidBase64)).toBeUndefined();
- });
- });
-});
diff --git a/packages/ai/core/util/detect-image-mimetype.ts b/packages/ai/core/util/detect-image-mimetype.ts
deleted file mode 100644
index cbb708fb50eb..000000000000
--- a/packages/ai/core/util/detect-image-mimetype.ts
+++ /dev/null
@@ -1,68 +0,0 @@
-const mimeTypeSignatures = [
- {
- mimeType: 'image/gif' as const,
- bytesPrefix: [0x47, 0x49, 0x46],
- base64Prefix: 'R0lG',
- },
- {
- mimeType: 'image/png' as const,
- bytesPrefix: [0x89, 0x50, 0x4e, 0x47],
- base64Prefix: 'iVBORw',
- },
- {
- mimeType: 'image/jpeg' as const,
- bytesPrefix: [0xff, 0xd8],
- base64Prefix: '/9j/',
- },
- {
- mimeType: 'image/webp' as const,
- bytesPrefix: [0x52, 0x49, 0x46, 0x46],
- base64Prefix: 'UklGRg',
- },
- {
- mimeType: 'image/bmp' as const,
- bytesPrefix: [0x42, 0x4d],
- base64Prefix: 'Qk',
- },
- {
- mimeType: 'image/tiff' as const,
- bytesPrefix: [0x49, 0x49, 0x2a, 0x00],
- base64Prefix: 'SUkqAA',
- },
- {
- mimeType: 'image/tiff' as const,
- bytesPrefix: [0x4d, 0x4d, 0x00, 0x2a],
- base64Prefix: 'TU0AKg',
- },
- {
- mimeType: 'image/avif' as const,
- bytesPrefix: [
- 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66,
- ],
- base64Prefix: 'AAAAIGZ0eXBhdmlm',
- },
- {
- mimeType: 'image/heic' as const,
- bytesPrefix: [
- 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63,
- ],
- base64Prefix: 'AAAAIGZ0eXBoZWlj',
- },
-] as const;
-
-export function detectImageMimeType(
- image: Uint8Array | string,
-): (typeof mimeTypeSignatures)[number]['mimeType'] | undefined {
- for (const signature of mimeTypeSignatures) {
- if (
- typeof image === 'string'
- ? image.startsWith(signature.base64Prefix)
- : image.length >= signature.bytesPrefix.length &&
- signature.bytesPrefix.every((byte, index) => image[index] === byte)
- ) {
- return signature.mimeType;
- }
- }
-
- return undefined;
-}
diff --git a/packages/ai/core/util/detect-mimetype.test.ts b/packages/ai/core/util/detect-mimetype.test.ts
new file mode 100644
index 000000000000..a4d921807fb1
--- /dev/null
+++ b/packages/ai/core/util/detect-mimetype.test.ts
@@ -0,0 +1,475 @@
+import { describe, it, expect } from 'vitest';
+import {
+ detectMimeType,
+ imageMimeTypeSignatures,
+ audioMimeTypeSignatures,
+} from './detect-mimetype';
+import { convertUint8ArrayToBase64 } from '@ai-sdk/provider-utils';
+
+describe('detectMimeType', () => {
+ describe('GIF', () => {
+ it('should detect GIF from bytes', () => {
+ const gifBytes = new Uint8Array([0x47, 0x49, 0x46, 0xff, 0xff]);
+ expect(
+ detectMimeType({ data: gifBytes, signatures: imageMimeTypeSignatures }),
+ ).toBe('image/gif');
+ });
+
+ it('should detect GIF from base64', () => {
+ const gifBase64 = 'R0lGabc123'; // Base64 string starting with GIF signature
+ expect(
+ detectMimeType({
+ data: gifBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/gif');
+ });
+ });
+
+ describe('PNG', () => {
+ it('should detect PNG from bytes', () => {
+ const pngBytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0xff, 0xff]);
+ expect(
+ detectMimeType({ data: pngBytes, signatures: imageMimeTypeSignatures }),
+ ).toBe('image/png');
+ });
+
+ it('should detect PNG from base64', () => {
+ const pngBase64 = 'iVBORwabc123'; // Base64 string starting with PNG signature
+ expect(
+ detectMimeType({
+ data: pngBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/png');
+ });
+ });
+
+ describe('JPEG', () => {
+ it('should detect JPEG from bytes', () => {
+ const jpegBytes = new Uint8Array([0xff, 0xd8, 0xff, 0xff]);
+ expect(
+ detectMimeType({
+ data: jpegBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/jpeg');
+ });
+
+ it('should detect JPEG from base64', () => {
+ const jpegBase64 = '/9j/abc123'; // Base64 string starting with JPEG signature
+ expect(
+ detectMimeType({
+ data: jpegBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/jpeg');
+ });
+ });
+
+ describe('WebP', () => {
+ it('should detect WebP from bytes', () => {
+ const webpBytes = new Uint8Array([0x52, 0x49, 0x46, 0x46, 0xff, 0xff]);
+ expect(
+ detectMimeType({
+ data: webpBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/webp');
+ });
+
+ it('should detect WebP from base64', () => {
+ const webpBase64 = 'UklGRgabc123'; // Base64 string starting with WebP signature
+ expect(
+ detectMimeType({
+ data: webpBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/webp');
+ });
+ });
+
+ describe('BMP', () => {
+ it('should detect BMP from bytes', () => {
+ const bmpBytes = new Uint8Array([0x42, 0x4d, 0xff, 0xff]);
+ expect(
+ detectMimeType({ data: bmpBytes, signatures: imageMimeTypeSignatures }),
+ ).toBe('image/bmp');
+ });
+
+ it('should detect BMP from base64', () => {
+ const bmpBase64 = 'Qkabc123'; // Base64 string starting with BMP signature
+ expect(
+ detectMimeType({
+ data: bmpBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/bmp');
+ });
+ });
+
+ describe('TIFF', () => {
+ it('should detect TIFF (little endian) from bytes', () => {
+ const tiffLEBytes = new Uint8Array([0x49, 0x49, 0x2a, 0x00, 0xff]);
+ expect(
+ detectMimeType({
+ data: tiffLEBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/tiff');
+ });
+
+ it('should detect TIFF (little endian) from base64', () => {
+ const tiffLEBase64 = 'SUkqAAabc123'; // Base64 string starting with TIFF LE signature
+ expect(
+ detectMimeType({
+ data: tiffLEBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/tiff');
+ });
+
+ it('should detect TIFF (big endian) from bytes', () => {
+ const tiffBEBytes = new Uint8Array([0x4d, 0x4d, 0x00, 0x2a, 0xff]);
+ expect(
+ detectMimeType({
+ data: tiffBEBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/tiff');
+ });
+
+ it('should detect TIFF (big endian) from base64', () => {
+ const tiffBEBase64 = 'TU0AKgabc123'; // Base64 string starting with TIFF BE signature
+ expect(
+ detectMimeType({
+ data: tiffBEBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/tiff');
+ });
+ });
+
+ describe('AVIF', () => {
+ it('should detect AVIF from bytes', () => {
+ const avifBytes = new Uint8Array([
+ 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66,
+ 0xff,
+ ]);
+ expect(
+ detectMimeType({
+ data: avifBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/avif');
+ });
+
+ it('should detect AVIF from base64', () => {
+ const avifBase64 = 'AAAAIGZ0eXBhdmlmabc123'; // Base64 string starting with AVIF signature
+ expect(
+ detectMimeType({
+ data: avifBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/avif');
+ });
+ });
+
+ describe('HEIC', () => {
+ it('should detect HEIC from bytes', () => {
+ const heicBytes = new Uint8Array([
+ 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63,
+ 0xff,
+ ]);
+ expect(
+ detectMimeType({
+ data: heicBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/heic');
+ });
+
+ it('should detect HEIC from base64', () => {
+ const heicBase64 = 'AAAAIGZ0eXBoZWljabc123'; // Base64 string starting with HEIC signature
+ expect(
+ detectMimeType({
+ data: heicBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBe('image/heic');
+ });
+ });
+
+ describe('MP3', () => {
+ it('should detect MP3 from bytes', () => {
+ const mp3Bytes = new Uint8Array([0xff, 0xfb]);
+ expect(
+ detectMimeType({ data: mp3Bytes, signatures: audioMimeTypeSignatures }),
+ ).toBe('audio/mpeg');
+ });
+
+ it('should detect MP3 from base64', () => {
+ const mp3Base64 = '//s='; // Base64 string starting with MP3 signature
+ expect(
+ detectMimeType({
+ data: mp3Base64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/mpeg');
+ });
+
+ it('should detect MP3 with ID3v2 tags from bytes', () => {
+ const mp3WithID3Bytes = new Uint8Array([
+ 0x49,
+ 0x44,
+ 0x33, // 'ID3'
+ 0x03,
+ 0x00, // version
+ 0x00, // flags
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x0a, // size (10 bytes)
+ // 10 bytes of ID3 data
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ // MP3 frame header
+ 0xff,
+ 0xfb,
+ 0x00,
+ 0x00,
+ ]);
+ expect(
+ detectMimeType({
+ data: mp3WithID3Bytes,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/mpeg');
+ });
+ it('should detect MP3 with ID3v2 tags from base64', () => {
+ const mp3WithID3Bytes = new Uint8Array([
+ 0x49,
+ 0x44,
+ 0x33, // 'ID3'
+ 0x03,
+ 0x00, // version
+ 0x00, // flags
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x0a, // size (10 bytes)
+ // 10 bytes of ID3 data
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ // MP3 frame header
+ 0xff,
+ 0xfb,
+ 0x00,
+ 0x00,
+ ]);
+ const mp3WithID3Base64 = convertUint8ArrayToBase64(mp3WithID3Bytes);
+ expect(
+ detectMimeType({
+ data: mp3WithID3Base64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/mpeg');
+ });
+ });
+
+ describe('WAV', () => {
+ it('should detect WAV from bytes', () => {
+ const wavBytes = new Uint8Array([0x52, 0x49, 0x46, 0x46]);
+ expect(
+ detectMimeType({ data: wavBytes, signatures: audioMimeTypeSignatures }),
+ ).toBe('audio/wav');
+ });
+
+ it('should detect WAV from base64', () => {
+ const wavBase64 = 'UklGRiQ='; // Base64 string starting with WAV signature
+ expect(
+ detectMimeType({
+ data: wavBase64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/wav');
+ });
+ });
+
+ describe('OGG', () => {
+ it('should detect OGG from bytes', () => {
+ const oggBytes = new Uint8Array([0x4f, 0x67, 0x67, 0x53]);
+ expect(
+ detectMimeType({ data: oggBytes, signatures: audioMimeTypeSignatures }),
+ ).toBe('audio/ogg');
+ });
+
+ it('should detect OGG from base64', () => {
+ const oggBase64 = 'T2dnUw'; // Base64 string starting with OGG signature
+ expect(
+ detectMimeType({
+ data: oggBase64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/ogg');
+ });
+ });
+
+ describe('FLAC', () => {
+ it('should detect FLAC from bytes', () => {
+ const flacBytes = new Uint8Array([0x66, 0x4c, 0x61, 0x43]);
+ expect(
+ detectMimeType({
+ data: flacBytes,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/flac');
+ });
+
+ it('should detect FLAC from base64', () => {
+ const flacBase64 = 'ZkxhQw'; // Base64 string starting with FLAC signature
+ expect(
+ detectMimeType({
+ data: flacBase64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/flac');
+ });
+ });
+
+ describe('AAC', () => {
+ it('should detect AAC from bytes', () => {
+ const aacBytes = new Uint8Array([0x40, 0x15, 0x00, 0x00]);
+ expect(
+ detectMimeType({ data: aacBytes, signatures: audioMimeTypeSignatures }),
+ ).toBe('audio/aac');
+ });
+
+ it('should detect AAC from base64', () => {
+ const aacBase64 = 'QBUA'; // Base64 string starting with AAC signature
+ expect(
+ detectMimeType({
+ data: aacBase64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/aac');
+ });
+ });
+
+ describe('MP4', () => {
+ it('should detect MP4 from bytes', () => {
+ const mp4Bytes = new Uint8Array([0x66, 0x74, 0x79, 0x70]);
+ expect(
+ detectMimeType({ data: mp4Bytes, signatures: audioMimeTypeSignatures }),
+ ).toBe('audio/mp4');
+ });
+
+ it('should detect MP4 from base64', () => {
+ const mp4Base64 = 'ZnR5cA'; // Base64 string starting with MP4 signature
+ expect(
+ detectMimeType({
+ data: mp4Base64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBe('audio/mp4');
+ });
+ });
+
+ describe('error cases', () => {
+ it('should return undefined for unknown image formats', () => {
+ const unknownBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03]);
+ expect(
+ detectMimeType({
+ data: unknownBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for unknown audio formats', () => {
+ const unknownBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03]);
+ expect(
+ detectMimeType({
+ data: unknownBytes,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for empty arrays for image', () => {
+ const emptyBytes = new Uint8Array([]);
+ expect(
+ detectMimeType({
+ data: emptyBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for empty arrays for audio', () => {
+ const emptyBytes = new Uint8Array([]);
+ expect(
+ detectMimeType({
+ data: emptyBytes,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for arrays shorter than signature length for image', () => {
+ const shortBytes = new Uint8Array([0x89, 0x50]); // Incomplete PNG signature
+ expect(
+ detectMimeType({
+ data: shortBytes,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for arrays shorter than signature length for audio', () => {
+ const shortBytes = new Uint8Array([0x4f, 0x67]); // Incomplete OGG signature
+ expect(
+ detectMimeType({
+ data: shortBytes,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for invalid base64 strings for image', () => {
+ const invalidBase64 = 'invalid123';
+ expect(
+ detectMimeType({
+ data: invalidBase64,
+ signatures: imageMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+
+ it('should return undefined for invalid base64 strings for audio', () => {
+ const invalidBase64 = 'invalid123';
+ expect(
+ detectMimeType({
+ data: invalidBase64,
+ signatures: audioMimeTypeSignatures,
+ }),
+ ).toBeUndefined();
+ });
+ });
+});
diff --git a/packages/ai/core/util/detect-mimetype.ts b/packages/ai/core/util/detect-mimetype.ts
new file mode 100644
index 000000000000..e0fc7f8fa71b
--- /dev/null
+++ b/packages/ai/core/util/detect-mimetype.ts
@@ -0,0 +1,136 @@
+import { convertBase64ToUint8Array } from '@ai-sdk/provider-utils';
+
+export const imageMimeTypeSignatures = [
+ {
+ mimeType: 'image/gif' as const,
+ bytesPrefix: [0x47, 0x49, 0x46],
+ base64Prefix: 'R0lG',
+ },
+ {
+ mimeType: 'image/png' as const,
+ bytesPrefix: [0x89, 0x50, 0x4e, 0x47],
+ base64Prefix: 'iVBORw',
+ },
+ {
+ mimeType: 'image/jpeg' as const,
+ bytesPrefix: [0xff, 0xd8],
+ base64Prefix: '/9j/',
+ },
+ {
+ mimeType: 'image/webp' as const,
+ bytesPrefix: [0x52, 0x49, 0x46, 0x46],
+ base64Prefix: 'UklGRg',
+ },
+ {
+ mimeType: 'image/bmp' as const,
+ bytesPrefix: [0x42, 0x4d],
+ base64Prefix: 'Qk',
+ },
+ {
+ mimeType: 'image/tiff' as const,
+ bytesPrefix: [0x49, 0x49, 0x2a, 0x00],
+ base64Prefix: 'SUkqAA',
+ },
+ {
+ mimeType: 'image/tiff' as const,
+ bytesPrefix: [0x4d, 0x4d, 0x00, 0x2a],
+ base64Prefix: 'TU0AKg',
+ },
+ {
+ mimeType: 'image/avif' as const,
+ bytesPrefix: [
+ 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66,
+ ],
+ base64Prefix: 'AAAAIGZ0eXBhdmlm',
+ },
+ {
+ mimeType: 'image/heic' as const,
+ bytesPrefix: [
+ 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63,
+ ],
+ base64Prefix: 'AAAAIGZ0eXBoZWlj',
+ },
+] as const;
+
+export const audioMimeTypeSignatures = [
+ {
+ mimeType: 'audio/mpeg' as const,
+ bytesPrefix: [0xff, 0xfb],
+ base64Prefix: '//s=',
+ },
+ {
+ mimeType: 'audio/wav' as const,
+ bytesPrefix: [0x52, 0x49, 0x46, 0x46],
+ base64Prefix: 'UklGR',
+ },
+ {
+ mimeType: 'audio/ogg' as const,
+ bytesPrefix: [0x4f, 0x67, 0x67, 0x53],
+ base64Prefix: 'T2dnUw',
+ },
+ {
+ mimeType: 'audio/flac' as const,
+ bytesPrefix: [0x66, 0x4c, 0x61, 0x43],
+ base64Prefix: 'ZkxhQw',
+ },
+ {
+ mimeType: 'audio/aac' as const,
+ bytesPrefix: [0x40, 0x15, 0x00, 0x00],
+ base64Prefix: 'QBUA',
+ },
+ {
+ mimeType: 'audio/mp4' as const,
+ bytesPrefix: [0x66, 0x74, 0x79, 0x70],
+ base64Prefix: 'ZnR5cA',
+ },
+] as const;
+
+const stripID3 = (data: Uint8Array | string) => {
+ const bytes =
+ typeof data === 'string' ? convertBase64ToUint8Array(data) : data;
+ const id3Size =
+ ((bytes[6] & 0x7f) << 21) |
+ ((bytes[7] & 0x7f) << 14) |
+ ((bytes[8] & 0x7f) << 7) |
+ (bytes[9] & 0x7f);
+
+ // The raw MP3 starts here
+ return bytes.slice(id3Size + 10);
+};
+
+function stripID3TagsIfPresent(data: Uint8Array | string): Uint8Array | string {
+ const hasId3 =
+ (typeof data === 'string' && data.startsWith('SUQz')) ||
+ (typeof data !== 'string' &&
+ data.length > 10 &&
+ data[0] === 0x49 && // 'I'
+ data[1] === 0x44 && // 'D'
+ data[2] === 0x33); // '3'
+
+ return hasId3 ? stripID3(data) : data;
+}
+
+export function detectMimeType({
+ data,
+ signatures,
+}: {
+ data: Uint8Array | string;
+ signatures: typeof audioMimeTypeSignatures | typeof imageMimeTypeSignatures;
+}): (typeof signatures)[number]['mimeType'] | undefined {
+ const processedData = stripID3TagsIfPresent(data);
+
+ for (const signature of signatures) {
+ if (
+ typeof processedData === 'string'
+ ? processedData.startsWith(signature.base64Prefix)
+ : processedData.length >= signature.bytesPrefix.length &&
+ signature.bytesPrefix.every(
+ (byte, index) => processedData[index] === byte,
+ )
+ ) {
+ return signature.mimeType;
+ }
+ }
+
+ return undefined;
+}
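
A small sketch of how the generalized detector is used internally (not part of the diff; the file path is a placeholder). The ID3v2 handling matters for MP3s: the tag size is a 28-bit synchsafe integer built from four 7-bit bytes, and the raw MP3 frame starts after the 10-byte header plus that size:

```ts
import { readFile } from 'node:fs/promises';
import {
  detectMimeType,
  audioMimeTypeSignatures,
} from './detect-mimetype'; // internal module, not a public package export

const bytes = new Uint8Array(await readFile('./clip.mp3'));

// Example synchsafe size: size bytes [0x00, 0x00, 0x02, 0x01]
// -> (0 << 21) | (0 << 14) | (2 << 7) | 1 = 257 bytes of tag data.
const mimeType = detectMimeType({
  data: bytes,
  signatures: audioMimeTypeSignatures,
}); // 'audio/mpeg' for an ID3-tagged MP3, undefined for unknown formats
```
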
diff --git a/packages/ai/errors/no-object-generated-error.ts b/packages/ai/errors/no-object-generated-error.ts
index 77ec7f5f1d52..06b7c05846c9 100644
--- a/packages/ai/errors/no-object-generated-error.ts
+++ b/packages/ai/errors/no-object-generated-error.ts
@@ -1,6 +1,7 @@
import { AISDKError } from '@ai-sdk/provider';
import { LanguageModelResponseMetadata } from '../core/types/language-model-response-metadata';
import { LanguageModelUsage } from '../core/types/usage';
+import { FinishReason } from '../core';
const name = 'AI_NoObjectGeneratedError';
const marker = `vercel.ai.error.${name}`;
@@ -35,24 +36,32 @@ export class NoObjectGeneratedError extends AISDKError {
*/
readonly usage: LanguageModelUsage | undefined;
+ /**
+ Reason why the model finished generating a response.
+ */
+ readonly finishReason: FinishReason | undefined;
+
constructor({
message = 'No object generated.',
cause,
text,
response,
usage,
+ finishReason,
}: {
message?: string;
cause?: Error;
text?: string;
response: LanguageModelResponseMetadata;
usage: LanguageModelUsage;
+ finishReason: FinishReason;
}) {
super({ name, message, cause });
this.text = text;
this.response = response;
this.usage = usage;
+ this.finishReason = finishReason;
}
static isInstance(error: unknown): error is NoObjectGeneratedError {
@@ -66,6 +75,7 @@ export function verifyNoObjectGeneratedError(
message: string;
response: LanguageModelResponseMetadata;
usage: LanguageModelUsage;
+ finishReason: FinishReason;
},
) {
expect(NoObjectGeneratedError.isInstance(error)).toBeTruthy();
@@ -73,4 +83,7 @@ export function verifyNoObjectGeneratedError(
expect(noObjectGeneratedError.message).toStrictEqual(expected.message);
expect(noObjectGeneratedError.response).toStrictEqual(expected.response);
expect(noObjectGeneratedError.usage).toStrictEqual(expected.usage);
+ expect(noObjectGeneratedError.finishReason).toStrictEqual(
+ expected.finishReason,
+ );
}
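
A sketch of how the new `finishReason` field can be consumed (not part of the diff; model and prompt are placeholders):

```ts
import { generateObject, NoObjectGeneratedError } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';

try {
  const { object } = await generateObject({
    model: openai('gpt-4o'),
    schema: z.object({ name: z.string() }),
    prompt: 'Generate a fictional character.',
  });
  console.log(object);
} catch (error) {
  if (NoObjectGeneratedError.isInstance(error)) {
    // finishReason is now surfaced, e.g. 'length' when the output was truncated.
    console.log(error.finishReason, error.usage, error.response, error.text);
  }
}
```
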
diff --git a/packages/ai/errors/no-speech-generated-error.ts b/packages/ai/errors/no-speech-generated-error.ts
new file mode 100644
index 000000000000..d5c5de2bffa0
--- /dev/null
+++ b/packages/ai/errors/no-speech-generated-error.ts
@@ -0,0 +1,18 @@
+import { AISDKError } from '@ai-sdk/provider';
+import { SpeechModelResponseMetadata } from '../core/types/speech-model-response-metadata';
+
+/**
+Error that is thrown when no speech audio was generated.
+ */
+export class NoSpeechGeneratedError extends AISDKError {
+ readonly responses: Array<SpeechModelResponseMetadata>;
+
+ constructor(options: { responses: Array<SpeechModelResponseMetadata> }) {
+ super({
+ name: 'AI_NoSpeechGeneratedError',
+ message: 'No speech audio generated.',
+ });
+
+ this.responses = options.responses;
+ }
+}
diff --git a/packages/ai/errors/no-transcript-generated-error.ts b/packages/ai/errors/no-transcript-generated-error.ts
new file mode 100644
index 000000000000..9ed11aa1eeb7
--- /dev/null
+++ b/packages/ai/errors/no-transcript-generated-error.ts
@@ -0,0 +1,20 @@
+import { AISDKError } from '@ai-sdk/provider';
+import { TranscriptionModelResponseMetadata } from '../core/types/transcription-model-response-metadata';
+
+/**
+Error that is thrown when no transcript was generated.
+ */
+export class NoTranscriptGeneratedError extends AISDKError {
+ readonly responses: Array<TranscriptionModelResponseMetadata>;
+
+ constructor(options: {
+ responses: Array<TranscriptionModelResponseMetadata>;
+ }) {
+ super({
+ name: 'AI_NoTranscriptGeneratedError',
+ message: 'No transcript generated.',
+ });
+
+ this.responses = options.responses;
+ }
+}
diff --git a/packages/ai/mcp-stdio/create-child-process.test.ts b/packages/ai/mcp-stdio/create-child-process.test.ts
index 4bd899bd7b35..ab2e7ca23b9e 100644
--- a/packages/ai/mcp-stdio/create-child-process.test.ts
+++ b/packages/ai/mcp-stdio/create-child-process.test.ts
@@ -26,7 +26,7 @@ describe('createChildProcess', () => {
});
it('should spawn a child process', async () => {
- const childProcess = await createChildProcess(
+ const childProcess = createChildProcess(
{ command: process.execPath },
new AbortController().signal,
);
@@ -38,7 +38,7 @@ describe('createChildProcess', () => {
it('should spawn a child process with custom env', async () => {
const customEnv = { FOO: 'bar' };
- const childProcessWithCustomEnv = await createChildProcess(
+ const childProcessWithCustomEnv = createChildProcess(
{ command: process.execPath, env: customEnv },
new AbortController().signal,
);
@@ -53,7 +53,7 @@ describe('createChildProcess', () => {
});
it('should spawn a child process with args', async () => {
- const childProcessWithArgs = await createChildProcess(
+ const childProcessWithArgs = createChildProcess(
{ command: process.execPath, args: ['-c', 'echo', 'test'] },
new AbortController().signal,
);
@@ -66,11 +66,12 @@ describe('createChildProcess', () => {
'echo',
'test',
]);
+
childProcessWithArgs.kill();
});
it('should spawn a child process with cwd', async () => {
- const childProcessWithCwd = await createChildProcess(
+ const childProcessWithCwd = createChildProcess(
{ command: process.execPath, cwd: '/tmp' },
new AbortController().signal,
);
@@ -80,7 +81,7 @@ describe('createChildProcess', () => {
});
it('should spawn a child process with stderr', async () => {
- const childProcessWithStderr = await createChildProcess(
+ const childProcessWithStderr = createChildProcess(
{ command: process.execPath, stderr: 'pipe' },
new AbortController().signal,
);
diff --git a/packages/ai/mcp-stdio/create-child-process.ts b/packages/ai/mcp-stdio/create-child-process.ts
index 1f7673888b08..005c1b2a0a1f 100644
--- a/packages/ai/mcp-stdio/create-child-process.ts
+++ b/packages/ai/mcp-stdio/create-child-process.ts
@@ -2,10 +2,10 @@ import { ChildProcess, spawn } from 'node:child_process';
import { getEnvironment } from './get-environment';
import { StdioConfig } from './mcp-stdio-transport';
-export async function createChildProcess(
+export function createChildProcess(
config: StdioConfig,
signal: AbortSignal,
-): Promise<ChildProcess> {
+): ChildProcess {
return spawn(config.command, config.args ?? [], {
env: getEnvironment(config.env),
stdio: ['pipe', 'pipe', config.stderr ?? 'inherit'],
diff --git a/packages/ai/mcp-stdio/get-environment.test.ts b/packages/ai/mcp-stdio/get-environment.test.ts
new file mode 100644
index 000000000000..1ea0bb900766
--- /dev/null
+++ b/packages/ai/mcp-stdio/get-environment.test.ts
@@ -0,0 +1,13 @@
+import { describe, it, expect } from 'vitest';
+import { getEnvironment } from './get-environment';
+
+describe('getEnvironment', () => {
+ it('should not mutate the original custom environment object', () => {
+ const customEnv = { CUSTOM_VAR: 'custom_value' };
+
+ const result = getEnvironment(customEnv);
+
+ expect(customEnv).toStrictEqual({ CUSTOM_VAR: 'custom_value' });
+ expect(result).not.toBe(customEnv);
+ });
+});
diff --git a/packages/ai/mcp-stdio/get-environment.ts b/packages/ai/mcp-stdio/get-environment.ts
index 843d7edb41e9..d22b7f854caf 100644
--- a/packages/ai/mcp-stdio/get-environment.ts
+++ b/packages/ai/mcp-stdio/get-environment.ts
@@ -24,7 +24,7 @@ export function getEnvironment(
]
: ['HOME', 'LOGNAME', 'PATH', 'SHELL', 'TERM', 'USER'];
- const env: Record = customEnv ?? {};
+ const env: Record = customEnv ? { ...customEnv } : {};
for (const key of DEFAULT_INHERITED_ENV_VARS) {
const value = globalThis.process.env[key];
diff --git a/packages/ai/mcp-stdio/mcp-stdio-transport.test.ts b/packages/ai/mcp-stdio/mcp-stdio-transport.test.ts
index 23fd70cb404b..805b268b6fb6 100644
--- a/packages/ai/mcp-stdio/mcp-stdio-transport.test.ts
+++ b/packages/ai/mcp-stdio/mcp-stdio-transport.test.ts
@@ -35,7 +35,7 @@ describe('StdioMCPTransport', () => {
removeAllListeners: vi.fn(),
};
- vi.mocked(createChildProcess).mockResolvedValue(
+ vi.mocked(createChildProcess).mockReturnValue(
mockChildProcess as unknown as ChildProcess,
);
@@ -115,19 +115,6 @@ describe('StdioMCPTransport', () => {
await expect(startPromise).rejects.toThrow('Spawn failed');
expect(onErrorSpy).toHaveBeenCalledWith(error);
});
-
- it('should handle child_process import errors', async () => {
- vi.mocked(createChildProcess).mockRejectedValue(
- new MCPClientError({
- message: 'Failed to load child_process module dynamically',
- }),
- );
-
- const startPromise = transport.start();
- await expect(startPromise).rejects.toThrow(
- 'Failed to load child_process module dynamically',
- );
- });
});
describe('send', () => {
diff --git a/packages/ai/mcp-stdio/mcp-stdio-transport.ts b/packages/ai/mcp-stdio/mcp-stdio-transport.ts
index b8510cde2934..ec56ecca5a81 100644
--- a/packages/ai/mcp-stdio/mcp-stdio-transport.ts
+++ b/packages/ai/mcp-stdio/mcp-stdio-transport.ts
@@ -37,9 +37,9 @@ export class StdioMCPTransport implements MCPTransport {
});
}
- return new Promise(async (resolve, reject) => {
+ return new Promise((resolve, reject) => {
try {
- const process = await createChildProcess(
+ const process = createChildProcess(
this.serverParams,
this.abortController.signal,
);
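
For reference, a sketch of how the stdio transport is wired up after the synchronous `createChildProcess` change (not part of the diff). The `Experimental_StdioMCPTransport` export name and the command/args below are assumptions:

```ts
import { experimental_createMCPClient as createMCPClient } from 'ai';
import { Experimental_StdioMCPTransport as StdioMCPTransport } from 'ai/mcp-stdio';

// The child process is now spawned synchronously inside the promise executor,
// so the executor no longer needs to be async.
const client = await createMCPClient({
  transport: new StdioMCPTransport({
    command: 'node',
    args: ['./my-mcp-server.js'],
    env: { FOO: 'bar' }, // copied (not mutated) and merged with inherited defaults
  }),
});

const tools = await client.tools();
```
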
diff --git a/packages/ai/package.json b/packages/ai/package.json
index 6bebdcff74fd..a13e088f48a9 100644
--- a/packages/ai/package.json
+++ b/packages/ai/package.json
@@ -1,6 +1,6 @@
{
"name": "ai",
- "version": "4.2.10",
+ "version": "4.3.15",
"description": "AI SDK by Vercel - The AI Toolkit for TypeScript and JavaScript",
"license": "Apache-2.0",
"sideEffects": false,
@@ -9,7 +9,7 @@
"types": "./dist/index.d.ts",
"files": [
"dist/**/*",
- "mcp-stdio/**/*",
+ "mcp-stdio/dist/**/*",
"react/dist/**/*",
"rsc/dist/**/*",
"test/dist/**/*",
@@ -66,10 +66,10 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3",
- "@ai-sdk/react": "1.2.5",
- "@ai-sdk/ui-utils": "1.2.4",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8",
+ "@ai-sdk/react": "1.2.12",
+ "@ai-sdk/ui-utils": "1.2.11",
"@opentelemetry/api": "1.9.0",
"jsondiffpatch": "0.6.0"
},
@@ -103,7 +103,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/ai/tests/e2e/next-server/CHANGELOG.md b/packages/ai/tests/e2e/next-server/CHANGELOG.md
index d89e10b106c9..75fdb90d3aa8 100644
--- a/packages/ai/tests/e2e/next-server/CHANGELOG.md
+++ b/packages/ai/tests/e2e/next-server/CHANGELOG.md
@@ -4,6 +4,125 @@
### Patch Changes
+- ai@4.3.15
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [a295521]
+ - ai@4.3.14
+
+## 0.0.1
+
+### Patch Changes
+
+- ai@4.3.13
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [1ed3755]
+- Updated dependencies [46cb332]
+ - ai@4.3.12
+
+## 0.0.1
+
+### Patch Changes
+
+- ai@4.3.11
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [0432959]
+ - ai@4.3.10
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [b69a253]
+ - ai@4.3.9
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [6e8a73b]
+ - ai@4.3.8
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [f4f3945]
+ - ai@4.3.7
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+- Updated dependencies [bd41167]
+ - ai@4.3.6
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [452bf12]
+ - ai@4.3.5
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - ai@4.3.4
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [3e88f4d]
+- Updated dependencies [c21fa6d]
+ - ai@4.3.3
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [665a567]
+ - ai@4.3.2
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [3d1bd38]
+ - ai@4.3.1
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [772a2d7]
+ - ai@4.3.0
+
+## 0.0.1
+
+### Patch Changes
+
+- Updated dependencies [c45d100]
+ - ai@4.2.11
+
+## 0.0.1
+
+### Patch Changes
+
- ai@4.2.10
## 0.0.1
diff --git a/packages/ai/util/consume-stream.ts b/packages/ai/util/consume-stream.ts
index b38ab2fbb16d..c540696d72e3 100644
--- a/packages/ai/util/consume-stream.ts
+++ b/packages/ai/util/consume-stream.ts
@@ -8,10 +8,22 @@
* @param {ReadableStream} stream - The ReadableStream to be consumed.
* @returns {Promise} A promise that resolves when the stream is fully consumed.
*/
-export async function consumeStream(stream: ReadableStream): Promise<void> {
+export async function consumeStream({
+ stream,
+ onError,
+}: {
+ stream: ReadableStream;
+ onError?: (error: unknown) => void;
+}): Promise<void> {
const reader = stream.getReader();
- while (true) {
- const { done } = await reader.read();
- if (done) break;
+ try {
+ while (true) {
+ const { done } = await reader.read();
+ if (done) break;
+ }
+ } catch (error) {
+ onError?.(error);
+ } finally {
+ reader.releaseLock();
}
}
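
A sketch of the new options-object signature (not part of the diff; the errored stream stands in for a failed network response). Stream errors no longer reject the returned promise; they are routed to `onError`, and the reader lock is always released:

```ts
import { consumeStream } from './consume-stream'; // internal helper shown above

await consumeStream({
  stream: new ReadableStream({
    start(controller) {
      controller.enqueue('chunk');
      controller.error(new Error('network reset'));
    },
  }),
  onError: error => {
    console.error('stream failed:', error); // called instead of rejecting
  },
});
```
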
diff --git a/packages/amazon-bedrock/CHANGELOG.md b/packages/amazon-bedrock/CHANGELOG.md
index 277c8e2d10bc..41da041089c8 100644
--- a/packages/amazon-bedrock/CHANGELOG.md
+++ b/packages/amazon-bedrock/CHANGELOG.md
@@ -1,5 +1,43 @@
# @ai-sdk/amazon-bedrock
+## 2.2.9
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 2.2.8
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 2.2.7
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 2.2.6
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 2.2.5
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 2.2.4
### Patch Changes
diff --git a/packages/amazon-bedrock/README.md b/packages/amazon-bedrock/README.md
index 30b259362df1..35636b0fb37f 100644
--- a/packages/amazon-bedrock/README.md
+++ b/packages/amazon-bedrock/README.md
@@ -1,6 +1,6 @@
# AI SDK - Amazon Bedrock Provider
-The **[Amazon Bedrock provider](https://sdk.vercel.ai/providers/ai-sdk-providers/amazon-bedrock)** for the [AI SDK](https://sdk.vercel.ai/docs)
+The **[Amazon Bedrock provider](https://ai-sdk.dev/providers/ai-sdk-providers/amazon-bedrock)** for the [AI SDK](https://ai-sdk.dev/docs)
contains language model support for the Amazon Bedrock [converse API](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html).
## Setup
@@ -33,4 +33,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Amazon Bedrock provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/amazon-bedrock)** for more information.
+Please check out the **[Amazon Bedrock provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/amazon-bedrock)** for more information.
diff --git a/packages/amazon-bedrock/package.json b/packages/amazon-bedrock/package.json
index be1ea856e756..dd201482096b 100644
--- a/packages/amazon-bedrock/package.json
+++ b/packages/amazon-bedrock/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/amazon-bedrock",
- "version": "2.2.4",
+ "version": "2.2.9",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,8 +30,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8",
"@smithy/eventstream-codec": "^4.0.1",
"@smithy/util-utf8": "^4.0.0",
"aws4fetch": "^1.0.20"
@@ -52,7 +52,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/amazon-bedrock/src/bedrock-provider.ts b/packages/amazon-bedrock/src/bedrock-provider.ts
index d5dfb12bb572..0317872038f8 100644
--- a/packages/amazon-bedrock/src/bedrock-provider.ts
+++ b/packages/amazon-bedrock/src/bedrock-provider.ts
@@ -40,6 +40,7 @@ The AWS region to use for the Bedrock provider. Defaults to the value of the
/**
The AWS access key ID to use for the Bedrock provider. Defaults to the value of the
+`AWS_ACCESS_KEY_ID` environment variable.
*/
accessKeyId?: string;
diff --git a/packages/anthropic/CHANGELOG.md b/packages/anthropic/CHANGELOG.md
index 689856753b14..e2f10b8cd9dc 100644
--- a/packages/anthropic/CHANGELOG.md
+++ b/packages/anthropic/CHANGELOG.md
@@ -1,5 +1,55 @@
# @ai-sdk/anthropic
+## 1.2.11
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.2.10
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.2.9
+
+### Patch Changes
+
+- aeba38e: Add support for URL-based PDF documents in the Anthropic provider
+
+## 1.2.8
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.2.7
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
+## 1.2.5
+
+### Patch Changes
+
+- 292f543: fix (provider/google-vertex): fix anthropic support for image urls in messages
+
## 1.2.4
### Patch Changes
diff --git a/packages/anthropic/README.md b/packages/anthropic/README.md
index f58253a19605..7dd0981172d7 100644
--- a/packages/anthropic/README.md
+++ b/packages/anthropic/README.md
@@ -1,6 +1,6 @@
# AI SDK - Anthropic Provider
-The **[Anthropic provider](https://sdk.vercel.ai/providers/ai-sdk-providers/anthropic)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the [Anthropic Messages API](https://docs.anthropic.com/claude/reference/messages_post).
+The **[Anthropic provider](https://ai-sdk.dev/providers/ai-sdk-providers/anthropic)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [Anthropic Messages API](https://docs.anthropic.com/claude/reference/messages_post).
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Anthropic provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/anthropic)** for more information.
+Please check out the **[Anthropic provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/anthropic)** for more information.
diff --git a/packages/anthropic/package.json b/packages/anthropic/package.json
index 1e2f2d5ad6da..3f1ca85d2376 100644
--- a/packages/anthropic/package.json
+++ b/packages/anthropic/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/anthropic",
- "version": "1.2.4",
+ "version": "1.2.11",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -37,8 +37,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -56,7 +56,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/anthropic/src/anthropic-api-types.ts b/packages/anthropic/src/anthropic-api-types.ts
index 205df3859b1c..0e2e0c67ac5c 100644
--- a/packages/anthropic/src/anthropic-api-types.ts
+++ b/packages/anthropic/src/anthropic-api-types.ts
@@ -48,28 +48,26 @@ export interface AnthropicRedactedThinkingContent {
cache_control: AnthropicCacheControl | undefined;
}
+type AnthropicContentSource =
+ | {
+ type: 'base64';
+ media_type: string;
+ data: string;
+ }
+ | {
+ type: 'url';
+ url: string;
+ };
+
export interface AnthropicImageContent {
type: 'image';
- source:
- | {
- type: 'base64';
- media_type: string;
- data: string;
- }
- | {
- type: 'url';
- url: string;
- };
+ source: AnthropicContentSource;
cache_control: AnthropicCacheControl | undefined;
}
export interface AnthropicDocumentContent {
type: 'document';
- source: {
- type: 'base64';
- media_type: 'application/pdf';
- data: string;
- };
+ source: AnthropicContentSource;
cache_control: AnthropicCacheControl | undefined;
}
diff --git a/packages/anthropic/src/anthropic-messages-language-model.ts b/packages/anthropic/src/anthropic-messages-language-model.ts
index ddc063a660dc..bf91030414d5 100644
--- a/packages/anthropic/src/anthropic-messages-language-model.ts
+++ b/packages/anthropic/src/anthropic-messages-language-model.ts
@@ -1,5 +1,4 @@
import {
- InvalidArgumentError,
LanguageModelV1,
LanguageModelV1CallWarning,
LanguageModelV1FinishReason,
@@ -33,6 +32,7 @@ type AnthropicMessagesConfig = {
provider: string;
baseURL: string;
headers: Resolvable<Record<string, string | undefined>>;
+ supportsImageUrls: boolean;
fetch?: FetchFunction;
buildRequestUrl?: (baseURL: string, isStreaming: boolean) => string;
transformRequestBody?: (args: Record<string, any>) => Record<string, any>;
@@ -41,7 +41,6 @@ type AnthropicMessagesConfig = {
export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
readonly specificationVersion = 'v1';
readonly defaultObjectGenerationMode = 'tool';
- readonly supportsImageUrls = true;
readonly modelId: AnthropicMessagesModelId;
readonly settings: AnthropicMessagesSettings;
@@ -58,10 +57,18 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
this.config = config;
}
+ supportsUrl(url: URL): boolean {
+ return url.protocol === 'https:';
+ }
+
get provider(): string {
return this.config.provider;
}
+ get supportsImageUrls(): boolean {
+ return this.config.supportsImageUrls;
+ }
+
private async getArgs({
mode,
prompt,
diff --git a/packages/anthropic/src/anthropic-provider.ts b/packages/anthropic/src/anthropic-provider.ts
index 82440c29e347..7a598e834f22 100644
--- a/packages/anthropic/src/anthropic-provider.ts
+++ b/packages/anthropic/src/anthropic-provider.ts
@@ -109,6 +109,7 @@ export function createAnthropic(
baseURL,
headers: getHeaders,
fetch: options.fetch,
+ supportsImageUrls: true,
});
const provider = function (
diff --git a/packages/anthropic/src/convert-to-anthropic-messages-prompt.test.ts b/packages/anthropic/src/convert-to-anthropic-messages-prompt.test.ts
index a0faf40e49e7..42fad0e4c3f9 100644
--- a/packages/anthropic/src/convert-to-anthropic-messages-prompt.test.ts
+++ b/packages/anthropic/src/convert-to-anthropic-messages-prompt.test.ts
@@ -123,7 +123,7 @@ describe('user messages', () => {
});
});
- it('should add PDF file parts', async () => {
+ it('should add PDF file parts for base64 PDFs', async () => {
const result = convertToAnthropicMessagesPrompt({
prompt: [
{
@@ -164,6 +164,46 @@ describe('user messages', () => {
});
});
+ it('should add PDF file parts for URL PDFs', async () => {
+ const result = convertToAnthropicMessagesPrompt({
+ prompt: [
+ {
+ role: 'user',
+ content: [
+ {
+ type: 'file',
+ data: new URL('https://example.com/document.pdf'),
+ mimeType: 'application/pdf',
+ },
+ ],
+ },
+ ],
+ sendReasoning: true,
+ warnings: [],
+ });
+
+ expect(result).toEqual({
+ prompt: {
+ messages: [
+ {
+ role: 'user',
+ content: [
+ {
+ type: 'document',
+ source: {
+ type: 'url',
+ url: 'https://example.com/document.pdf',
+ },
+ },
+ ],
+ },
+ ],
+ system: undefined,
+ },
+ betas: new Set(['pdfs-2024-09-25']),
+ });
+ });
+
it('should throw error for non-PDF file types', async () => {
expect(() =>
convertToAnthropicMessagesPrompt({
diff --git a/packages/anthropic/src/convert-to-anthropic-messages-prompt.ts b/packages/anthropic/src/convert-to-anthropic-messages-prompt.ts
index 731ad6478f1b..3a839f273387 100644
--- a/packages/anthropic/src/convert-to-anthropic-messages-prompt.ts
+++ b/packages/anthropic/src/convert-to-anthropic-messages-prompt.ts
@@ -121,13 +121,6 @@ export function convertToAnthropicMessagesPrompt({
}
case 'file': {
- if (part.data instanceof URL) {
- // The AI SDK automatically downloads files for user file parts with URLs
- throw new UnsupportedFunctionalityError({
- functionality: 'Image URLs in user messages',
- });
- }
-
if (part.mimeType !== 'application/pdf') {
throw new UnsupportedFunctionalityError({
functionality: 'Non-PDF files in user messages',
@@ -138,11 +131,17 @@ export function convertToAnthropicMessagesPrompt({
anthropicContent.push({
type: 'document',
- source: {
- type: 'base64',
- media_type: 'application/pdf',
- data: part.data,
- },
+ source:
+ part.data instanceof URL
+ ? {
+ type: 'url',
+ url: part.data.toString(),
+ }
+ : {
+ type: 'base64',
+ media_type: 'application/pdf',
+ data: part.data,
+ },
cache_control: cacheControl,
});
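
For reference, this is how the new URL document source surfaces to callers: a user-message file part that carries a PDF URL is now forwarded to Anthropic as a `{ type: 'url' }` document source instead of being rejected. A hedged end-to-end sketch (model id, URL, and prompt text are illustrative):

```ts
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';

// The URL-based PDF is passed through as a url document source (see the test above).
const { text } = await generateText({
  model: anthropic('claude-3-5-sonnet-20241022'),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Summarize this document.' },
        {
          type: 'file',
          data: new URL('https://example.com/document.pdf'),
          mimeType: 'application/pdf',
        },
      ],
    },
  ],
});
```
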
diff --git a/packages/assemblyai/CHANGELOG.md b/packages/assemblyai/CHANGELOG.md
new file mode 100644
index 000000000000..5170def83e7e
--- /dev/null
+++ b/packages/assemblyai/CHANGELOG.md
@@ -0,0 +1,7 @@
+# @ai-sdk/assemblyai
+
+## 0.0.1
+
+### Patch Changes
+
+- cb05e9c: feat(providers/assemblyai): add transcribe
diff --git a/packages/assemblyai/README.md b/packages/assemblyai/README.md
new file mode 100644
index 000000000000..e3c0604e6a05
--- /dev/null
+++ b/packages/assemblyai/README.md
@@ -0,0 +1,38 @@
+# AI SDK - AssemblyAI Provider
+
+The **[AssemblyAI provider](https://ai-sdk.dev/providers/ai-sdk-providers/assemblyai)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains transcription model support for the AssemblyAI transcription API.
+
+## Setup
+
+The AssemblyAI provider is available in the `@ai-sdk/assemblyai` module. You can install it with
+
+```bash
+npm i @ai-sdk/assemblyai
+```
+
+## Provider Instance
+
+You can import the default provider instance `assemblyai` from `@ai-sdk/assemblyai`:
+
+```ts
+import { assemblyai } from '@ai-sdk/assemblyai';
+```
+
+## Example
+
+```ts
+import { assemblyai } from '@ai-sdk/assemblyai';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const { text } = await transcribe({
+ model: assemblyai.transcription('best'),
+ audio: new URL(
+ 'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+ ),
+});
+```
+
+## Documentation
+
+Please check out the **[AssemblyAI provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/assemblyai)** for more information.
diff --git a/packages/assemblyai/package.json b/packages/assemblyai/package.json
new file mode 100644
index 000000000000..6cb2bec2a4dd
--- /dev/null
+++ b/packages/assemblyai/package.json
@@ -0,0 +1,64 @@
+{
+ "name": "@ai-sdk/assemblyai",
+ "version": "0.0.1",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.2",
+ "@ai-sdk/provider-utils": "2.2.6"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/assemblyai/src/assemblyai-api-types.ts b/packages/assemblyai/src/assemblyai-api-types.ts
new file mode 100644
index 000000000000..6b5cb7c0d7ea
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-api-types.ts
@@ -0,0 +1,367 @@
+export type AssemblyAITranscriptionAPITypes = {
+ /**
+ * The URL of the audio or video file to transcribe.
+ */
+ audio_url: string;
+
+ /**
+ * The point in time, in milliseconds, to stop transcribing in your media file
+ */
+ audio_end_at?: number;
+
+ /**
+ * The point in time, in milliseconds, to begin transcribing in your media file
+ */
+ audio_start_from?: number;
+
+ /**
+ * Enable Auto Chapters, can be true or false
+ * @default false
+ */
+ auto_chapters?: boolean;
+
+ /**
+ * Enable Key Phrases, either true or false
+ * @default false
+ */
+ auto_highlights?: boolean;
+
+ /**
+ * How much to boost specified words
+ */
+ boost_param?: 'low' | 'default' | 'high';
+
+ /**
+ * Enable Content Moderation, can be true or false
+ * @default false
+ */
+ content_safety?: boolean;
+
+ /**
+ * The confidence threshold for the Content Moderation model. Values must be between 25 and 100.
+ * @default 50
+ */
+ content_safety_confidence?: number;
+
+ /**
+ * Customize how words are spelled and formatted using to and from values
+ */
+ custom_spelling?: Array<{
+ /**
+ * Words or phrases to replace
+ */
+ from: string[];
+ /**
+ * Word to replace with
+ */
+ to: string;
+ }>;
+
+ /**
+ * Transcribe Filler Words, like "umm", in your media file; can be true or false
+ * @default false
+ */
+ disfluencies?: boolean;
+
+ /**
+ * Enable Entity Detection, can be true or false
+ * @default false
+ */
+ entity_detection?: boolean;
+
+ /**
+ * Filter profanity from the transcribed text, can be true or false
+ * @default false
+ */
+ filter_profanity?: boolean;
+
+ /**
+ * Enable Text Formatting, can be true or false
+ * @default true
+ */
+ format_text?: boolean;
+
+ /**
+ * Enable Topic Detection, can be true or false
+ * @default false
+ */
+ iab_categories?: boolean;
+
+ /**
+ * The language of your audio file. Possible values are found in Supported Languages.
+ * @default 'en_us'
+ */
+ language_code?:
+ | 'en'
+ | 'en_au'
+ | 'en_uk'
+ | 'en_us'
+ | 'es'
+ | 'fr'
+ | 'de'
+ | 'it'
+ | 'pt'
+ | 'nl'
+ | 'af'
+ | 'sq'
+ | 'am'
+ | 'ar'
+ | 'hy'
+ | 'as'
+ | 'az'
+ | 'ba'
+ | 'eu'
+ | 'be'
+ | 'bn'
+ | 'bs'
+ | 'br'
+ | 'bg'
+ | 'my'
+ | 'ca'
+ | 'zh'
+ | 'hr'
+ | 'cs'
+ | 'da'
+ | 'et'
+ | 'fo'
+ | 'fi'
+ | 'gl'
+ | 'ka'
+ | 'el'
+ | 'gu'
+ | 'ht'
+ | 'ha'
+ | 'haw'
+ | 'he'
+ | 'hi'
+ | 'hu'
+ | 'is'
+ | 'id'
+ | 'ja'
+ | 'jw'
+ | 'kn'
+ | 'kk'
+ | 'km'
+ | 'ko'
+ | 'lo'
+ | 'la'
+ | 'lv'
+ | 'ln'
+ | 'lt'
+ | 'lb'
+ | 'mk'
+ | 'mg'
+ | 'ms'
+ | 'ml'
+ | 'mt'
+ | 'mi'
+ | 'mr'
+ | 'mn'
+ | 'ne'
+ | 'no'
+ | 'nn'
+ | 'oc'
+ | 'pa'
+ | 'ps'
+ | 'fa'
+ | 'pl'
+ | 'ro'
+ | 'ru'
+ | 'sa'
+ | 'sr'
+ | 'sn'
+ | 'sd'
+ | 'si'
+ | 'sk'
+ | 'sl'
+ | 'so'
+ | 'su'
+ | 'sw'
+ | 'sv'
+ | 'tl'
+ | 'tg'
+ | 'ta'
+ | 'tt'
+ | 'te'
+ | 'th'
+ | 'bo'
+ | 'tr'
+ | 'tk'
+ | 'uk'
+ | 'ur'
+ | 'uz'
+ | 'vi'
+ | 'cy'
+ | 'yi'
+ | 'yo';
+
+ /**
+ * The confidence threshold for the automatically detected language. An error will be returned if the language confidence is below this threshold.
+ * @default 0
+ */
+ language_confidence_threshold?: number;
+
+ /**
+ * Enable Automatic language detection, either true or false.
+ * @default false
+ */
+ language_detection?: boolean;
+
+ /**
+ * Enable Multichannel transcription, can be true or false.
+ * @default false
+ */
+ multichannel?: boolean;
+
+ /**
+ * Enable Automatic Punctuation, can be true or false
+ * @default true
+ */
+ punctuate?: boolean;
+
+ /**
+ * Redact PII from the transcribed text using the Redact PII model, can be true or false
+ * @default false
+ */
+ redact_pii?: boolean;
+
+ /**
+ * Generate a copy of the original media file with spoken PII "beeped" out, can be true or false.
+ * @default false
+ */
+ redact_pii_audio?: boolean;
+
+ /**
+ * Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav.
+ */
+ redact_pii_audio_quality?: 'mp3' | 'wav';
+
+ /**
+ * The list of PII Redaction policies to enable.
+ */
+ redact_pii_policies?: Array<
+ | 'account_number'
+ | 'banking_information'
+ | 'blood_type'
+ | 'credit_card_cvv'
+ | 'credit_card_expiration'
+ | 'credit_card_number'
+ | 'date'
+ | 'date_interval'
+ | 'date_of_birth'
+ | 'drivers_license'
+ | 'drug'
+ | 'duration'
+ | 'email_address'
+ | 'event'
+ | 'filename'
+ | 'gender_sexuality'
+ | 'healthcare_number'
+ | 'injury'
+ | 'ip_address'
+ | 'language'
+ | 'location'
+ | 'marital_status'
+ | 'medical_condition'
+ | 'medical_process'
+ | 'money_amount'
+ | 'nationality'
+ | 'number_sequence'
+ | 'occupation'
+ | 'organization'
+ | 'passport_number'
+ | 'password'
+ | 'person_age'
+ | 'person_name'
+ | 'phone_number'
+ | 'physical_attribute'
+ | 'political_affiliation'
+ | 'religion'
+ | 'statistics'
+ | 'time'
+ | 'url'
+ | 'us_social_security_number'
+ | 'username'
+ | 'vehicle_id'
+ | 'zodiac_sign'
+ >;
+
+ /**
+ * The replacement logic for detected PII, can be "entity_name" or "hash".
+ */
+ redact_pii_sub?: 'entity_name' | 'hash';
+
+ /**
+ * Enable Sentiment Analysis, can be true or false
+ * @default false
+ */
+ sentiment_analysis?: boolean;
+
+ /**
+ * Enable Speaker diarization, can be true or false
+ * @default false
+ */
+ speaker_labels?: boolean;
+
+ /**
+ * Tells the speaker label model how many speakers it should attempt to identify, up to 10.
+ */
+ speakers_expected?: number;
+
+ /**
+ * The speech model to use for the transcription.
+ */
+ speech_model?: 'best' | 'nano';
+
+ /**
+ * Reject audio files that contain less than this fraction of speech. Valid values are in the range [0, 1] inclusive.
+ */
+ speech_threshold?: number;
+
+ /**
+ * Enable Summarization, can be true or false
+ * @default false
+ */
+ summarization?: boolean;
+
+ /**
+ * The model to summarize the transcript
+ */
+ summary_model?: 'informative' | 'conversational' | 'catchy';
+
+ /**
+ * The type of summary
+ */
+ summary_type?:
+ | 'bullets'
+ | 'bullets_verbose'
+ | 'gist'
+ | 'headline'
+ | 'paragraph';
+
+ /**
+ * The list of custom topics
+ */
+ topics?: string[];
+
+ /**
+ * The header name to be sent with the transcript completed or failed webhook requests
+ */
+ webhook_auth_header_name?: string;
+
+ /**
+ * The header value to send back with the transcript completed or failed webhook requests for added security
+ */
+ webhook_auth_header_value?: string;
+
+ /**
+ * The URL to which we send webhook requests. We send two different types of webhook requests.
+ * One request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.
+ */
+ webhook_url?: string;
+
+ /**
+ * The list of custom vocabulary to boost transcription probability for
+ */
+ word_boost?: string[];
+};
diff --git a/packages/assemblyai/src/assemblyai-config.ts b/packages/assemblyai/src/assemblyai-config.ts
new file mode 100644
index 000000000000..acd749e88832
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type AssemblyAIConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/assemblyai/src/assemblyai-error.test.ts b/packages/assemblyai/src/assemblyai-error.test.ts
new file mode 100644
index 000000000000..8dbb2a5c884e
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { assemblyaiErrorDataSchema } from './assemblyai-error';
+
+describe('assemblyaiErrorDataSchema', () => {
+ it('should parse AssemblyAI resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: assemblyaiErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/assemblyai/src/assemblyai-error.ts b/packages/assemblyai/src/assemblyai-error.ts
new file mode 100644
index 000000000000..a76e84f94f49
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const assemblyaiErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type AssemblyAIErrorData = z.infer<typeof assemblyaiErrorDataSchema>;
+
+export const assemblyaiFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: assemblyaiErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/assemblyai/src/assemblyai-provider.ts b/packages/assemblyai/src/assemblyai-provider.ts
new file mode 100644
index 000000000000..e11d309b1e0f
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-provider.ts
@@ -0,0 +1,77 @@
+import { TranscriptionModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { AssemblyAITranscriptionModel } from './assemblyai-transcription-model';
+import { AssemblyAITranscriptionModelId } from './assemblyai-transcription-settings';
+
+export interface AssemblyAIProvider
+ extends Pick<ProviderV1, 'transcriptionModel'> {
+ (
+ modelId: 'best',
+ settings?: {},
+ ): {
+ transcription: AssemblyAITranscriptionModel;
+ };
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: AssemblyAITranscriptionModelId): TranscriptionModelV1;
+}
+
+export interface AssemblyAIProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create an AssemblyAI provider instance.
+ */
+export function createAssemblyAI(
+ options: AssemblyAIProviderSettings = {},
+): AssemblyAIProvider {
+ const getHeaders = () => ({
+ authorization: loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'ASSEMBLYAI_API_KEY',
+ description: 'AssemblyAI',
+ }),
+ ...options.headers,
+ });
+
+ const createTranscriptionModel = (modelId: AssemblyAITranscriptionModelId) =>
+ new AssemblyAITranscriptionModel(modelId, {
+ provider: `assemblyai.transcription`,
+ url: ({ path }) => `https://api.assemblyai.com${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function (modelId: AssemblyAITranscriptionModelId) {
+ return {
+ transcription: createTranscriptionModel(modelId),
+ };
+ };
+
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ return provider as AssemblyAIProvider;
+}
+
+/**
+Default AssemblyAI provider instance.
+ */
+export const assemblyai = createAssemblyAI();
diff --git a/packages/assemblyai/src/assemblyai-transcription-model.test.ts b/packages/assemblyai/src/assemblyai-transcription-model.test.ts
new file mode 100644
index 000000000000..6191da41aeaf
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-transcription-model.test.ts
@@ -0,0 +1,350 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { AssemblyAITranscriptionModel } from './assemblyai-transcription-model';
+import { createAssemblyAI } from './assemblyai-provider';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createAssemblyAI({ apiKey: 'test-api-key' });
+const model = provider.transcription('best');
+
+const server = createTestServer({
+ 'https://api.assemblyai.com/v2/transcript': {},
+ 'https://api.assemblyai.com/v2/upload': {
+ response: {
+ type: 'json-value',
+ body: {
+ id: '9ea68fd3-f953-42c1-9742-976c447fb463',
+ upload_url: 'https://storage.assemblyai.com/mock-upload-url',
+ },
+ },
+ },
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ server.urls['https://api.assemblyai.com/v2/transcript'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ id: '9ea68fd3-f953-42c1-9742-976c447fb463',
+ audio_url: 'https://assembly.ai/test.mp3',
+ status: 'completed',
+ webhook_auth: true,
+ auto_highlights: true,
+ redact_pii: true,
+ summarization: true,
+ language_model: 'assemblyai_default',
+ acoustic_model: 'assemblyai_default',
+ language_code: 'en_us',
+ language_detection: true,
+ language_confidence_threshold: 0.7,
+ language_confidence: 0.9959,
+ speech_model: 'best',
+ text: 'Hello, world!',
+ words: [
+ {
+ confidence: 0.97465,
+ start: 250,
+ end: 650,
+ text: 'Hello,',
+ channel: 'channel',
+ speaker: 'speaker',
+ },
+ {
+ confidence: 0.99999,
+ start: 730,
+ end: 1022,
+ text: 'world',
+ channel: 'channel',
+ speaker: 'speaker',
+ },
+ ],
+ utterances: [
+ {
+ confidence: 0.9359033333333334,
+ start: 250,
+ end: 26950,
+ text: 'Hello, world!',
+ words: [
+ {
+ confidence: 0.97503,
+ start: 250,
+ end: 650,
+ text: 'Hello,',
+ speaker: 'A',
+ },
+ {
+ confidence: 0.99999,
+ start: 730,
+ end: 1022,
+ text: 'world',
+ speaker: 'A',
+ },
+ ],
+ speaker: 'A',
+ channel: 'channel',
+ },
+ ],
+ confidence: 0.9404651451800253,
+ audio_duration: 281,
+ punctuate: true,
+ format_text: true,
+ disfluencies: false,
+ multichannel: false,
+ audio_channels: 1,
+ webhook_url: 'https://your-webhook-url.tld/path',
+ webhook_status_code: 200,
+ webhook_auth_header_name: 'webhook-secret',
+ auto_highlights_result: {
+ status: 'success',
+ results: [
+ {
+ count: 1,
+ rank: 0.08,
+ text: 'Hello, world!',
+ timestamps: [
+ {
+ start: 250,
+ end: 26950,
+ },
+ ],
+ },
+ ],
+ },
+ audio_start_from: 10,
+ audio_end_at: 280,
+ word_boost: ['hello', 'world'],
+ boost_param: 'high',
+ filter_profanity: true,
+ redact_pii_audio: true,
+ redact_pii_audio_quality: 'mp3',
+ redact_pii_policies: [
+ 'us_social_security_number',
+ 'credit_card_number',
+ ],
+ redact_pii_sub: 'hash',
+ speaker_labels: true,
+ speakers_expected: 2,
+ content_safety: true,
+ content_safety_labels: {
+ status: 'success',
+ results: [
+ {
+ text: 'Hello, world!',
+ labels: [
+ {
+ label: 'disasters',
+ confidence: 0.8142836093902588,
+ severity: 0.4093044400215149,
+ },
+ ],
+ sentences_idx_start: 0,
+ sentences_idx_end: 5,
+ timestamp: {
+ start: 250,
+ end: 28840,
+ },
+ },
+ ],
+ summary: {
+ disasters: 0.9940800441842205,
+ health_issues: 0.9216489289040967,
+ },
+ severity_score_summary: {
+ disasters: {
+ low: 0.5733263024656846,
+ medium: 0.42667369753431533,
+ high: 0,
+ },
+ health_issues: {
+ low: 0.22863814977924785,
+ medium: 0.45014154926938227,
+ high: 0.32122030095136983,
+ },
+ },
+ },
+ iab_categories: true,
+ iab_categories_result: {
+ status: 'success',
+ results: [
+ {
+ text: 'Hello, world!',
+ labels: [
+ {
+ relevance: 0.988274097442627,
+ label: 'Home&Garden>IndoorEnvironmentalQuality',
+ },
+ {
+ relevance: 0.5821335911750793,
+ label: 'NewsAndPolitics>Weather',
+ },
+ ],
+ timestamp: {
+ start: 250,
+ end: 28840,
+ },
+ },
+ ],
+ summary: {
+ 'NewsAndPolitics>Weather': 1,
+ 'Home&Garden>IndoorEnvironmentalQuality': 0.9043831825256348,
+ },
+ },
+ auto_chapters: true,
+ chapters: [
+ {
+ gist: 'Hello, world!',
+ headline: 'Hello, world!',
+ summary: 'Hello, world!',
+ start: 250,
+ end: 28840,
+ },
+ {
+ gist: 'Hello, world!',
+ headline: 'Hello, world!',
+ summary: 'Hello, world!',
+ start: 29610,
+ end: 280340,
+ },
+ ],
+ summary_type: 'bullets',
+ summary_model: 'informative',
+ summary: '- Hello, world!',
+ topics: ['topics'],
+ sentiment_analysis: true,
+ entity_detection: true,
+ entities: [
+ {
+ entity_type: 'location',
+ text: 'Canada',
+ start: 2548,
+ end: 3130,
+ },
+ {
+ entity_type: 'location',
+ text: 'the US',
+ start: 5498,
+ end: 6382,
+ },
+ ],
+ speech_threshold: 0.5,
+ error: 'error',
+ dual_channel: false,
+ speed_boost: true,
+ custom_topics: true,
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[1].requestBody).toMatchObject({
+ audio_url: 'https://storage.assemblyai.com/mock-upload-url',
+ speech_model: 'best',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createAssemblyAI({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('best').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'test-api-key',
+ 'content-type': 'application/octet-stream',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Hello, world!');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new AssemblyAITranscriptionModel('best', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.assemblyai.com${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'best',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new AssemblyAITranscriptionModel('best', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.assemblyai.com${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('best');
+ });
+});
diff --git a/packages/assemblyai/src/assemblyai-transcription-model.ts b/packages/assemblyai/src/assemblyai-transcription-model.ts
new file mode 100644
index 000000000000..f616b4a7b282
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-transcription-model.ts
@@ -0,0 +1,354 @@
+import {
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ createJsonResponseHandler,
+ parseProviderOptions,
+ postJsonToApi,
+ postToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { AssemblyAIConfig } from './assemblyai-config';
+import { assemblyaiFailedResponseHandler } from './assemblyai-error';
+import { AssemblyAITranscriptionModelId } from './assemblyai-transcription-settings';
+import { AssemblyAITranscriptionAPITypes } from './assemblyai-api-types';
+
+// https://www.assemblyai.com/docs/api-reference/transcripts/submit
+const assemblyaiProviderOptionsSchema = z.object({
+ /**
+ * End time of the audio in milliseconds.
+ */
+ audioEndAt: z.number().int().nullish(),
+ /**
+ * Start time of the audio in milliseconds.
+ */
+ audioStartFrom: z.number().int().nullish(),
+ /**
+ * Whether to automatically generate chapters for the transcription.
+ */
+ autoChapters: z.boolean().nullish(),
+ /**
+ * Whether to automatically generate highlights for the transcription.
+ */
+ autoHighlights: z.boolean().nullish(),
+ /**
+ * Boost parameter for the transcription.
+ * Allowed values: 'low', 'default', 'high'.
+ */
+ boostParam: z.string().nullish(),
+ /**
+ * Whether to enable content safety filtering.
+ */
+ contentSafety: z.boolean().nullish(),
+ /**
+ * Confidence threshold for content safety filtering (25-100).
+ */
+ contentSafetyConfidence: z.number().int().min(25).max(100).nullish(),
+ /**
+ * Custom spelling rules for the transcription.
+ */
+ customSpelling: z
+ .array(
+ z.object({
+ from: z.array(z.string()),
+ to: z.string(),
+ }),
+ )
+ .nullish(),
+ /**
+ * Whether to include filler words (um, uh, etc.) in the transcription.
+ */
+ disfluencies: z.boolean().nullish(),
+ /**
+ * Whether to enable entity detection.
+ */
+ entityDetection: z.boolean().nullish(),
+ /**
+ * Whether to filter profanity from the transcription.
+ */
+ filterProfanity: z.boolean().nullish(),
+ /**
+ * Whether to format text with punctuation and capitalization.
+ */
+ formatText: z.boolean().nullish(),
+ /**
+ * Whether to enable IAB categories detection.
+ */
+ iabCategories: z.boolean().nullish(),
+ /**
+ * Language code for the transcription.
+ */
+ languageCode: z.union([z.literal('en'), z.string()]).nullish(),
+ /**
+ * Confidence threshold for language detection.
+ */
+ languageConfidenceThreshold: z.number().nullish(),
+ /**
+ * Whether to enable language detection.
+ */
+ languageDetection: z.boolean().nullish(),
+ /**
+ * Whether to process audio as multichannel.
+ */
+ multichannel: z.boolean().nullish(),
+ /**
+ * Whether to add punctuation to the transcription.
+ */
+ punctuate: z.boolean().nullish(),
+ /**
+ * Whether to redact personally identifiable information (PII).
+ */
+ redactPii: z.boolean().nullish(),
+ /**
+ * Whether to redact PII in the audio file.
+ */
+ redactPiiAudio: z.boolean().nullish(),
+ /**
+ * Audio format for PII redaction.
+ */
+ redactPiiAudioQuality: z.string().nullish(),
+ /**
+ * List of PII types to redact.
+ */
+ redactPiiPolicies: z.array(z.string()).nullish(),
+ /**
+ * Substitution method for redacted PII.
+ */
+ redactPiiSub: z.string().nullish(),
+ /**
+ * Whether to enable sentiment analysis.
+ */
+ sentimentAnalysis: z.boolean().nullish(),
+ /**
+ * Whether to identify different speakers in the audio.
+ */
+ speakerLabels: z.boolean().nullish(),
+ /**
+ * Number of speakers expected in the audio.
+ */
+ speakersExpected: z.number().int().nullish(),
+ /**
+ * Threshold for speech detection (0-1).
+ */
+ speechThreshold: z.number().min(0).max(1).nullish(),
+ /**
+ * Whether to generate a summary of the transcription.
+ */
+ summarization: z.boolean().nullish(),
+ /**
+ * Model to use for summarization.
+ */
+ summaryModel: z.string().nullish(),
+ /**
+ * Type of summary to generate.
+ */
+ summaryType: z.string().nullish(),
+ /**
+ * List of topics to identify in the transcription.
+ */
+ topics: z.array(z.string()).nullish(),
+ /**
+ * Name of the authentication header for webhook requests.
+ */
+ webhookAuthHeaderName: z.string().nullish(),
+ /**
+ * Value of the authentication header for webhook requests.
+ */
+ webhookAuthHeaderValue: z.string().nullish(),
+ /**
+ * URL to send webhook notifications to.
+ */
+ webhookUrl: z.string().nullish(),
+ /**
+ * List of words to boost recognition for.
+ */
+ wordBoost: z.array(z.string()).nullish(),
+});
+
+export type AssemblyAITranscriptionCallOptions = z.infer<
+ typeof assemblyaiProviderOptionsSchema
+>;
+
+interface AssemblyAITranscriptionModelConfig extends AssemblyAIConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class AssemblyAITranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: AssemblyAITranscriptionModelId,
+ private readonly config: AssemblyAITranscriptionModelConfig,
+ ) {}
+
+ private async getArgs({
+ providerOptions,
+ }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const assemblyaiOptions = parseProviderOptions({
+ provider: 'assemblyai',
+ providerOptions,
+ schema: assemblyaiProviderOptionsSchema,
+ });
+
+ const body: Omit<AssemblyAITranscriptionAPITypes, 'audio_url'> = {
+ speech_model: this.modelId,
+ };
+
+ // Add provider-specific options
+ if (assemblyaiOptions) {
+ body.audio_end_at = assemblyaiOptions.audioEndAt ?? undefined;
+ body.audio_start_from = assemblyaiOptions.audioStartFrom ?? undefined;
+ body.auto_chapters = assemblyaiOptions.autoChapters ?? undefined;
+ body.auto_highlights = assemblyaiOptions.autoHighlights ?? undefined;
+ body.boost_param = (assemblyaiOptions.boostParam as never) ?? undefined;
+ body.content_safety = assemblyaiOptions.contentSafety ?? undefined;
+ body.content_safety_confidence =
+ assemblyaiOptions.contentSafetyConfidence ?? undefined;
+ body.custom_spelling =
+ (assemblyaiOptions.customSpelling as never) ?? undefined;
+ body.disfluencies = assemblyaiOptions.disfluencies ?? undefined;
+ body.entity_detection = assemblyaiOptions.entityDetection ?? undefined;
+ body.filter_profanity = assemblyaiOptions.filterProfanity ?? undefined;
+ body.format_text = assemblyaiOptions.formatText ?? undefined;
+ body.iab_categories = assemblyaiOptions.iabCategories ?? undefined;
+ body.language_code =
+ (assemblyaiOptions.languageCode as never) ?? undefined;
+ body.language_confidence_threshold =
+ assemblyaiOptions.languageConfidenceThreshold ?? undefined;
+ body.language_detection =
+ assemblyaiOptions.languageDetection ?? undefined;
+ body.multichannel = assemblyaiOptions.multichannel ?? undefined;
+ body.punctuate = assemblyaiOptions.punctuate ?? undefined;
+ body.redact_pii = assemblyaiOptions.redactPii ?? undefined;
+ body.redact_pii_audio = assemblyaiOptions.redactPiiAudio ?? undefined;
+ body.redact_pii_audio_quality =
+ (assemblyaiOptions.redactPiiAudioQuality as never) ?? undefined;
+ body.redact_pii_policies =
+ (assemblyaiOptions.redactPiiPolicies as never) ?? undefined;
+ body.redact_pii_sub =
+ (assemblyaiOptions.redactPiiSub as never) ?? undefined;
+ body.sentiment_analysis =
+ assemblyaiOptions.sentimentAnalysis ?? undefined;
+ body.speaker_labels = assemblyaiOptions.speakerLabels ?? undefined;
+ body.speakers_expected = assemblyaiOptions.speakersExpected ?? undefined;
+ body.speech_threshold = assemblyaiOptions.speechThreshold ?? undefined;
+ body.summarization = assemblyaiOptions.summarization ?? undefined;
+ body.summary_model =
+ (assemblyaiOptions.summaryModel as never) ?? undefined;
+ body.summary_type = (assemblyaiOptions.summaryType as never) ?? undefined;
+ body.topics = assemblyaiOptions.topics ?? undefined;
+ body.webhook_auth_header_name =
+ assemblyaiOptions.webhookAuthHeaderName ?? undefined;
+ body.webhook_auth_header_value =
+ assemblyaiOptions.webhookAuthHeaderValue ?? undefined;
+ body.webhook_url = assemblyaiOptions.webhookUrl ?? undefined;
+ body.word_boost = assemblyaiOptions.wordBoost ?? undefined;
+ }
+
+ return {
+ body,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+
+ const { value: uploadResponse } = await postToApi({
+ url: this.config.url({
+ path: '/v2/upload',
+ modelId: '',
+ }),
+ headers: {
+ 'Content-Type': 'application/octet-stream',
+ ...combineHeaders(this.config.headers(), options.headers),
+ },
+ body: {
+ content: options.audio,
+ values: options.audio,
+ },
+ failedResponseHandler: assemblyaiFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ assemblyaiUploadResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ const { body, warnings } = await this.getArgs(options);
+
+ const {
+ value: response,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postJsonToApi({
+ url: this.config.url({
+ path: '/v2/transcript',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ body: {
+ ...body,
+ audio_url: uploadResponse.upload_url,
+ },
+ failedResponseHandler: assemblyaiFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ assemblyaiTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ text: response.text ?? '',
+ segments:
+ response.words?.map(word => ({
+ text: word.text,
+ startSecond: word.start,
+ endSecond: word.end,
+ })) ?? [],
+ language: response.language_code ?? undefined,
+ durationInSeconds:
+ response.audio_duration ?? response.words?.at(-1)?.end ?? undefined,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
+
+const assemblyaiUploadResponseSchema = z.object({
+ upload_url: z.string(),
+});
+
+const assemblyaiTranscriptionResponseSchema = z.object({
+ text: z.string().nullish(),
+ language_code: z.string().nullish(),
+ words: z
+ .array(
+ z.object({
+ start: z.number(),
+ end: z.number(),
+ text: z.string(),
+ }),
+ )
+ .nullish(),
+ audio_duration: z.number().nullish(),
+});
diff --git a/packages/assemblyai/src/assemblyai-transcription-settings.ts b/packages/assemblyai/src/assemblyai-transcription-settings.ts
new file mode 100644
index 000000000000..f83e8d2e297a
--- /dev/null
+++ b/packages/assemblyai/src/assemblyai-transcription-settings.ts
@@ -0,0 +1 @@
+export type AssemblyAITranscriptionModelId = 'best' | 'nano';
diff --git a/packages/assemblyai/src/index.ts b/packages/assemblyai/src/index.ts
new file mode 100644
index 000000000000..6c90fa17c72b
--- /dev/null
+++ b/packages/assemblyai/src/index.ts
@@ -0,0 +1,5 @@
+export { createAssemblyAI, assemblyai } from './assemblyai-provider';
+export type {
+ AssemblyAIProvider,
+ AssemblyAIProviderSettings,
+} from './assemblyai-provider';
diff --git a/packages/assemblyai/src/transcript-test.mp3 b/packages/assemblyai/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/assemblyai/src/transcript-test.mp3 differ
diff --git a/packages/assemblyai/tsconfig.json b/packages/assemblyai/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/assemblyai/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/assemblyai/tsup.config.ts b/packages/assemblyai/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/assemblyai/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/assemblyai/turbo.json b/packages/assemblyai/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/assemblyai/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/assemblyai/vitest.edge.config.js b/packages/assemblyai/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/assemblyai/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/assemblyai/vitest.node.config.js b/packages/assemblyai/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/assemblyai/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/azure/CHANGELOG.md b/packages/azure/CHANGELOG.md
index c2a5f87fff91..4c2022bcd07f 100644
--- a/packages/azure/CHANGELOG.md
+++ b/packages/azure/CHANGELOG.md
@@ -1,5 +1,134 @@
# @ai-sdk/azure
+## 1.3.23
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/openai@1.3.22
+
+## 1.3.22
+
+### Patch Changes
+
+- Updated dependencies [5caac29]
+ - @ai-sdk/openai@1.3.21
+
+## 1.3.21
+
+### Patch Changes
+
+- Updated dependencies [dd5450e]
+ - @ai-sdk/openai@1.3.20
+
+## 1.3.20
+
+### Patch Changes
+
+- Updated dependencies [3cabda9]
+ - @ai-sdk/openai@1.3.19
+
+## 1.3.19
+
+### Patch Changes
+
+- Updated dependencies [74cd391]
+ - @ai-sdk/openai@1.3.18
+
+## 1.3.18
+
+### Patch Changes
+
+- Updated dependencies [ca7bce3]
+ - @ai-sdk/openai@1.3.17
+
+## 1.3.17
+
+### Patch Changes
+
+- Updated dependencies [bd6e457]
+ - @ai-sdk/openai@1.3.16
+
+## 1.3.16
+
+### Patch Changes
+
+- Updated dependencies [98d954e]
+ - @ai-sdk/openai@1.3.15
+
+## 1.3.15
+
+### Patch Changes
+
+- Updated dependencies [980141c]
+ - @ai-sdk/openai@1.3.14
+
+## 1.3.14
+
+### Patch Changes
+
+- Updated dependencies [75b9849]
+ - @ai-sdk/openai@1.3.13
+
+## 1.3.13
+
+### Patch Changes
+
+- Updated dependencies [575339f]
+ - @ai-sdk/openai@1.3.12
+
+## 1.3.12
+
+### Patch Changes
+
+- 3c26c55: feat(providers/azure): add transcribe
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/openai@1.3.11
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.3.11
+
+### Patch Changes
+
+- Updated dependencies [dbe53e7]
+- Updated dependencies [84ffaba]
+ - @ai-sdk/openai@1.3.10
+
+## 1.3.10
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/openai@1.3.9
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.3.9
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/openai@1.3.8
+
+## 1.3.8
+
+### Patch Changes
+
+- e82024e: feat (provider/azure): add OpenAI responses API support
+
+## 1.3.7
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/openai@1.3.7
+
## 1.3.6
### Patch Changes
diff --git a/packages/azure/README.md b/packages/azure/README.md
index 665826ad0bf4..df8c72e818e4 100644
--- a/packages/azure/README.md
+++ b/packages/azure/README.md
@@ -1,6 +1,6 @@
# AI SDK - Azure OpenAI Provider
-The **[Azure provider](https://sdk.vercel.ai/providers/ai-sdk-providers/azure)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the Azure OpenAI API.
+The **[Azure provider](https://ai-sdk.dev/providers/ai-sdk-providers/azure)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the Azure OpenAI API.
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Azure provider](https://sdk.vercel.ai/providers/ai-sdk-providers/azure)** for more information.
+Please check out the **[Azure provider](https://ai-sdk.dev/providers/ai-sdk-providers/azure)** for more information.
diff --git a/packages/azure/package.json b/packages/azure/package.json
index ff2d92b8130f..1605a69dde9b 100644
--- a/packages/azure/package.json
+++ b/packages/azure/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/azure",
- "version": "1.3.6",
+ "version": "1.3.23",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -31,9 +31,9 @@
}
},
"dependencies": {
- "@ai-sdk/openai": "1.3.6",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/openai": "1.3.22",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -51,7 +51,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/azure/src/azure-openai-provider.test.ts b/packages/azure/src/azure-openai-provider.test.ts
index 4958b07d5981..7c926e551bcc 100644
--- a/packages/azure/src/azure-openai-provider.test.ts
+++ b/packages/azure/src/azure-openai-provider.test.ts
@@ -29,6 +29,7 @@ const server = createTestServer({
{},
'https://test-resource.openai.azure.com/openai/deployments/dalle-deployment/images/generations':
{},
+ 'https://test-resource.openai.azure.com/openai/responses': {},
});
describe('chat', () => {
@@ -74,7 +75,7 @@ describe('chat', () => {
expect(
server.calls[0].requestUrlSearchParams.get('api-version'),
- ).toStrictEqual('2024-10-01-preview');
+ ).toStrictEqual('2025-03-01-preview');
});
it('should set the correct modified api version', async () => {
@@ -132,9 +133,8 @@ describe('chat', () => {
mode: { type: 'regular' },
prompt: TEST_PROMPT,
});
-
expect(server.calls[0].requestUrl).toStrictEqual(
- 'https://test-resource.openai.azure.com/openai/deployments/test-deployment/chat/completions?api-version=2024-10-01-preview',
+ 'https://test-resource.openai.azure.com/openai/deployments/test-deployment/chat/completions?api-version=2025-03-01-preview',
);
});
});
@@ -195,10 +195,9 @@ describe('completion', () => {
mode: { type: 'regular' },
prompt: TEST_PROMPT,
});
-
expect(
server.calls[0].requestUrlSearchParams.get('api-version'),
- ).toStrictEqual('2024-10-01-preview');
+ ).toStrictEqual('2025-03-01-preview');
});
it('should pass headers', async () => {
@@ -269,10 +268,9 @@ describe('embedding', () => {
await model.doEmbed({
values: testValues,
});
-
expect(
server.calls[0].requestUrlSearchParams.get('api-version'),
- ).toStrictEqual('2024-10-01-preview');
+ ).toStrictEqual('2025-03-01-preview');
});
it('should pass headers', async () => {
@@ -342,7 +340,7 @@ describe('image', () => {
expect(
server.calls[0].requestUrlSearchParams.get('api-version'),
- ).toStrictEqual('2024-10-01-preview');
+ ).toStrictEqual('2025-03-01-preview');
});
it('should set the correct modified api version', async () => {
@@ -413,7 +411,7 @@ describe('image', () => {
});
expect(server.calls[0].requestUrl).toStrictEqual(
- 'https://test-resource.openai.azure.com/openai/deployments/dalle-deployment/images/generations?api-version=2024-10-01-preview',
+ 'https://test-resource.openai.azure.com/openai/deployments/dalle-deployment/images/generations?api-version=2025-03-01-preview',
);
});
@@ -465,3 +463,107 @@ describe('image', () => {
});
});
});
+
+describe('responses', () => {
+ describe('doGenerate', () => {
+ function prepareJsonResponse({
+ content = '',
+ usage = {
+ input_tokens: 4,
+ output_tokens: 30,
+ total_tokens: 34,
+ },
+ } = {}) {
+ server.urls[
+ 'https://test-resource.openai.azure.com/openai/responses'
+ ].response = {
+ type: 'json-value',
+ body: {
+ id: 'resp_67c97c0203188190a025beb4a75242bc',
+ object: 'response',
+ created_at: 1741257730,
+ status: 'completed',
+ model: 'test-deployment',
+ output: [
+ {
+ id: 'msg_67c97c02656c81908e080dfdf4a03cd1',
+ type: 'message',
+ status: 'completed',
+ role: 'assistant',
+ content: [
+ {
+ type: 'output_text',
+ text: content,
+ annotations: [],
+ },
+ ],
+ },
+ ],
+ usage,
+ incomplete_details: null,
+ },
+ };
+ }
+
+ it('should set the correct api version', async () => {
+ prepareJsonResponse();
+
+ await provider.responses('test-deployment').doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(
+ server.calls[0].requestUrlSearchParams.get('api-version'),
+ ).toStrictEqual('2025-03-01-preview');
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createAzure({
+ resourceName: 'test-resource',
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.responses('test-deployment').doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toStrictEqual({
+ 'api-key': 'test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should use the baseURL correctly', async () => {
+ prepareJsonResponse();
+
+ const provider = createAzure({
+ baseURL: 'https://test-resource.openai.azure.com/openai',
+ apiKey: 'test-api-key',
+ });
+
+ await provider.responses('test-deployment').doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(server.calls[0].requestUrl).toStrictEqual(
+ 'https://test-resource.openai.azure.com/openai/responses?api-version=2025-03-01-preview',
+ );
+ });
+ });
+});
diff --git a/packages/azure/src/azure-openai-provider.ts b/packages/azure/src/azure-openai-provider.ts
index 8d51750f0727..7b62c430078e 100644
--- a/packages/azure/src/azure-openai-provider.ts
+++ b/packages/azure/src/azure-openai-provider.ts
@@ -7,12 +7,15 @@ import {
OpenAIEmbeddingSettings,
OpenAIImageModel,
OpenAIImageSettings,
+ OpenAIResponsesLanguageModel,
+ OpenAITranscriptionModel,
} from '@ai-sdk/openai/internal';
import {
EmbeddingModelV1,
LanguageModelV1,
ProviderV1,
ImageModelV1,
+ TranscriptionModelV1,
} from '@ai-sdk/provider';
import { FetchFunction, loadApiKey, loadSetting } from '@ai-sdk/provider-utils';
@@ -32,6 +35,11 @@ Creates an Azure OpenAI chat model for text generation.
*/
chat(deploymentId: string, settings?: OpenAIChatSettings): LanguageModelV1;
+ /**
+Creates an Azure OpenAI responses API model for text generation.
+ */
+ responses(deploymentId: string): LanguageModelV1;
+
/**
Creates an Azure OpenAI completion model for text generation.
*/
@@ -77,6 +85,11 @@ Creates an Azure OpenAI model for text embeddings.
deploymentId: string,
settings?: OpenAIEmbeddingSettings,
): EmbeddingModelV1<string>;
+
+ /**
+ * Creates an Azure OpenAI model for audio transcription.
+ */
+ transcription(deploymentId: string): TranscriptionModelV1;
}
export interface AzureOpenAIProviderSettings {
@@ -140,11 +153,19 @@ export function createAzure(
description: 'Azure OpenAI resource name',
});
- const apiVersion = options.apiVersion ?? '2024-10-01-preview';
- const url = ({ path, modelId }: { path: string; modelId: string }) =>
- options.baseURL
+ const apiVersion = options.apiVersion ?? '2025-03-01-preview';
+ const url = ({ path, modelId }: { path: string; modelId: string }) => {
+ if (path === '/responses') {
+ return options.baseURL
+ ? `${options.baseURL}${path}?api-version=${apiVersion}`
+ : `https://${getResourceName()}.openai.azure.com/openai/responses?api-version=${apiVersion}`;
+ }
+
+ // Default URL format for other endpoints
+ return options.baseURL
? `${options.baseURL}/${modelId}${path}?api-version=${apiVersion}`
: `https://${getResourceName()}.openai.azure.com/openai/deployments/${modelId}${path}?api-version=${apiVersion}`;
+ };
const createChatModel = (
deploymentName: string,
@@ -181,6 +202,14 @@ export function createAzure(
fetch: options.fetch,
});
+ const createResponsesModel = (modelId: string) =>
+ new OpenAIResponsesLanguageModel(modelId, {
+ provider: 'azure-openai.responses',
+ url,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
const createImageModel = (
modelId: string,
settings: OpenAIImageSettings = {},
@@ -192,6 +221,14 @@ export function createAzure(
fetch: options.fetch,
});
+ const createTranscriptionModel = (modelId: string) =>
+ new OpenAITranscriptionModel(modelId, {
+ provider: 'azure-openai.transcription',
+ url,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
const provider = function (
deploymentId: string,
settings?: OpenAIChatSettings | OpenAICompletionSettings,
@@ -213,7 +250,8 @@ export function createAzure(
provider.imageModel = createImageModel;
provider.textEmbedding = createEmbeddingModel;
provider.textEmbeddingModel = createEmbeddingModel;
-
+ provider.responses = createResponsesModel;
+ provider.transcription = createTranscriptionModel;
return provider;
}
diff --git a/packages/cerebras/CHANGELOG.md b/packages/cerebras/CHANGELOG.md
index 45fe1286eeb7..8c3057a9aa6b 100644
--- a/packages/cerebras/CHANGELOG.md
+++ b/packages/cerebras/CHANGELOG.md
@@ -1,5 +1,76 @@
# @ai-sdk/cerebras
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/openai-compatible@0.2.14
+
+## 0.2.13
+
+### Patch Changes
+
+- Updated dependencies [23571c9]
+ - @ai-sdk/openai-compatible@0.2.13
+
+## 0.2.12
+
+### Patch Changes
+
+- Updated dependencies [13492fe]
+ - @ai-sdk/openai-compatible@0.2.12
+
+## 0.2.11
+
+### Patch Changes
+
+- Updated dependencies [b5c9cd4]
+ - @ai-sdk/openai-compatible@0.2.11
+
+## 0.2.10
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/openai-compatible@0.2.10
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.2.9
+
+### Patch Changes
+
+- Updated dependencies [1bbc698]
+ - @ai-sdk/openai-compatible@0.2.9
+
+## 0.2.8
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/openai-compatible@0.2.8
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.2.7
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/openai-compatible@0.2.7
+
+## 0.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/openai-compatible@0.2.6
+
## 0.2.5
### Patch Changes
diff --git a/packages/cerebras/README.md b/packages/cerebras/README.md
index 1bc1b9b776a2..224b921f9be5 100644
--- a/packages/cerebras/README.md
+++ b/packages/cerebras/README.md
@@ -1,6 +1,6 @@
# AI SDK - Cerebras Provider
-The **Cerebras provider** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for [Cerebras](https://cerebras.ai), offering high-speed AI model inference powered by Cerebras Wafer-Scale Engines and CS-3 systems.
+The **Cerebras provider** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for [Cerebras](https://cerebras.ai), offering high-speed AI model inference powered by Cerebras Wafer-Scale Engines and CS-3 systems.
## Setup
diff --git a/packages/cerebras/package.json b/packages/cerebras/package.json
index 870a0dbcea4b..4c732995245c 100644
--- a/packages/cerebras/package.json
+++ b/packages/cerebras/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/cerebras",
- "version": "0.2.5",
+ "version": "0.2.14",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,9 +30,9 @@
}
},
"dependencies": {
- "@ai-sdk/openai-compatible": "0.2.5",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/openai-compatible": "0.2.14",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/codemod/package.json b/packages/codemod/package.json
index 1156e2d0b649..f88a215c075b 100644
--- a/packages/codemod/package.json
+++ b/packages/codemod/package.json
@@ -40,7 +40,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/codemod/src/codemods/remove-ai-stream-methods-from-stream-text-result.ts b/packages/codemod/src/codemods/remove-ai-stream-methods-from-stream-text-result.ts
index 6864af1cfa6d..0baa5b84481d 100644
--- a/packages/codemod/src/codemods/remove-ai-stream-methods-from-stream-text-result.ts
+++ b/packages/codemod/src/codemods/remove-ai-stream-methods-from-stream-text-result.ts
@@ -23,7 +23,7 @@ export default createTransformer((fileInfo, api, options, context) => {
// Add block comment above the statement
const comment = j.commentBlock(
` WARNING: ${path.node.property.name} has been removed from streamText.\n` +
- ` See migration guide at https://sdk.vercel.ai/docs/migrations `,
+ ` See migration guide at https://ai-sdk.dev/docs/migration-guides `,
true, // leading
false, // trailing
);
diff --git a/packages/codemod/src/test/__testfixtures__/remove-ai-stream-methods-from-stream-text-result.output.ts b/packages/codemod/src/test/__testfixtures__/remove-ai-stream-methods-from-stream-text-result.output.ts
index fc0d15529671..3d828f4960e6 100644
--- a/packages/codemod/src/test/__testfixtures__/remove-ai-stream-methods-from-stream-text-result.output.ts
+++ b/packages/codemod/src/test/__testfixtures__/remove-ai-stream-methods-from-stream-text-result.output.ts
@@ -8,12 +8,12 @@ async function handler(req, res) {
});
const /* WARNING: toAIStream has been removed from streamText.
- See migration guide at https://sdk.vercel.ai/docs/migrations */
+ See migration guide at https://ai-sdk.dev/docs/migration-guides */
aiStream = stream.toAIStream();
/* WARNING: pipeAIStreamToResponse has been removed from streamText.
- See migration guide at https://sdk.vercel.ai/docs/migrations */
+ See migration guide at https://ai-sdk.dev/docs/migration-guides */
stream.pipeAIStreamToResponse(res);
/* WARNING: toAIStreamResponse has been removed from streamText.
- See migration guide at https://sdk.vercel.ai/docs/migrations */
+ See migration guide at https://ai-sdk.dev/docs/migration-guides */
return stream.toAIStreamResponse();
}
diff --git a/packages/cohere/CHANGELOG.md b/packages/cohere/CHANGELOG.md
index 1270637b8e16..b019aedc98db 100644
--- a/packages/cohere/CHANGELOG.md
+++ b/packages/cohere/CHANGELOG.md
@@ -1,5 +1,49 @@
# @ai-sdk/cohere
+## 1.2.10
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.2.9
+
+### Patch Changes
+
+- 033f445: fix (provider/cohere): tool calling
+
+## 1.2.8
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.2.7
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.2.6
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.2.5
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 1.2.4
### Patch Changes
diff --git a/packages/cohere/README.md b/packages/cohere/README.md
index 8c31e7d41632..fcc6068f035d 100644
--- a/packages/cohere/README.md
+++ b/packages/cohere/README.md
@@ -1,6 +1,6 @@
# AI SDK - Cohere Provider
-The **[Cohere provider](https://sdk.vercel.ai/providers/ai-sdk-providers/cohere)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the Cohere API.
+The **[Cohere provider](https://ai-sdk.dev/providers/ai-sdk-providers/cohere)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the Cohere API.
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Cohere provider](https://sdk.vercel.ai/providers/ai-sdk-providers/cohere)** for more information.
+Please check out the **[Cohere provider](https://ai-sdk.dev/providers/ai-sdk-providers/cohere)** for more information.
diff --git a/packages/cohere/package.json b/packages/cohere/package.json
index 504eafbf1ca2..b43b43817e40 100644
--- a/packages/cohere/package.json
+++ b/packages/cohere/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/cohere",
- "version": "1.2.4",
+ "version": "1.2.10",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -31,8 +31,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/cohere/src/convert-to-cohere-chat-prompt.test.ts b/packages/cohere/src/convert-to-cohere-chat-prompt.test.ts
index f0235e2a518e..6329a107bb44 100644
--- a/packages/cohere/src/convert-to-cohere-chat-prompt.test.ts
+++ b/packages/cohere/src/convert-to-cohere-chat-prompt.test.ts
@@ -22,7 +22,7 @@ describe('tool messages', () => {
expect(result).toEqual([
{
- content: 'Calling a tool',
+ content: undefined,
role: 'assistant',
tool_calls: [
{
diff --git a/packages/cohere/src/convert-to-cohere-chat-prompt.ts b/packages/cohere/src/convert-to-cohere-chat-prompt.ts
index d6db681e6aee..01dbdbc5457e 100644
--- a/packages/cohere/src/convert-to-cohere-chat-prompt.ts
+++ b/packages/cohere/src/convert-to-cohere-chat-prompt.ts
@@ -63,10 +63,7 @@ export function convertToCohereChatPrompt(
messages.push({
role: 'assistant',
- // note: this is a workaround for a Cohere API bug
- // that requires content to be provided
- // even if there are tool calls
- content: text !== '' ? text : 'call tool',
+ content: toolCalls.length > 0 ? undefined : text,
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
tool_plan: undefined,
});
diff --git a/packages/deepgram/CHANGELOG.md b/packages/deepgram/CHANGELOG.md
new file mode 100644
index 000000000000..81e6c8af8286
--- /dev/null
+++ b/packages/deepgram/CHANGELOG.md
@@ -0,0 +1,7 @@
+# @ai-sdk/deepgram
+
+## 0.0.1
+
+### Patch Changes
+
+- 3eeb27f: feat(providers/deepgram): add transcribe
diff --git a/packages/deepgram/README.md b/packages/deepgram/README.md
new file mode 100644
index 000000000000..3e5f23a5d365
--- /dev/null
+++ b/packages/deepgram/README.md
@@ -0,0 +1,38 @@
+# AI SDK - Deepgram Provider
+
+The **[Deepgram provider](https://ai-sdk.dev/providers/ai-sdk-providers/deepgram)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains transcription model support for the Deepgram transcription API.
+
+## Setup
+
+The Deepgram provider is available in the `@ai-sdk/deepgram` module. You can install it with
+
+```bash
+npm i @ai-sdk/deepgram
+```
+
+## Provider Instance
+
+You can import the default provider instance `deepgram` from `@ai-sdk/deepgram`:
+
+```ts
+import { deepgram } from '@ai-sdk/deepgram';
+```
+
+## Example
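+You can also create a customized provider instance with `createDeepgram` (exported from this package), e.g. to pass an explicit API key instead of relying on the `DEEPGRAM_API_KEY` environment variable. A minimal sketch:
+
+```ts
+import { createDeepgram } from '@ai-sdk/deepgram';
+
+const deepgram = createDeepgram({
+  // optional; defaults to the DEEPGRAM_API_KEY environment variable
+  apiKey: process.env.DEEPGRAM_API_KEY,
+});
+```
+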
+
+```ts
+import { deepgram } from '@ai-sdk/deepgram';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const { text } = await transcribe({
+ model: deepgram.transcription('nova-3'),
+ audio: new URL(
+ 'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+ ),
+});
+```
+
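+Deepgram-specific settings supported by this package (e.g. `smartFormat`, `punctuate`, `diarize`) are forwarded to the Deepgram API as query parameters. Assuming the standard `providerOptions` parameter of `transcribe`, a minimal sketch of passing them under the `deepgram` key:
+
+```ts
+import { deepgram } from '@ai-sdk/deepgram';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const result = await transcribe({
+  model: deepgram.transcription('nova-3'),
+  audio: new URL(
+    'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+  ),
+  providerOptions: {
+    deepgram: {
+      smartFormat: true, // format numbers, dates, etc.
+      punctuate: true,
+      diarize: true, // identify speakers
+    },
+  },
+});
+
+console.log(result.text);
+console.log(result.segments); // word-level segments with start/end seconds
+```
+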
+## Documentation
+
+Please check out the **[Deepgram provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/deepgram)** for more information.
diff --git a/packages/deepgram/package.json b/packages/deepgram/package.json
new file mode 100644
index 000000000000..3e8f91465fb5
--- /dev/null
+++ b/packages/deepgram/package.json
@@ -0,0 +1,64 @@
+{
+ "name": "@ai-sdk/deepgram",
+ "version": "0.0.1",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.2",
+ "@ai-sdk/provider-utils": "2.2.6"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/deepgram/src/deepgram-api-types.ts b/packages/deepgram/src/deepgram-api-types.ts
new file mode 100644
index 000000000000..e4f00b3c7296
--- /dev/null
+++ b/packages/deepgram/src/deepgram-api-types.ts
@@ -0,0 +1,35 @@
+export type DeepgramTranscriptionAPITypes = {
+ // Base parameters
+ language?: string;
+ model?: string;
+
+ // Formatting options
+ smart_format?: boolean;
+ punctuate?: boolean;
+ paragraphs?: boolean;
+
+ // Summarization and analysis
+ summarize?: 'v2' | false;
+ topics?: boolean;
+ intents?: boolean;
+ sentiment?: boolean;
+
+ // Entity detection
+ detect_entities?: boolean;
+
+ // Redaction options
+ redact?: string | string[];
+ replace?: string;
+
+ // Search and keywords
+ search?: string;
+ keyterm?: string;
+
+ // Speaker-related features
+ diarize?: boolean;
+ utterances?: boolean;
+ utt_split?: number;
+
+ // Miscellaneous
+ filler_words?: boolean;
+};
diff --git a/packages/deepgram/src/deepgram-config.ts b/packages/deepgram/src/deepgram-config.ts
new file mode 100644
index 000000000000..f4e800c9a74b
--- /dev/null
+++ b/packages/deepgram/src/deepgram-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type DeepgramConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/deepgram/src/deepgram-error.test.ts b/packages/deepgram/src/deepgram-error.test.ts
new file mode 100644
index 000000000000..6c1b7ee2e223
--- /dev/null
+++ b/packages/deepgram/src/deepgram-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { deepgramErrorDataSchema } from './deepgram-error';
+
+describe('deepgramErrorDataSchema', () => {
+ it('should parse Deepgram resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: deepgramErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/deepgram/src/deepgram-error.ts b/packages/deepgram/src/deepgram-error.ts
new file mode 100644
index 000000000000..e5607f1259cc
--- /dev/null
+++ b/packages/deepgram/src/deepgram-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const deepgramErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type DeepgramErrorData = z.infer<typeof deepgramErrorDataSchema>;
+
+export const deepgramFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: deepgramErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/deepgram/src/deepgram-provider.ts b/packages/deepgram/src/deepgram-provider.ts
new file mode 100644
index 000000000000..f463b444839f
--- /dev/null
+++ b/packages/deepgram/src/deepgram-provider.ts
@@ -0,0 +1,77 @@
+import { TranscriptionModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { DeepgramTranscriptionModel } from './deepgram-transcription-model';
+import { DeepgramTranscriptionModelId } from './deepgram-transcription-settings';
+
+export interface DeepgramProvider
+ extends Pick<ProviderV1, 'transcriptionModel'> {
+ (
+ modelId: 'nova-3',
+ settings?: {},
+ ): {
+ transcription: DeepgramTranscriptionModel;
+ };
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: DeepgramTranscriptionModelId): TranscriptionModelV1;
+}
+
+export interface DeepgramProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create a Deepgram provider instance.
+ */
+export function createDeepgram(
+ options: DeepgramProviderSettings = {},
+): DeepgramProvider {
+ const getHeaders = () => ({
+ authorization: `Token ${loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'DEEPGRAM_API_KEY',
+ description: 'Deepgram',
+ })}`,
+ ...options.headers,
+ });
+
+ const createTranscriptionModel = (modelId: DeepgramTranscriptionModelId) =>
+ new DeepgramTranscriptionModel(modelId, {
+ provider: `deepgram.transcription`,
+ url: ({ path }) => `https://api.deepgram.com${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function (modelId: DeepgramTranscriptionModelId) {
+ return {
+ transcription: createTranscriptionModel(modelId),
+ };
+ };
+
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ return provider as DeepgramProvider;
+}
+
+/**
+Default Deepgram provider instance.
+ */
+export const deepgram = createDeepgram();
diff --git a/packages/deepgram/src/deepgram-transcription-model.test.ts b/packages/deepgram/src/deepgram-transcription-model.test.ts
new file mode 100644
index 000000000000..f2b99bf0a4d4
--- /dev/null
+++ b/packages/deepgram/src/deepgram-transcription-model.test.ts
@@ -0,0 +1,194 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { DeepgramTranscriptionModel } from './deepgram-transcription-model';
+import { createDeepgram } from './deepgram-provider';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createDeepgram({ apiKey: 'test-api-key' });
+const model = provider.transcription('nova-3');
+
+const server = createTestServer({
+ 'https://api.deepgram.com/v1/listen': {},
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ server.urls['https://api.deepgram.com/v1/listen'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ metadata: {
+ transaction_key: 'deprecated',
+ request_id: '2479c8c8-8185-40ac-9ac6-f0874419f793',
+ sha256:
+ '154e291ecfa8be6ab8343560bcc109008fa7853eb5372533e8efdefc9b504c33',
+ created: '2024-02-06T19:56:16.180Z',
+ duration: 25.933313,
+ channels: 1,
+ models: ['30089e05-99d1-4376-b32e-c263170674af'],
+ model_info: {
+ '30089e05-99d1-4376-b32e-c263170674af': {
+ name: '2-general-nova',
+ version: '2024-01-09.29447',
+ arch: 'nova-3',
+ },
+ },
+ },
+ results: {
+ channels: [
+ {
+ alternatives: [
+ {
+ transcript: 'Hello world!',
+ confidence: 0.99902344,
+ words: [
+ {
+ word: 'hello',
+ start: 0.08,
+ end: 0.32,
+ confidence: 0.9975586,
+ punctuated_word: 'Hello.',
+ },
+ {
+ word: 'world',
+ start: 0.32,
+ end: 0.79999995,
+ confidence: 0.9921875,
+ punctuated_word: 'World',
+ },
+ ],
+ paragraphs: {
+ transcript: 'Hello world!',
+ paragraphs: [
+ {
+ sentences: [
+ {
+ text: 'Hello world!',
+ start: 0.08,
+ end: 0.32,
+ },
+ ],
+ num_words: 2,
+ start: 0.08,
+ end: 0.79999995,
+ },
+ ],
+ },
+ },
+ ],
+ },
+ ],
+ },
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[0].requestBodyMultipart).toMatchObject({});
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createDeepgram({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('nova-3').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'Token test-api-key',
+ 'content-type': 'audio/wav',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Hello world!');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new DeepgramTranscriptionModel('nova-3', {
+ provider: 'test-provider',
+ url: () => 'https://api.deepgram.com/v1/listen',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'nova-3',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new DeepgramTranscriptionModel('nova-3', {
+ provider: 'test-provider',
+ url: () => 'https://api.deepgram.com/v1/listen',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('nova-3');
+ });
+});
diff --git a/packages/deepgram/src/deepgram-transcription-model.ts b/packages/deepgram/src/deepgram-transcription-model.ts
new file mode 100644
index 000000000000..6c021298bcce
--- /dev/null
+++ b/packages/deepgram/src/deepgram-transcription-model.ts
@@ -0,0 +1,210 @@
+import {
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ convertBase64ToUint8Array,
+ createJsonResponseHandler,
+ parseProviderOptions,
+ postToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { DeepgramConfig } from './deepgram-config';
+import { deepgramFailedResponseHandler } from './deepgram-error';
+import { DeepgramTranscriptionModelId } from './deepgram-transcription-settings';
+import { DeepgramTranscriptionAPITypes } from './deepgram-api-types';
+
+// https://developers.deepgram.com/docs/pre-recorded-audio#results
+const deepgramProviderOptionsSchema = z.object({
+ /** Language to use for transcription. If not specified, Deepgram will auto-detect the language. */
+ language: z.string().nullish(),
+ /** Whether to use smart formatting, which formats written-out numbers, dates, times, etc. */
+ smartFormat: z.boolean().nullish(),
+ /** Whether to add punctuation to the transcript. */
+ punctuate: z.boolean().nullish(),
+ /** Whether to format the transcript into paragraphs. */
+ paragraphs: z.boolean().nullish(),
+ /** Whether to generate a summary of the transcript. Use 'v2' for the latest version or false to disable. */
+ summarize: z.union([z.literal('v2'), z.literal(false)]).nullish(),
+ /** Whether to identify topics in the transcript. */
+ topics: z.boolean().nullish(),
+ /** Whether to identify intents in the transcript. */
+ intents: z.boolean().nullish(),
+ /** Whether to analyze sentiment in the transcript. */
+ sentiment: z.boolean().nullish(),
+ /** Whether to detect and tag named entities in the transcript. */
+ detectEntities: z.boolean().nullish(),
+ /** Specify terms or patterns to redact from the transcript. Can be a string or array of strings. */
+ redact: z.union([z.string(), z.array(z.string())]).nullish(),
+ /** String to replace redacted content with. */
+ replace: z.string().nullish(),
+ /** Term or phrase to search for in the transcript. */
+ search: z.string().nullish(),
+ /** Key term to identify in the transcript. */
+ keyterm: z.string().nullish(),
+ /** Whether to identify different speakers in the audio. */
+ diarize: z.boolean().nullish(),
+ /** Whether to segment the transcript into utterances. */
+ utterances: z.boolean().nullish(),
+ /** Minimum duration of silence (in seconds) to trigger a new utterance. */
+ uttSplit: z.number().nullish(),
+ /** Whether to include filler words (um, uh, etc.) in the transcript. */
+ fillerWords: z.boolean().nullish(),
+});
+
+export type DeepgramTranscriptionCallOptions = z.infer<
+ typeof deepgramProviderOptionsSchema
+>;
+
+interface DeepgramTranscriptionModelConfig extends DeepgramConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class DeepgramTranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: DeepgramTranscriptionModelId,
+ private readonly config: DeepgramTranscriptionModelConfig,
+ ) {}
+
+ private getArgs({
+ providerOptions,
+ }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const deepgramOptions = parseProviderOptions({
+ provider: 'deepgram',
+ providerOptions,
+ schema: deepgramProviderOptionsSchema,
+ });
+
+ const body: DeepgramTranscriptionAPITypes = {
+ model: this.modelId,
+ diarize: true,
+ };
+
+ // Add provider-specific options
+ if (deepgramOptions) {
+ body.detect_entities = deepgramOptions.detectEntities ?? undefined;
+ body.filler_words = deepgramOptions.fillerWords ?? undefined;
+ body.language = deepgramOptions.language ?? undefined;
+ body.punctuate = deepgramOptions.punctuate ?? undefined;
+ body.redact = deepgramOptions.redact ?? undefined;
+ body.search = deepgramOptions.search ?? undefined;
+ body.smart_format = deepgramOptions.smartFormat ?? undefined;
+ body.summarize = deepgramOptions.summarize ?? undefined;
+ body.topics = deepgramOptions.topics ?? undefined;
+ body.utterances = deepgramOptions.utterances ?? undefined;
+ body.utt_split = deepgramOptions.uttSplit ?? undefined;
+
+ if (typeof deepgramOptions.diarize === 'boolean') {
+ body.diarize = deepgramOptions.diarize;
+ }
+ }
+
+ // Convert body to URL query parameters
+ const queryParams = new URLSearchParams();
+ for (const [key, value] of Object.entries(body)) {
+ if (value !== undefined) {
+ queryParams.append(key, String(value));
+ }
+ }
+
+ return {
+ queryParams,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { queryParams, warnings } = this.getArgs(options);
+
+ const {
+ value: response,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postToApi({
+ url:
+ this.config.url({
+ path: '/v1/listen',
+ modelId: this.modelId,
+ }) +
+ '?' +
+ queryParams.toString(),
+ headers: {
+ ...combineHeaders(this.config.headers(), options.headers),
+ 'Content-Type': options.mediaType,
+ },
+ body: {
+ content: options.audio,
+ values: options.audio,
+ },
+ failedResponseHandler: deepgramFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ deepgramTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ text:
+ response.results?.channels.at(0)?.alternatives.at(0)?.transcript ?? '',
+ segments:
+ response.results?.channels[0].alternatives[0].words?.map(word => ({
+ text: word.word,
+ startSecond: word.start,
+ endSecond: word.end,
+ })) ?? [],
+ language: undefined,
+ durationInSeconds: response.metadata?.duration ?? undefined,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
+
+const deepgramTranscriptionResponseSchema = z.object({
+ metadata: z
+ .object({
+ duration: z.number(),
+ })
+ .nullish(),
+ results: z
+ .object({
+ channels: z.array(
+ z.object({
+ alternatives: z.array(
+ z.object({
+ transcript: z.string(),
+ words: z.array(
+ z.object({
+ word: z.string(),
+ start: z.number(),
+ end: z.number(),
+ }),
+ ),
+ }),
+ ),
+ }),
+ ),
+ })
+ .nullish(),
+});
diff --git a/packages/deepgram/src/deepgram-transcription-settings.ts b/packages/deepgram/src/deepgram-transcription-settings.ts
new file mode 100644
index 000000000000..6f427fd92666
--- /dev/null
+++ b/packages/deepgram/src/deepgram-transcription-settings.ts
@@ -0,0 +1,34 @@
+export type DeepgramTranscriptionModelId =
+ | 'base'
+ | 'base-general'
+ | 'base-meeting'
+ | 'base-phonecall'
+ | 'base-finance'
+ | 'base-conversationalai'
+ | 'base-voicemail'
+ | 'base-video'
+ | 'enhanced'
+ | 'enhanced-general'
+ | 'enhanced-meeting'
+ | 'enhanced-phonecall'
+ | 'enhanced-finance'
+ | 'nova'
+ | 'nova-general'
+ | 'nova-phonecall'
+ | 'nova-medical'
+ | 'nova-2'
+ | 'nova-2-general'
+ | 'nova-2-meeting'
+ | 'nova-2-phonecall'
+ | 'nova-2-finance'
+ | 'nova-2-conversationalai'
+ | 'nova-2-voicemail'
+ | 'nova-2-video'
+ | 'nova-2-medical'
+ | 'nova-2-drivethru'
+ | 'nova-2-automotive'
+ | 'nova-2-atc'
+ | 'nova-3'
+ | 'nova-3-general'
+ | 'nova-3-medical'
+ | (string & {});
diff --git a/packages/deepgram/src/index.ts b/packages/deepgram/src/index.ts
new file mode 100644
index 000000000000..4abcf2461f66
--- /dev/null
+++ b/packages/deepgram/src/index.ts
@@ -0,0 +1,5 @@
+export { createDeepgram, deepgram } from './deepgram-provider';
+export type {
+ DeepgramProvider,
+ DeepgramProviderSettings,
+} from './deepgram-provider';
diff --git a/packages/deepgram/src/transcript-test.mp3 b/packages/deepgram/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/deepgram/src/transcript-test.mp3 differ
diff --git a/packages/deepgram/tsconfig.json b/packages/deepgram/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/deepgram/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/deepgram/tsup.config.ts b/packages/deepgram/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/deepgram/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/deepgram/turbo.json b/packages/deepgram/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/deepgram/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/deepgram/vitest.edge.config.js b/packages/deepgram/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/deepgram/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/deepgram/vitest.node.config.js b/packages/deepgram/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/deepgram/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/deepinfra/CHANGELOG.md b/packages/deepinfra/CHANGELOG.md
index 913480777e74..04451a7e4453 100644
--- a/packages/deepinfra/CHANGELOG.md
+++ b/packages/deepinfra/CHANGELOG.md
@@ -1,5 +1,82 @@
# @ai-sdk/deepinfra
+## 0.2.15
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/openai-compatible@0.2.14
+
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [23571c9]
+ - @ai-sdk/openai-compatible@0.2.13
+
+## 0.2.13
+
+### Patch Changes
+
+- Updated dependencies [13492fe]
+ - @ai-sdk/openai-compatible@0.2.12
+
+## 0.2.12
+
+### Patch Changes
+
+- Updated dependencies [b5c9cd4]
+ - @ai-sdk/openai-compatible@0.2.11
+
+## 0.2.11
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/openai-compatible@0.2.10
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.2.10
+
+### Patch Changes
+
+- Updated dependencies [1bbc698]
+ - @ai-sdk/openai-compatible@0.2.9
+
+## 0.2.9
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/openai-compatible@0.2.8
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.2.8
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/openai-compatible@0.2.7
+
+## 0.2.7
+
+### Patch Changes
+
+- 264b1e0: feat (providers/deepinfra): add llama 4 models
+
+## 0.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/openai-compatible@0.2.6
+
## 0.2.5
### Patch Changes
diff --git a/packages/deepinfra/README.md b/packages/deepinfra/README.md
index 50412ba7f261..9dd53bbad6e3 100644
--- a/packages/deepinfra/README.md
+++ b/packages/deepinfra/README.md
@@ -1,6 +1,6 @@
# AI SDK - DeepInfra Provider
-The **[DeepInfra provider](https://sdk.vercel.ai/providers/ai-sdk-providers/deepinfra)** for the [AI SDK](https://sdk.vercel.ai/docs)
+The **[DeepInfra provider](https://ai-sdk.dev/providers/ai-sdk-providers/deepinfra)** for the [AI SDK](https://ai-sdk.dev/docs)
contains language model support for the DeepInfra API, giving you access to models like Llama 3, Mixtral, and other state-of-the-art LLMs.
## Setup
@@ -33,4 +33,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[DeepInfra provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/deepinfra)** for more information.
+Please check out the **[DeepInfra provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/deepinfra)** for more information.
diff --git a/packages/deepinfra/package.json b/packages/deepinfra/package.json
index 9fbb3b5d730b..96776de2845c 100644
--- a/packages/deepinfra/package.json
+++ b/packages/deepinfra/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/deepinfra",
- "version": "0.2.5",
+ "version": "0.2.15",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,9 +30,9 @@
}
},
"dependencies": {
- "@ai-sdk/openai-compatible": "0.2.5",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/openai-compatible": "0.2.14",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/deepinfra/src/deepinfra-chat-settings.ts b/packages/deepinfra/src/deepinfra-chat-settings.ts
index 8694d195e1e3..6c45c9f7dd0d 100644
--- a/packages/deepinfra/src/deepinfra-chat-settings.ts
+++ b/packages/deepinfra/src/deepinfra-chat-settings.ts
@@ -2,6 +2,8 @@ import { OpenAICompatibleChatSettings } from '@ai-sdk/openai-compatible';
// https://deepinfra.com/models/text-generation
export type DeepInfraChatModelId =
+ | 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'
+ | 'meta-llama/Llama-4-Scout-17B-16E-Instruct'
| 'meta-llama/Llama-3.3-70B-Instruct'
| 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
| 'meta-llama/Meta-Llama-3.1-70B-Instruct'
diff --git a/packages/deepseek/CHANGELOG.md b/packages/deepseek/CHANGELOG.md
index e1fde2d52f9f..bf5fd6ea9840 100644
--- a/packages/deepseek/CHANGELOG.md
+++ b/packages/deepseek/CHANGELOG.md
@@ -1,5 +1,76 @@
# @ai-sdk/deepseek
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/openai-compatible@0.2.14
+
+## 0.2.13
+
+### Patch Changes
+
+- Updated dependencies [23571c9]
+ - @ai-sdk/openai-compatible@0.2.13
+
+## 0.2.12
+
+### Patch Changes
+
+- Updated dependencies [13492fe]
+ - @ai-sdk/openai-compatible@0.2.12
+
+## 0.2.11
+
+### Patch Changes
+
+- Updated dependencies [b5c9cd4]
+ - @ai-sdk/openai-compatible@0.2.11
+
+## 0.2.10
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/openai-compatible@0.2.10
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.2.9
+
+### Patch Changes
+
+- Updated dependencies [1bbc698]
+ - @ai-sdk/openai-compatible@0.2.9
+
+## 0.2.8
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/openai-compatible@0.2.8
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.2.7
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/openai-compatible@0.2.7
+
+## 0.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/openai-compatible@0.2.6
+
## 0.2.5
### Patch Changes
diff --git a/packages/deepseek/README.md b/packages/deepseek/README.md
index 06cc21fd8c0b..e6c929d1a7d0 100644
--- a/packages/deepseek/README.md
+++ b/packages/deepseek/README.md
@@ -1,6 +1,6 @@
# AI SDK - DeepSeek Provider
-The **[DeepSeek provider](https://sdk.vercel.ai/providers/ai-sdk-providers/deepseek)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the [DeepSeek](https://www.deepseek.com) platform.
+The **[DeepSeek provider](https://ai-sdk.dev/providers/ai-sdk-providers/deepseek)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [DeepSeek](https://www.deepseek.com) platform.
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[DeepSeek provider](https://sdk.vercel.ai/providers/ai-sdk-providers/deepseek)** for more information.
+Please check out the **[DeepSeek provider](https://ai-sdk.dev/providers/ai-sdk-providers/deepseek)** for more information.
diff --git a/packages/deepseek/package.json b/packages/deepseek/package.json
index e07cfc9ccb50..991e605064ec 100644
--- a/packages/deepseek/package.json
+++ b/packages/deepseek/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/deepseek",
- "version": "0.2.5",
+ "version": "0.2.14",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,9 +30,9 @@
}
},
"dependencies": {
- "@ai-sdk/openai-compatible": "0.2.5",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/openai-compatible": "0.2.14",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/elevenlabs/CHANGELOG.md b/packages/elevenlabs/CHANGELOG.md
new file mode 100644
index 000000000000..6cac89655d8e
--- /dev/null
+++ b/packages/elevenlabs/CHANGELOG.md
@@ -0,0 +1,22 @@
+# @ai-sdk/elevenlabs
+
+## 0.0.3
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.0.1
+
+### Patch Changes
+
+- 01888d9: feat (provider/elevenlabs): add transcription provider
diff --git a/packages/elevenlabs/README.md b/packages/elevenlabs/README.md
new file mode 100644
index 000000000000..33da42c99feb
--- /dev/null
+++ b/packages/elevenlabs/README.md
@@ -0,0 +1,38 @@
+# AI SDK - ElevenLabs Provider
+
+The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains transcription model support for the ElevenLabs transcription API.
+
+## Setup
+
+The ElevenLabs provider is available in the `@ai-sdk/elevenlabs` module. You can install it with
+
+```bash
+npm i @ai-sdk/elevenlabs
+```
+
+## Provider Instance
+
+You can import the default provider instance `elevenlabs` from `@ai-sdk/elevenlabs`:
+
+```ts
+import { elevenlabs } from '@ai-sdk/elevenlabs';
+```
+
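+You can also create a customized provider instance with `createElevenLabs` (exported from this package), e.g. to pass an explicit API key instead of relying on the `ELEVENLABS_API_KEY` environment variable. A minimal sketch:
+
+```ts
+import { createElevenLabs } from '@ai-sdk/elevenlabs';
+
+const elevenlabs = createElevenLabs({
+  // optional; defaults to the ELEVENLABS_API_KEY environment variable
+  apiKey: process.env.ELEVENLABS_API_KEY,
+});
+```
+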
+## Example
+
+```ts
+import { elevenlabs } from '@ai-sdk/elevenlabs';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const { text } = await transcribe({
+ model: elevenlabs.transcription('scribe_v1'),
+ audio: new URL(
+ 'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+ ),
+});
+```
+
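+ElevenLabs-specific settings supported by this package (e.g. `languageCode`, `tagAudioEvents`, `numSpeakers`) can be passed as provider options. Assuming the standard `providerOptions` parameter of `transcribe` and the `elevenlabs` provider key, a minimal sketch:
+
+```ts
+import { elevenlabs } from '@ai-sdk/elevenlabs';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const result = await transcribe({
+  model: elevenlabs.transcription('scribe_v1'),
+  audio: new URL(
+    'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+  ),
+  providerOptions: {
+    elevenlabs: {
+      languageCode: 'en', // ISO-639-1/-3 code; auto-detected when omitted
+      tagAudioEvents: false,
+      numSpeakers: 1,
+    },
+  },
+});
+
+console.log(result.text);
+console.log(result.segments); // word/spacing segments with start/end seconds
+```
+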
+## Documentation
+
+Please check out the **[ElevenLabs provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for more information.
diff --git a/packages/elevenlabs/package.json b/packages/elevenlabs/package.json
new file mode 100644
index 000000000000..a2050c7d9e91
--- /dev/null
+++ b/packages/elevenlabs/package.json
@@ -0,0 +1,65 @@
+{
+ "name": "@ai-sdk/elevenlabs",
+ "version": "0.0.3",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "internal/dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist && rm -rf internal/dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/elevenlabs/src/elevenlabs-api-types.ts b/packages/elevenlabs/src/elevenlabs-api-types.ts
new file mode 100644
index 000000000000..f180e08e351f
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-api-types.ts
@@ -0,0 +1,100 @@
+export type ElevenLabsTranscriptionAPITypes = {
+ /**
+ * An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file.
+ * Can sometimes improve transcription performance if known beforehand.
+ * Defaults to null, in this case the language is predicted automatically.
+ */
+ language_code?: string;
+
+ /**
+ * Whether to tag audio events like (laughter), (footsteps), etc. in the transcription.
+ * @default true
+ */
+ tag_audio_events?: boolean;
+
+ /**
+ * The maximum amount of speakers talking in the uploaded file.
+ * Can help with predicting who speaks when.
+ * The maximum amount of speakers that can be predicted is 32.
+ * Defaults to null, in this case the amount of speakers is set to the maximum value the model supports.
+ * @min 1
+ * @max 32
+ */
+ num_speakers?: number;
+
+ /**
+ * The granularity of the timestamps in the transcription.
+ * 'word' provides word-level timestamps and 'character' provides character-level timestamps per word.
+ * @default 'word'
+ */
+ timestamps_granularity?: 'none' | 'word' | 'character';
+
+ /**
+ * Whether to annotate which speaker is currently talking in the uploaded file.
+ * @default false
+ */
+ diarize?: boolean;
+
+ /**
+ * A list of additional formats to export the transcript to.
+ */
+ additional_formats?: Array<
+ | {
+ format: 'docx';
+ include_speakers?: boolean;
+ include_timestamps?: boolean;
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ | {
+ format: 'html';
+ include_speakers?: boolean;
+ include_timestamps?: boolean;
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ | {
+ format: 'pdf';
+ include_speakers?: boolean;
+ include_timestamps?: boolean;
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ | {
+ format: 'segmented_json';
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ | {
+ format: 'srt';
+ include_speakers?: boolean;
+ include_timestamps?: boolean;
+ max_characters_per_line?: number;
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ | {
+ format: 'txt';
+ include_speakers?: boolean;
+ include_timestamps?: boolean;
+ max_characters_per_line?: number;
+ max_segment_chars?: number;
+ max_segment_duration_s?: number;
+ segment_on_silence_longer_than_s?: number;
+ }
+ >;
+
+ /**
+ * The format of input audio.
+ * For pcm_s16le_16, the input audio must be 16-bit PCM at a 16kHz sample rate,
+ * single channel (mono), and little-endian byte order.
+ * Latency will be lower than with passing an encoded waveform.
+ * @default 'other'
+ */
+ file_format?: 'pcm_s16le_16' | 'other';
+};
diff --git a/packages/elevenlabs/src/elevenlabs-config.ts b/packages/elevenlabs/src/elevenlabs-config.ts
new file mode 100644
index 000000000000..31524f4ddbc1
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type ElevenLabsConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/elevenlabs/src/elevenlabs-error.test.ts b/packages/elevenlabs/src/elevenlabs-error.test.ts
new file mode 100644
index 000000000000..2e2e400e6170
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { elevenlabsErrorDataSchema } from './elevenlabs-error';
+
+describe('elevenlabsErrorDataSchema', () => {
+ it('should parse ElevenLabs resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: elevenlabsErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/elevenlabs/src/elevenlabs-error.ts b/packages/elevenlabs/src/elevenlabs-error.ts
new file mode 100644
index 000000000000..b8d1182e347d
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const elevenlabsErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type ElevenLabsErrorData = z.infer<typeof elevenlabsErrorDataSchema>;
+
+export const elevenlabsFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: elevenlabsErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/elevenlabs/src/elevenlabs-provider.ts b/packages/elevenlabs/src/elevenlabs-provider.ts
new file mode 100644
index 000000000000..87ea08e43d2c
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-provider.ts
@@ -0,0 +1,77 @@
+import { TranscriptionModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { ElevenLabsTranscriptionModel } from './elevenlabs-transcription-model';
+import { ElevenLabsTranscriptionModelId } from './elevenlabs-transcription-settings';
+
+export interface ElevenLabsProvider
+ extends Pick<ProviderV1, 'transcriptionModel'> {
+ (
+ modelId: 'scribe_v1',
+ settings?: {},
+ ): {
+ transcription: ElevenLabsTranscriptionModel;
+ };
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: ElevenLabsTranscriptionModelId): TranscriptionModelV1;
+}
+
+export interface ElevenLabsProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create an ElevenLabs provider instance.
+ */
+export function createElevenLabs(
+ options: ElevenLabsProviderSettings = {},
+): ElevenLabsProvider {
+ const getHeaders = () => ({
+ 'xi-api-key': loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'ELEVENLABS_API_KEY',
+ description: 'ElevenLabs',
+ }),
+ ...options.headers,
+ });
+
+ const createTranscriptionModel = (modelId: ElevenLabsTranscriptionModelId) =>
+ new ElevenLabsTranscriptionModel(modelId, {
+ provider: `elevenlabs.transcription`,
+ url: ({ path }) => `https://api.elevenlabs.io${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function (modelId: ElevenLabsTranscriptionModelId) {
+ return {
+ transcription: createTranscriptionModel(modelId),
+ };
+ };
+
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ return provider as ElevenLabsProvider;
+}
+
+/**
+Default ElevenLabs provider instance.
+ */
+export const elevenlabs = createElevenLabs();
diff --git a/packages/elevenlabs/src/elevenlabs-transcription-model.test.ts b/packages/elevenlabs/src/elevenlabs-transcription-model.test.ts
new file mode 100644
index 000000000000..cde244a16448
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-transcription-model.test.ts
@@ -0,0 +1,345 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { ElevenLabsTranscriptionModel } from './elevenlabs-transcription-model';
+import { createElevenLabs } from './elevenlabs-provider';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createElevenLabs({ apiKey: 'test-api-key' });
+const model = provider.transcription('scribe_v1');
+
+const server = createTestServer({
+ 'https://api.elevenlabs.io/v1/speech-to-text': {},
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ server.urls['https://api.elevenlabs.io/v1/speech-to-text'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ language_code: 'en',
+ language_probability: 0.98,
+ text: 'Hello world!',
+ words: [
+ {
+ text: 'Hello',
+ type: 'word',
+ start: 0,
+ end: 0.5,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ {
+ text: ' ',
+ type: 'spacing',
+ start: 0.5,
+ end: 0.5,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ {
+ text: 'world!',
+ type: 'word',
+ start: 0.5,
+ end: 1.2,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ ],
+ additional_formats: [
+ {
+ requested_format: 'requested_format',
+ file_extension: 'file_extension',
+ content_type: 'content_type',
+ is_base64_encoded: true,
+ content: 'content',
+ },
+ ],
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[0].requestBodyMultipart).toMatchObject({
+ model_id: 'scribe_v1',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createElevenLabs({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('scribe_v1').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ 'xi-api-key': 'test-api-key',
+ 'content-type': expect.stringMatching(
+ /^multipart\/form-data; boundary=----formdata-undici-\d+$/,
+ ),
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Hello world!');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
+ provider: 'test-provider',
+ url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'scribe_v1',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
+ provider: 'test-provider',
+ url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('scribe_v1');
+ });
+
+ it('should work when no additional formats are returned', async () => {
+ server.urls['https://api.elevenlabs.io/v1/speech-to-text'].response = {
+ type: 'json-value',
+ body: {
+ language_code: 'en',
+ language_probability: 0.98,
+ text: 'Hello world!',
+ words: [
+ {
+ text: 'Hello',
+ type: 'word',
+ start: 0,
+ end: 0.5,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ {
+ text: ' ',
+ type: 'spacing',
+ start: 0.5,
+ end: 0.5,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ {
+ text: 'world!',
+ type: 'word',
+ start: 0.5,
+ end: 1.2,
+ speaker_id: 'speaker_1',
+ characters: [
+ {
+ text: 'text',
+ start: 0,
+ end: 0.1,
+ },
+ ],
+ },
+ ],
+ },
+ };
+
+ const testDate = new Date(0);
+ const customModel = new ElevenLabsTranscriptionModel('scribe_v1', {
+ provider: 'test-provider',
+ url: () => 'https://api.elevenlabs.io/v1/speech-to-text',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result).toMatchInlineSnapshot(`
+ {
+ "durationInSeconds": 1.2,
+ "language": "en",
+ "response": {
+ "body": {
+ "language_code": "en",
+ "language_probability": 0.98,
+ "text": "Hello world!",
+ "words": [
+ {
+ "characters": [
+ {
+ "end": 0.1,
+ "start": 0,
+ "text": "text",
+ },
+ ],
+ "end": 0.5,
+ "speaker_id": "speaker_1",
+ "start": 0,
+ "text": "Hello",
+ "type": "word",
+ },
+ {
+ "characters": [
+ {
+ "end": 0.1,
+ "start": 0,
+ "text": "text",
+ },
+ ],
+ "end": 0.5,
+ "speaker_id": "speaker_1",
+ "start": 0.5,
+ "text": " ",
+ "type": "spacing",
+ },
+ {
+ "characters": [
+ {
+ "end": 0.1,
+ "start": 0,
+ "text": "text",
+ },
+ ],
+ "end": 1.2,
+ "speaker_id": "speaker_1",
+ "start": 0.5,
+ "text": "world!",
+ "type": "word",
+ },
+ ],
+ },
+ "headers": {
+ "content-length": "467",
+ "content-type": "application/json",
+ },
+ "modelId": "scribe_v1",
+ "timestamp": 1970-01-01T00:00:00.000Z,
+ },
+ "segments": [
+ {
+ "endSecond": 0.5,
+ "startSecond": 0,
+ "text": "Hello",
+ },
+ {
+ "endSecond": 0.5,
+ "startSecond": 0.5,
+ "text": " ",
+ },
+ {
+ "endSecond": 1.2,
+ "startSecond": 0.5,
+ "text": "world!",
+ },
+ ],
+ "text": "Hello world!",
+ "warnings": [],
+ }
+ `);
+ });
+});
diff --git a/packages/elevenlabs/src/elevenlabs-transcription-model.ts b/packages/elevenlabs/src/elevenlabs-transcription-model.ts
new file mode 100644
index 000000000000..23f952833894
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-transcription-model.ts
@@ -0,0 +1,181 @@
+import {
+ TranscriptionModelV1,
+ TranscriptionModelV1CallOptions,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ convertBase64ToUint8Array,
+ createJsonResponseHandler,
+ parseProviderOptions,
+ postFormDataToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { ElevenLabsConfig } from './elevenlabs-config';
+import { elevenlabsFailedResponseHandler } from './elevenlabs-error';
+import { ElevenLabsTranscriptionModelId } from './elevenlabs-transcription-settings';
+import { ElevenLabsTranscriptionAPITypes } from './elevenlabs-api-types';
+
+// https://elevenlabs.io/docs/api-reference/speech-to-text/convert
+const elevenLabsProviderOptionsSchema = z.object({
+ languageCode: z.string().nullish(),
+ tagAudioEvents: z.boolean().nullish().default(true),
+ numSpeakers: z.number().int().min(1).max(32).nullish(),
+ timestampsGranularity: z
+ .enum(['none', 'word', 'character'])
+ .nullish()
+ .default('word'),
+ diarize: z.boolean().nullish().default(false),
+ file_format: z.enum(['pcm_s16le_16', 'other']).nullish().default('other'),
+});
+
+export type ElevenLabsTranscriptionCallOptions = z.infer<
+ typeof elevenLabsProviderOptionsSchema
+>;
+
+interface ElevenLabsTranscriptionModelConfig extends ElevenLabsConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class ElevenLabsTranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: ElevenLabsTranscriptionModelId,
+ private readonly config: ElevenLabsTranscriptionModelConfig,
+ ) {}
+
+ private getArgs({
+ audio,
+ mediaType,
+ providerOptions,
+ }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const elevenlabsOptions = parseProviderOptions({
+ provider: 'elevenlabs',
+ providerOptions,
+ schema: elevenLabsProviderOptionsSchema,
+ });
+
+ // Create form data with base fields
+ const formData = new FormData();
+ const blob =
+ audio instanceof Uint8Array
+ ? new Blob([audio])
+ : new Blob([convertBase64ToUint8Array(audio)]);
+
+ formData.append('model_id', this.modelId);
+ formData.append('file', new File([blob], 'audio', { type: mediaType }));
+ formData.append('diarize', 'true');
+
+ // Add provider-specific options
+ if (elevenlabsOptions) {
+ const transcriptionModelOptions: ElevenLabsTranscriptionAPITypes = {
+ language_code: elevenlabsOptions.languageCode ?? undefined,
+ tag_audio_events: elevenlabsOptions.tagAudioEvents ?? undefined,
+ num_speakers: elevenlabsOptions.numSpeakers ?? undefined,
+ timestamps_granularity:
+ elevenlabsOptions.timestampsGranularity ?? undefined,
+ file_format: elevenlabsOptions.file_format ?? undefined,
+ };
+
+ if (typeof elevenlabsOptions.diarize === 'boolean') {
+ formData.append('diarize', String(elevenlabsOptions.diarize));
+ }
+
+ for (const key in transcriptionModelOptions) {
+ const value =
+ transcriptionModelOptions[
+ key as keyof ElevenLabsTranscriptionAPITypes
+ ];
+ if (value !== undefined) {
+ formData.append(key, String(value));
+ }
+ }
+ }
+
+ return {
+ formData,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { formData, warnings } = this.getArgs(options);
+
+ const {
+ value: response,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postFormDataToApi({
+ url: this.config.url({
+ path: '/v1/speech-to-text',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ formData,
+ failedResponseHandler: elevenlabsFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ elevenlabsTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ text: response.text,
+ segments:
+ response.words?.map(word => ({
+ text: word.text,
+ startSecond: word.start ?? 0,
+ endSecond: word.end ?? 0,
+ })) ?? [],
+ language: response.language_code,
+ durationInSeconds: response.words?.at(-1)?.end ?? undefined,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
+
+const elevenlabsTranscriptionResponseSchema = z.object({
+ language_code: z.string(),
+ language_probability: z.number(),
+ text: z.string(),
+ words: z
+ .array(
+ z.object({
+ text: z.string(),
+ type: z.enum(['word', 'spacing', 'audio_event']),
+ start: z.number().nullish(),
+ end: z.number().nullish(),
+ speaker_id: z.string().nullish(),
+ characters: z
+ .array(
+ z.object({
+ text: z.string(),
+ start: z.number().nullish(),
+ end: z.number().nullish(),
+ }),
+ )
+ .nullish(),
+ }),
+ )
+ .nullish(),
+});
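For context on how this new model is consumed, here is a minimal sketch of calling it through `experimental_transcribe`, mirroring the usage shown in the Gladia README further down. The audio path and option values are placeholders, and the call shape assumes the standard `transcribe` options object with `providerOptions` keyed by provider name:

```ts
import { elevenlabs } from '@ai-sdk/elevenlabs';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'node:fs/promises';

// './audio.mp3' is a placeholder path, not part of this change.
const { text, segments, durationInSeconds } = await transcribe({
  model: elevenlabs.transcription('scribe_v1'),
  audio: await readFile('./audio.mp3'),
  providerOptions: {
    elevenlabs: {
      languageCode: 'en', // forwarded as language_code
      diarize: true, // speaker diarization
      timestampsGranularity: 'word', // forwarded as timestamps_granularity
    },
  },
});

console.log(text); // full transcript
console.log(segments); // [{ text, startSecond, endSecond }, ...]
console.log(durationInSeconds); // end time of the last word, per the mapping above
```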
diff --git a/packages/elevenlabs/src/elevenlabs-transcription-settings.ts b/packages/elevenlabs/src/elevenlabs-transcription-settings.ts
new file mode 100644
index 000000000000..81f46c398c04
--- /dev/null
+++ b/packages/elevenlabs/src/elevenlabs-transcription-settings.ts
@@ -0,0 +1,4 @@
+export type ElevenLabsTranscriptionModelId =
+ | 'scribe_v1'
+ | 'scribe_v1_experimental'
+ | (string & {});
diff --git a/packages/elevenlabs/src/index.ts b/packages/elevenlabs/src/index.ts
new file mode 100644
index 000000000000..8cd59b322b6e
--- /dev/null
+++ b/packages/elevenlabs/src/index.ts
@@ -0,0 +1,5 @@
+export { createElevenLabs, elevenlabs } from './elevenlabs-provider';
+export type {
+ ElevenLabsProvider,
+ ElevenLabsProviderSettings,
+} from './elevenlabs-provider';
diff --git a/packages/elevenlabs/src/transcript-test.mp3 b/packages/elevenlabs/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/elevenlabs/src/transcript-test.mp3 differ
diff --git a/packages/elevenlabs/tsconfig.json b/packages/elevenlabs/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/elevenlabs/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/elevenlabs/tsup.config.ts b/packages/elevenlabs/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/elevenlabs/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/elevenlabs/turbo.json b/packages/elevenlabs/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/elevenlabs/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/elevenlabs/vitest.edge.config.js b/packages/elevenlabs/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/elevenlabs/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/elevenlabs/vitest.node.config.js b/packages/elevenlabs/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/elevenlabs/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/fal/CHANGELOG.md b/packages/fal/CHANGELOG.md
index caf5b088c60b..22b1b1265c83 100644
--- a/packages/fal/CHANGELOG.md
+++ b/packages/fal/CHANGELOG.md
@@ -1,5 +1,55 @@
# @ai-sdk/fal
+## 0.1.11
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.1.10
+
+### Patch Changes
+
+- 487a1c0: fix (providers/fal): improve model compatibility
+
+## 0.1.9
+
+### Patch Changes
+
+- 6e8a73b: feat(providers/fal): add transcribe
+
+## 0.1.8
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.1.7
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.1.6
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 0.1.5
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 0.1.4
### Patch Changes
diff --git a/packages/fal/README.md b/packages/fal/README.md
index 779016430dd4..5ecb2846eba7 100644
--- a/packages/fal/README.md
+++ b/packages/fal/README.md
@@ -1,6 +1,6 @@
# AI SDK - fal Provider
-The **[fal provider](https://sdk.vercel.ai/providers/ai-sdk-providers/fal)** for the [AI SDK](https://sdk.vercel.ai/docs) contains image model support for the [fal.ai API](https://fal.ai/).
+The **[fal provider](https://ai-sdk.dev/providers/ai-sdk-providers/fal)** for the [AI SDK](https://ai-sdk.dev/docs) contains image model support for the [fal.ai API](https://fal.ai/).
## Setup
@@ -53,4 +53,4 @@ const { image } = await generateImage({
## Documentation
-Please check out the **[fal provider](https://sdk.vercel.ai/providers/ai-sdk-providers/fal)** for more information.
+Please check out the **[fal provider](https://ai-sdk.dev/providers/ai-sdk-providers/fal)** for more information.
diff --git a/packages/fal/package.json b/packages/fal/package.json
index 092606a4d979..2caba0d743e8 100644
--- a/packages/fal/package.json
+++ b/packages/fal/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/fal",
- "version": "0.1.4",
+ "version": "0.1.11",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,8 +30,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -49,7 +49,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/fal/src/fal-api-types.ts b/packages/fal/src/fal-api-types.ts
new file mode 100644
index 000000000000..f9108a66c208
--- /dev/null
+++ b/packages/fal/src/fal-api-types.ts
@@ -0,0 +1,149 @@
+export type FalTranscriptionAPITypes = {
+ /**
+ * URL of the audio file to transcribe. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav or webm.
+ */
+ audio_url: string;
+
+ /**
+ * Task to perform on the audio file. Either transcribe or translate. Default value: "transcribe"
+ */
+ task?: 'transcribe' | 'translate';
+
+ /**
+ * Language of the audio file. If set to null, the language will be automatically detected. Defaults to null.
+ *
+ * If translate is selected as the task, the audio will be translated to English, regardless of the language selected.
+ */
+ language?:
+ | 'af'
+ | 'am'
+ | 'ar'
+ | 'as'
+ | 'az'
+ | 'ba'
+ | 'be'
+ | 'bg'
+ | 'bn'
+ | 'bo'
+ | 'br'
+ | 'bs'
+ | 'ca'
+ | 'cs'
+ | 'cy'
+ | 'da'
+ | 'de'
+ | 'el'
+ | 'en'
+ | 'es'
+ | 'et'
+ | 'eu'
+ | 'fa'
+ | 'fi'
+ | 'fo'
+ | 'fr'
+ | 'gl'
+ | 'gu'
+ | 'ha'
+ | 'haw'
+ | 'he'
+ | 'hi'
+ | 'hr'
+ | 'ht'
+ | 'hu'
+ | 'hy'
+ | 'id'
+ | 'is'
+ | 'it'
+ | 'ja'
+ | 'jw'
+ | 'ka'
+ | 'kk'
+ | 'km'
+ | 'kn'
+ | 'ko'
+ | 'la'
+ | 'lb'
+ | 'ln'
+ | 'lo'
+ | 'lt'
+ | 'lv'
+ | 'mg'
+ | 'mi'
+ | 'mk'
+ | 'ml'
+ | 'mn'
+ | 'mr'
+ | 'ms'
+ | 'mt'
+ | 'my'
+ | 'ne'
+ | 'nl'
+ | 'nn'
+ | 'no'
+ | 'oc'
+ | 'pa'
+ | 'pl'
+ | 'ps'
+ | 'pt'
+ | 'ro'
+ | 'ru'
+ | 'sa'
+ | 'sd'
+ | 'si'
+ | 'sk'
+ | 'sl'
+ | 'sn'
+ | 'so'
+ | 'sq'
+ | 'sr'
+ | 'su'
+ | 'sv'
+ | 'sw'
+ | 'ta'
+ | 'te'
+ | 'tg'
+ | 'th'
+ | 'tk'
+ | 'tl'
+ | 'tr'
+ | 'tt'
+ | 'uk'
+ | 'ur'
+ | 'uz'
+ | 'vi'
+ | 'yi'
+ | 'yo'
+ | 'yue'
+ | 'zh'
+ | null;
+
+ /**
+ * Whether to diarize the audio file. Defaults to true.
+ */
+ diarize?: boolean;
+
+ /**
+ * Level of the chunks to return. Either segment or word. Default value: "segment"
+ */
+ chunk_level?: 'segment' | 'word';
+
+ /**
+ * Version of the model to use. All of the models are the Whisper large variant. Default value: "3"
+ */
+ version?: '3';
+
+ /**
+ * Default value: 64
+ */
+ batch_size?: number;
+
+ /**
+ * Prompt to use for generation. Defaults to an empty string. Default value: ""
+ */
+ prompt?: string;
+
+ /**
+ * Number of speakers in the audio file. Defaults to null. If not provided, the number of speakers will be automatically detected.
+ */
+ num_speakers?: number | null;
+};
diff --git a/packages/fal/src/fal-config.ts b/packages/fal/src/fal-config.ts
new file mode 100644
index 000000000000..2ca12d9144cc
--- /dev/null
+++ b/packages/fal/src/fal-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type FalConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/fal/src/fal-error.test.ts b/packages/fal/src/fal-error.test.ts
new file mode 100644
index 000000000000..8a81fc6965c3
--- /dev/null
+++ b/packages/fal/src/fal-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { falErrorDataSchema } from './fal-error';
+
+describe('falErrorDataSchema', () => {
+ it('should parse Fal resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: falErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/fal/src/fal-error.ts b/packages/fal/src/fal-error.ts
new file mode 100644
index 000000000000..f4f238f8f5b0
--- /dev/null
+++ b/packages/fal/src/fal-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const falErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type FalErrorData = z.infer<typeof falErrorDataSchema>;
+
+export const falFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: falErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/fal/src/fal-image-model.ts b/packages/fal/src/fal-image-model.ts
index 11bd486168c8..a62693a8224f 100644
--- a/packages/fal/src/fal-image-model.ts
+++ b/packages/fal/src/fal-image-model.ts
@@ -173,8 +173,6 @@ const falErrorSchema = z.union([falValidationErrorSchema, falHttpErrorSchema]);
const falImageSchema = z.object({
url: z.string(),
- width: z.number(),
- height: z.number(),
content_type: z.string(),
});
diff --git a/packages/fal/src/fal-provider.ts b/packages/fal/src/fal-provider.ts
index 2cf4caf76138..32e8a82f7ac8 100644
--- a/packages/fal/src/fal-provider.ts
+++ b/packages/fal/src/fal-provider.ts
@@ -1,8 +1,15 @@
-import { ImageModelV1, NoSuchModelError, ProviderV1 } from '@ai-sdk/provider';
+import {
+ ImageModelV1,
+ NoSuchModelError,
+ ProviderV1,
+ TranscriptionModelV1,
+} from '@ai-sdk/provider';
import type { FetchFunction } from '@ai-sdk/provider-utils';
import { withoutTrailingSlash } from '@ai-sdk/provider-utils';
import { FalImageModel } from './fal-image-model';
import { FalImageModelId, FalImageSettings } from './fal-image-settings';
+import { FalTranscriptionModelId } from './fal-transcription-settings';
+import { FalTranscriptionModel } from './fal-transcription-model';
export interface FalProviderSettings {
/**
@@ -42,6 +49,11 @@ Creates a model for image generation.
modelId: FalImageModelId,
settings?: FalImageSettings,
): ImageModelV1;
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: FalTranscriptionModelId): TranscriptionModelV1;
}
const defaultBaseURL = 'https://fal.run';
@@ -110,9 +122,17 @@ export function createFal(options: FalProviderSettings = {}): FalProvider {
fetch: options.fetch,
});
+ const createTranscriptionModel = (modelId: FalTranscriptionModelId) =>
+ new FalTranscriptionModel(modelId, {
+ provider: `fal.transcription`,
+ url: ({ path }) => path,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
return {
- image: createImageModel,
imageModel: createImageModel,
+ image: createImageModel,
languageModel: () => {
throw new NoSuchModelError({
modelId: 'languageModel',
@@ -125,6 +145,7 @@ export function createFal(options: FalProviderSettings = {}): FalProvider {
modelType: 'textEmbeddingModel',
});
},
+ transcription: createTranscriptionModel,
};
}
diff --git a/packages/fal/src/fal-transcription-model.test.ts b/packages/fal/src/fal-transcription-model.test.ts
new file mode 100644
index 000000000000..3d2b3218c008
--- /dev/null
+++ b/packages/fal/src/fal-transcription-model.test.ts
@@ -0,0 +1,180 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { createFal } from './fal-provider';
+import { FalTranscriptionModel } from './fal-transcription-model';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createFal({ apiKey: 'test-api-key' });
+const model = provider.transcription('wizper');
+
+const server = createTestServer({
+ 'https://queue.fal.run/fal-ai/wizper': {},
+ 'https://queue.fal.run/fal-ai/wizper/requests/test-id': {},
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ server.urls['https://queue.fal.run/fal-ai/wizper'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ status: 'COMPLETED',
+ request_id: 'test-id',
+ response_url:
+ 'https://queue.fal.run/fal-ai/wizper/requests/test-id/result',
+ status_url: 'https://queue.fal.run/fal-ai/wizper/requests/test-id',
+ cancel_url:
+ 'https://queue.fal.run/fal-ai/wizper/requests/test-id/cancel',
+ logs: null,
+ metrics: {},
+ queue_position: 0,
+ },
+ };
+ server.urls[
+ 'https://queue.fal.run/fal-ai/wizper/requests/test-id'
+ ].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ text: 'Hello world!',
+ chunks: [
+ {
+ text: 'Hello',
+ timestamp: [0, 1],
+ speaker: 'speaker_1',
+ },
+ {
+ text: ' ',
+ timestamp: [1, 2],
+ speaker: 'speaker_1',
+ },
+ {
+ text: 'world!',
+ timestamp: [2, 3],
+ speaker: 'speaker_1',
+ },
+ ],
+ diarization_segments: [
+ {
+ speaker: 'speaker_1',
+ timestamp: [0, 3],
+ },
+ ],
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ audio_url: expect.stringMatching(/^data:audio\//),
+ task: 'transcribe',
+ diarize: true,
+ chunk_level: 'word',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createFal({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('wizper').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'Key test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Hello world!');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new FalTranscriptionModel('wizper', {
+ provider: 'test-provider',
+ url: ({ path }) => path,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'wizper',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new FalTranscriptionModel('wizper', {
+ provider: 'test-provider',
+ url: ({ path }) => path,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('wizper');
+ });
+});
diff --git a/packages/fal/src/fal-transcription-model.ts b/packages/fal/src/fal-transcription-model.ts
new file mode 100644
index 000000000000..db297ad5d3fd
--- /dev/null
+++ b/packages/fal/src/fal-transcription-model.ts
@@ -0,0 +1,270 @@
+import {
+ AISDKError,
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ convertUint8ArrayToBase64,
+ createJsonErrorResponseHandler,
+ createJsonResponseHandler,
+ delay,
+ getFromApi,
+ parseProviderOptions,
+ postJsonToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { FalConfig } from './fal-config';
+import { falErrorDataSchema, falFailedResponseHandler } from './fal-error';
+import { FalTranscriptionModelId } from './fal-transcription-settings';
+import { FalTranscriptionAPITypes } from './fal-api-types';
+
+// https://fal.ai/models/fal-ai/whisper/api?platform=http
+const falProviderOptionsSchema = z.object({
+ /**
+ * Language of the audio file. If set to null, the language will be automatically detected. Defaults to null.
+ *
+ * If translate is selected as the task, the audio will be translated to English, regardless of the language selected.
+ */
+ language: z
+ .union([z.enum(['en']), z.string()])
+ .nullish()
+ .default('en'),
+
+ /**
+ * Whether to diarize the audio file. Defaults to true.
+ */
+ diarize: z.boolean().nullish().default(true),
+
+ /**
+ * Level of the chunks to return. Either segment or word. Default value: "segment"
+ */
+ chunkLevel: z.enum(['segment', 'word']).nullish().default('segment'),
+
+ /**
+ * Version of the model to use. All of the models are the Whisper large variant. Default value: "3"
+ */
+ version: z.enum(['3']).nullish().default('3'),
+
+ /**
+ * Default value: 64
+ */
+ batchSize: z.number().nullish().default(64),
+
+ /**
+ * Number of speakers in the audio file. Defaults to null. If not provided, the number of speakers will be automatically detected.
+ */
+ numSpeakers: z.number().nullable().nullish(),
+});
+
+export type FalTranscriptionCallOptions = z.infer<
+ typeof falProviderOptionsSchema
+>;
+
+interface FalTranscriptionModelConfig extends FalConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class FalTranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: FalTranscriptionModelId,
+ private readonly config: FalTranscriptionModelConfig,
+ ) {}
+
+ private async getArgs({
+ providerOptions,
+ }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const falOptions = parseProviderOptions({
+ provider: 'fal',
+ providerOptions,
+ schema: falProviderOptionsSchema,
+ });
+
+ // Create the JSON request body with base fields
+ const body: Omit<FalTranscriptionAPITypes, 'audio_url'> = {
+ task: 'transcribe',
+ diarize: true,
+ chunk_level: 'word',
+ };
+
+ // Add provider-specific options
+ if (falOptions) {
+ body.language = falOptions.language as never;
+ body.version = falOptions.version ?? undefined;
+ body.batch_size = falOptions.batchSize ?? undefined;
+ body.num_speakers = falOptions.numSpeakers ?? undefined;
+
+ if (typeof falOptions.diarize === 'boolean') {
+ body.diarize = falOptions.diarize;
+ }
+
+ if (falOptions.chunkLevel) {
+ body.chunk_level = falOptions.chunkLevel;
+ }
+ }
+
+ return {
+ body,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { body, warnings } = await this.getArgs(options);
+
+ const base64Audio =
+ typeof options.audio === 'string'
+ ? options.audio
+ : convertUint8ArrayToBase64(options.audio);
+
+ const audioUrl = `data:${options.mediaType};base64,${base64Audio}`;
+
+ const { value: queueResponse } = await postJsonToApi({
+ url: this.config.url({
+ path: `https://queue.fal.run/fal-ai/${this.modelId}`,
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ body: {
+ ...body,
+ audio_url: audioUrl,
+ },
+ failedResponseHandler: falFailedResponseHandler,
+ successfulResponseHandler:
+ createJsonResponseHandler(falJobResponseSchema),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ // Poll for completion with timeout
+ const startTime = Date.now();
+ const timeoutMs = 60000; // 60 seconds timeout
+ const pollIntervalMs = 1000; // 1 second interval
+
+ let response;
+ let responseHeaders;
+ let rawResponse;
+
+ while (true) {
+ try {
+ const {
+ value: statusResponse,
+ responseHeaders: statusHeaders,
+ rawValue: statusRawResponse,
+ } = await getFromApi({
+ url: this.config.url({
+ path: `https://queue.fal.run/fal-ai/${this.modelId}/requests/${queueResponse.request_id}`,
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ failedResponseHandler: async ({
+ requestBodyValues,
+ response,
+ url,
+ }) => {
+ const clone = response.clone();
+ const body = (await clone.json()) as { detail: string };
+
+ if (body.detail === 'Request is still in progress') {
+ // This is not an error, just a status update that the request is still processing
+ // Continue polling by returning a special error that signals to continue
+ return {
+ value: new Error('Request is still in progress'),
+ rawValue: body,
+ responseHeaders: {},
+ };
+ }
+
+ return createJsonErrorResponseHandler({
+ errorSchema: falErrorDataSchema,
+ errorToMessage: data => data.error.message,
+ })({ requestBodyValues, response, url });
+ },
+ successfulResponseHandler: createJsonResponseHandler(
+ falTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ response = statusResponse;
+ responseHeaders = statusHeaders;
+ rawResponse = statusRawResponse;
+ break;
+ } catch (error) {
+ // If the error message indicates the request is still in progress, ignore it and continue polling
+ if (
+ error instanceof Error &&
+ error.message === 'Request is still in progress'
+ ) {
+ // Continue with the polling loop
+ } else {
+ // Re-throw any other errors
+ throw error;
+ }
+ }
+
+ // Check if we've exceeded the timeout
+ if (Date.now() - startTime > timeoutMs) {
+ throw new AISDKError({
+ message: 'Transcription request timed out after 60 seconds',
+ name: 'TranscriptionRequestTimedOut',
+ cause: response,
+ });
+ }
+
+ // Wait before polling again
+ await delay(pollIntervalMs);
+ }
+
+ return {
+ text: response.text,
+ segments:
+ response.chunks?.map(chunk => ({
+ text: chunk.text,
+ startSecond: chunk.timestamp?.at(0) ?? 0,
+ endSecond: chunk.timestamp?.at(1) ?? 0,
+ })) ?? [],
+ language: response.inferred_languages?.at(0) ?? undefined,
+ durationInSeconds: response.chunks?.at(-1)?.timestamp?.at(1) ?? undefined,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
+
+const falJobResponseSchema = z.object({
+ request_id: z.string().nullish(),
+});
+
+const falTranscriptionResponseSchema = z.object({
+ text: z.string(),
+ chunks: z
+ .array(
+ z.object({
+ text: z.string(),
+ timestamp: z.array(z.number()).nullish(),
+ }),
+ )
+ .nullish(),
+ inferred_languages: z.array(z.string()).nullish(),
+});
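The queue-then-poll flow above stays invisible to callers: `doGenerate` submits the job to the fal queue endpoint for the model, polls the request URL once per second, and gives up after 60 seconds by throwing an `AISDKError` named `TranscriptionRequestTimedOut`. A caller-side sketch, assuming the error surfaces unchanged through `experimental_transcribe` and using a placeholder file path:

```ts
import { fal } from '@ai-sdk/fal';
import { AISDKError } from '@ai-sdk/provider';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'node:fs/promises';

try {
  const { text, segments } = await transcribe({
    model: fal.transcription('wizper'),
    audio: await readFile('./interview.mp3'), // placeholder path
    providerOptions: {
      fal: {
        diarize: true, // forwarded as diarize
        chunkLevel: 'word', // forwarded as chunk_level
      },
    },
  });
  console.log(text, segments);
} catch (error) {
  // Covers fal API errors as well as the 60-second polling timeout.
  if (AISDKError.isInstance(error)) {
    console.error('fal transcription failed:', error.message);
  } else {
    throw error;
  }
}
```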
diff --git a/packages/fal/src/fal-transcription-settings.ts b/packages/fal/src/fal-transcription-settings.ts
new file mode 100644
index 000000000000..733dd7772634
--- /dev/null
+++ b/packages/fal/src/fal-transcription-settings.ts
@@ -0,0 +1 @@
+export type FalTranscriptionModelId = 'whisper' | 'wizper' | (string & {});
diff --git a/packages/fal/src/transcript-test.mp3 b/packages/fal/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/fal/src/transcript-test.mp3 differ
diff --git a/packages/fireworks/CHANGELOG.md b/packages/fireworks/CHANGELOG.md
index 2126488c81f1..d92a0909deb0 100644
--- a/packages/fireworks/CHANGELOG.md
+++ b/packages/fireworks/CHANGELOG.md
@@ -1,5 +1,76 @@
# @ai-sdk/fireworks
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/openai-compatible@0.2.14
+
+## 0.2.13
+
+### Patch Changes
+
+- Updated dependencies [23571c9]
+ - @ai-sdk/openai-compatible@0.2.13
+
+## 0.2.12
+
+### Patch Changes
+
+- Updated dependencies [13492fe]
+ - @ai-sdk/openai-compatible@0.2.12
+
+## 0.2.11
+
+### Patch Changes
+
+- Updated dependencies [b5c9cd4]
+ - @ai-sdk/openai-compatible@0.2.11
+
+## 0.2.10
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/openai-compatible@0.2.10
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.2.9
+
+### Patch Changes
+
+- Updated dependencies [1bbc698]
+ - @ai-sdk/openai-compatible@0.2.9
+
+## 0.2.8
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/openai-compatible@0.2.8
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.2.7
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/openai-compatible@0.2.7
+
+## 0.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/openai-compatible@0.2.6
+
## 0.2.5
### Patch Changes
diff --git a/packages/fireworks/README.md b/packages/fireworks/README.md
index a5bdc7e5cadd..eedaba70284e 100644
--- a/packages/fireworks/README.md
+++ b/packages/fireworks/README.md
@@ -1,6 +1,6 @@
# AI SDK - Fireworks Provider
-The **[Fireworks provider](https://sdk.vercel.ai/providers/ai-sdk-providers/fireworks)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model and image model support for the [Fireworks](https://fireworks.ai) platform.
+The **[Fireworks provider](https://ai-sdk.dev/providers/ai-sdk-providers/fireworks)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model and image model support for the [Fireworks](https://fireworks.ai) platform.
## Setup
@@ -48,4 +48,4 @@ console.log(`Image saved to ${filename}`);
## Documentation
-Please check out the **[Fireworks provider](https://sdk.vercel.ai/providers/ai-sdk-providers/fireworks)** for more information.
+Please check out the **[Fireworks provider](https://ai-sdk.dev/providers/ai-sdk-providers/fireworks)** for more information.
diff --git a/packages/fireworks/package.json b/packages/fireworks/package.json
index a21951f0aa80..1253d5bef6da 100644
--- a/packages/fireworks/package.json
+++ b/packages/fireworks/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/fireworks",
- "version": "0.2.5",
+ "version": "0.2.14",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,9 +30,9 @@
}
},
"dependencies": {
- "@ai-sdk/openai-compatible": "0.2.5",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/openai-compatible": "0.2.14",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/gladia/CHANGELOG.md b/packages/gladia/CHANGELOG.md
new file mode 100644
index 000000000000..c65e98a73cb3
--- /dev/null
+++ b/packages/gladia/CHANGELOG.md
@@ -0,0 +1,7 @@
+# @ai-sdk/gladia
+
+## 0.0.1
+
+### Patch Changes
+
+- e6e1cd9: feat(providers/gladia): add transcribe
diff --git a/packages/gladia/README.md b/packages/gladia/README.md
new file mode 100644
index 000000000000..a594c6b5629d
--- /dev/null
+++ b/packages/gladia/README.md
@@ -0,0 +1,38 @@
+# AI SDK - Gladia Provider
+
+The **[Gladia provider](https://ai-sdk.dev/providers/ai-sdk-providers/gladia)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains transcription model support for the Gladia transcription API.
+
+## Setup
+
+The Gladia provider is available in the `@ai-sdk/gladia` module. You can install it with
+
+```bash
+npm i @ai-sdk/gladia
+```
+
+## Provider Instance
+
+You can import the default provider instance `gladia` from `@ai-sdk/gladia`:
+
+```ts
+import { gladia } from '@ai-sdk/gladia';
+```
+
+## Example
+
+```ts
+import { gladia } from '@ai-sdk/gladia';
+import { experimental_transcribe as transcribe } from 'ai';
+
+const { text } = await transcribe({
+ model: gladia.transcription(),
+ audio: new URL(
+ 'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
+ ),
+});
+```
+
+## Documentation
+
+Please check out the **[Gladia provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/gladia)** for more information.
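Beyond the basic call above, the init options exposed by Gladia's v2 API (diarization, translation, summarization, and so on) can be passed through `providerOptions.gladia`; the keys follow the camelCase provider options schema defined in `gladia-transcription-model.ts` below, and the snake_case forwarding shown here is assumed from that mapping. A sketch with placeholder option values:

```ts
import { gladia } from '@ai-sdk/gladia';
import { experimental_transcribe as transcribe } from 'ai';

const { text, segments } = await transcribe({
  model: gladia.transcription(),
  audio: new URL(
    'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-core/data/galileo.mp3',
  ),
  providerOptions: {
    gladia: {
      diarization: true, // forwarded as diarization
      diarizationConfig: { maxSpeakers: 2 }, // forwarded as diarization_config
      detectLanguage: true, // forwarded as detect_language
    },
  },
});
```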
diff --git a/packages/gladia/package.json b/packages/gladia/package.json
new file mode 100644
index 000000000000..2dd40913a7e8
--- /dev/null
+++ b/packages/gladia/package.json
@@ -0,0 +1,64 @@
+{
+ "name": "@ai-sdk/gladia",
+ "version": "0.0.1",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.2",
+ "@ai-sdk/provider-utils": "2.2.6"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/gladia/src/gladia-api-types.ts b/packages/gladia/src/gladia-api-types.ts
new file mode 100644
index 000000000000..2fc9930eb16b
--- /dev/null
+++ b/packages/gladia/src/gladia-api-types.ts
@@ -0,0 +1,134 @@
+export type GladiaTranscriptionInitiateAPITypes = {
+ /** URL to a Gladia file or to an external audio or video file */
+ audio_url: string;
+ /** [Alpha] Context to feed the transcription model with for possible better accuracy */
+ context_prompt?: string;
+ /** [Beta] Can be either boolean to enable custom_vocabulary or an array with specific vocabulary */
+ custom_vocabulary?: boolean | any[];
+ /** [Beta] Custom vocabulary configuration */
+ custom_vocabulary_config?: {
+ /** Vocabulary array with string or object containing value, intensity, pronunciations, and language */
+ vocabulary: Array<
+ | string
+ | {
+ /** Vocabulary value */
+ value: string;
+ /** Intensity of the vocabulary */
+ intensity?: number;
+ /** Pronunciation variations */
+ pronunciations?: string[];
+ /** Language of the vocabulary */
+ language?: string;
+ }
+ >;
+ /** Default intensity for vocabulary */
+ default_intensity?: number;
+ };
+ /** Detect the language from the given audio */
+ detect_language?: boolean;
+ /** Detect multiple languages in the given audio */
+ enable_code_switching?: boolean;
+ /** Configuration for code-switching */
+ code_switching_config?: {
+ /** Specify the languages you want to use when detecting multiple languages */
+ languages?: string[];
+ };
+ /** The original language in iso639-1 format */
+ language?: string;
+ /** Enable callback for this transcription */
+ callback?: boolean;
+ /** Configuration for callback */
+ callback_config?: {
+ /** The URL to be called with the result of the transcription */
+ url: string;
+ /** The HTTP method to be used */
+ method?: 'POST' | 'PUT';
+ };
+ /** Enable subtitles generation for this transcription */
+ subtitles?: boolean;
+ /** Configuration for subtitles */
+ subtitles_config?: {
+ /** Subtitles formats */
+ formats?: ('srt' | 'vtt')[];
+ /** Minimum duration of a subtitle in seconds */
+ minimum_duration?: number;
+ /** Maximum duration of a subtitle in seconds */
+ maximum_duration?: number;
+ /** Maximum number of characters per row */
+ maximum_characters_per_row?: number;
+ /** Maximum number of rows per caption */
+ maximum_rows_per_caption?: number;
+ /** Style of the subtitles */
+ style?: 'default' | 'compliance';
+ };
+ /** Enable speaker recognition (diarization) for this audio */
+ diarization?: boolean;
+ /** Configuration for diarization */
+ diarization_config?: {
+ /** Exact number of speakers in the audio */
+ number_of_speakers?: number;
+ /** Minimum number of speakers in the audio */
+ min_speakers?: number;
+ /** Maximum number of speakers in the audio */
+ max_speakers?: number;
+ /** [Alpha] Use enhanced diarization for this audio */
+ enhanced?: boolean;
+ };
+ /** [Beta] Enable translation for this audio */
+ translation?: boolean;
+ /** Configuration for translation */
+ translation_config?: {
+ /** The target language in iso639-1 format */
+ target_languages: string[];
+ /** Model for translation */
+ model?: 'base' | 'enhanced';
+ /** Align translated utterances with the original ones */
+ match_original_utterances?: boolean;
+ };
+ /** [Beta] Enable summarization for this audio */
+ summarization?: boolean;
+ /** Configuration for summarization */
+ summarization_config?: {
+ /** The type of summarization to apply */
+ type?: 'general' | 'bullet_points' | 'concise';
+ };
+ /** [Alpha] Enable moderation for this audio */
+ moderation?: boolean;
+ /** [Alpha] Enable named entity recognition for this audio */
+ named_entity_recognition?: boolean;
+ /** [Alpha] Enable chapterization for this audio */
+ chapterization?: boolean;
+ /** [Alpha] Enable names consistency for this audio */
+ name_consistency?: boolean;
+ /** [Alpha] Enable custom spelling for this audio */
+ custom_spelling?: boolean;
+ /** Configuration for custom spelling */
+ custom_spelling_config?: {
+ /** The list of spelling applied on the audio transcription */
+ spelling_dictionary: Record<string, string[]>;
+ };
+ /** [Alpha] Enable structured data extraction for this audio */
+ structured_data_extraction?: boolean;
+ /** Configuration for structured data extraction */
+ structured_data_extraction_config?: {
+ /** The list of classes to extract from the audio transcription */
+ classes: string[];
+ };
+ /** [Alpha] Enable sentiment analysis for this audio */
+ sentiment_analysis?: boolean;
+ /** [Alpha] Enable audio to llm processing for this audio */
+ audio_to_llm?: boolean;
+ /** Configuration for audio to llm */
+ audio_to_llm_config?: {
+ /** The list of prompts applied on the audio transcription */
+ prompts: string[];
+ };
+ /** Custom metadata you can attach to this transcription */
+ custom_metadata?: Record<string, any>;
+ /** Enable sentences for this audio */
+ sentences?: boolean;
+ /** [Alpha] Allows to change the output display_mode for this audio */
+ display_mode?: boolean;
+ /** [Alpha] Use enhanced punctuation for this audio */
+ punctuation_enhanced?: boolean;
+};
diff --git a/packages/gladia/src/gladia-config.ts b/packages/gladia/src/gladia-config.ts
new file mode 100644
index 000000000000..3e82d8ec6778
--- /dev/null
+++ b/packages/gladia/src/gladia-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type GladiaConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/gladia/src/gladia-error.test.ts b/packages/gladia/src/gladia-error.test.ts
new file mode 100644
index 000000000000..e7f9ab4d804b
--- /dev/null
+++ b/packages/gladia/src/gladia-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { gladiaErrorDataSchema } from './gladia-error';
+
+describe('gladiaErrorDataSchema', () => {
+ it('should parse Gladia resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: gladiaErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/gladia/src/gladia-error.ts b/packages/gladia/src/gladia-error.ts
new file mode 100644
index 000000000000..ecf3989c2404
--- /dev/null
+++ b/packages/gladia/src/gladia-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const gladiaErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type GladiaErrorData = z.infer<typeof gladiaErrorDataSchema>;
+
+export const gladiaFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: gladiaErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/gladia/src/gladia-provider.ts b/packages/gladia/src/gladia-provider.ts
new file mode 100644
index 000000000000..16e27cac8664
--- /dev/null
+++ b/packages/gladia/src/gladia-provider.ts
@@ -0,0 +1,72 @@
+import { TranscriptionModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { GladiaTranscriptionModel } from './gladia-transcription-model';
+
+export interface GladiaProvider extends Pick<ProviderV1, 'transcriptionModel'> {
+ (): {
+ transcription: GladiaTranscriptionModel;
+ };
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(): TranscriptionModelV1;
+}
+
+export interface GladiaProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create a Gladia provider instance.
+ */
+export function createGladia(
+ options: GladiaProviderSettings = {},
+): GladiaProvider {
+ const getHeaders = () => ({
+ 'x-gladia-key': loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'GLADIA_API_KEY',
+ description: 'Gladia',
+ }),
+ ...options.headers,
+ });
+
+ const createTranscriptionModel = () =>
+ new GladiaTranscriptionModel('default', {
+ provider: `gladia.transcription`,
+ url: ({ path }) => `https://api.gladia.io${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function () {
+ return {
+ transcription: createTranscriptionModel(),
+ };
+ };
+
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ return provider as GladiaProvider;
+}
+
+/**
+Default Gladia provider instance.
+ */
+export const gladia = createGladia();
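Note that `GladiaProvider` is both callable and an object: calling `gladia()` returns an object holding a ready transcription model, while `gladia.transcription()` (and the `transcriptionModel` alias assigned above) constructs one directly. A small sketch of the access patterns, assuming `createGladia` is re-exported from the package index like the other providers, and using an assumed environment variable name for the explicit key:

```ts
import { createGladia, gladia } from '@ai-sdk/gladia';

// Default instance: the API key is read from GLADIA_API_KEY.
const modelA = gladia.transcription();

// Callable form defined by the GladiaProvider interface.
const { transcription: modelB } = gladia();

// Custom instance with an explicit key and extra headers.
const myGladia = createGladia({
  apiKey: process.env.MY_GLADIA_KEY, // assumed env var name
  headers: { 'x-trace-id': 'example' },
});
const modelC = myGladia.transcription();
```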
diff --git a/packages/gladia/src/gladia-transcription-model.test.ts b/packages/gladia/src/gladia-transcription-model.test.ts
new file mode 100644
index 000000000000..aba9fb5f1afd
--- /dev/null
+++ b/packages/gladia/src/gladia-transcription-model.test.ts
@@ -0,0 +1,222 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { GladiaTranscriptionModel } from './gladia-transcription-model';
+import { createGladia } from './gladia-provider';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createGladia({ apiKey: 'test-api-key' });
+const model = provider.transcription();
+
+const server = createTestServer({
+ 'https://api.gladia.io/v2/upload': {
+ response: {
+ type: 'json-value',
+ body: {
+ audio_url: 'https://storage.gladia.io/mock-upload-url',
+ audio_metadata: {
+ id: 'test-id',
+ filename: 'test-file.mp3',
+ extension: 'mp3',
+ size: 1024,
+ audio_duration: 60,
+ number_of_channels: 2,
+ },
+ },
+ },
+ },
+ 'https://api.gladia.io/v2/pre-recorded': {},
+ 'https://api.gladia.io/v2/transcription/test-id': {},
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ // No need to set the upload response here as it's already set in the server creation
+ server.urls['https://api.gladia.io/v2/pre-recorded'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ id: 'test-id',
+ result_url: 'https://api.gladia.io/v2/transcription/test-id',
+ },
+ };
+ server.urls['https://api.gladia.io/v2/transcription/test-id'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ id: '45463597-20b7-4af7-b3b3-f5fb778203ab',
+ request_id: 'G-45463597',
+ version: 2,
+ status: 'done',
+ created_at: '2023-12-28T09:04:17.210Z',
+ completed_at: '2023-12-28T09:04:37.210Z',
+ custom_metadata: {},
+ error_code: null,
+ kind: 'pre-recorded',
+ file: {
+ id: 'test-id',
+ filename: 'test-file.mp3',
+ source: 'upload',
+ audio_duration: 60,
+ number_of_channels: 2,
+ },
+ request_params: {
+ audio_url: 'https://storage.gladia.io/mock-upload-url',
+ },
+ result: {
+ metadata: {
+ audio_duration: 60,
+ number_of_distinct_channels: 2,
+ billing_time: 60,
+ transcription_time: 20,
+ },
+ transcription: {
+ full_transcript: 'Smoke from hundreds of wildfires.',
+ languages: ['en'],
+ utterances: [
+ {
+ language: 'en',
+ start: 0,
+ end: 3,
+ confidence: 0.95,
+ channel: 1,
+ speaker: 1,
+ words: [
+ {
+ word: 'Smoke',
+ start: 0,
+ end: 1,
+ confidence: 0.95,
+ },
+ {
+ word: 'from',
+ start: 1,
+ end: 2,
+ confidence: 0.95,
+ },
+ {
+ word: 'hundreds',
+ start: 2,
+ end: 3,
+ confidence: 0.95,
+ },
+ ],
+ text: 'Smoke from hundreds of wildfires.',
+ },
+ ],
+ },
+ },
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[1].requestBody).toMatchObject({
+ audio_url: 'https://storage.gladia.io/mock-upload-url',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createGladia({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription().doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[1].requestHeaders).toMatchObject({
+ 'x-gladia-key': 'test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Smoke from hundreds of wildfires.');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new GladiaTranscriptionModel('default', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.gladia.io${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'default',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new GladiaTranscriptionModel('default', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.gladia.io${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('default');
+ });
+});
diff --git a/packages/gladia/src/gladia-transcription-model.ts b/packages/gladia/src/gladia-transcription-model.ts
new file mode 100644
index 000000000000..0f2b44c8af8b
--- /dev/null
+++ b/packages/gladia/src/gladia-transcription-model.ts
@@ -0,0 +1,650 @@
+import {
+ AISDKError,
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ convertBase64ToUint8Array,
+ createJsonResponseHandler,
+ delay,
+ getFromApi,
+ parseProviderOptions,
+ postFormDataToApi,
+ postJsonToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { GladiaConfig } from './gladia-config';
+import { gladiaFailedResponseHandler } from './gladia-error';
+import { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
+
+// https://docs.gladia.io/api-reference/v2/pre-recorded/init
+const gladiaProviderOptionsSchema = z.object({
+ /**
+ * Optional context prompt to guide the transcription.
+ */
+ contextPrompt: z.string().nullish(),
+
+ /**
+ * Custom vocabulary to improve transcription accuracy.
+ * Can be a boolean or an array of custom terms.
+ */
+ customVocabulary: z.union([z.boolean(), z.array(z.any())]).nullish(),
+
+ /**
+ * Configuration for custom vocabulary.
+ */
+ customVocabularyConfig: z
+ .object({
+ /**
+ * Array of vocabulary terms or objects with pronunciation details.
+ */
+ vocabulary: z.array(
+ z.union([
+ z.string(),
+ z.object({
+ /**
+ * The vocabulary term.
+ */
+ value: z.string(),
+ /**
+ * Intensity of the term in recognition (optional).
+ */
+ intensity: z.number().nullish(),
+ /**
+ * Alternative pronunciations for the term (optional).
+ */
+ pronunciations: z.array(z.string()).nullish(),
+ /**
+ * Language of the term (optional).
+ */
+ language: z.string().nullish(),
+ }),
+ ]),
+ ),
+ /**
+ * Default intensity for all vocabulary terms.
+ */
+ defaultIntensity: z.number().nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to automatically detect the language of the audio.
+ */
+ detectLanguage: z.boolean().nullish(),
+
+ /**
+ * Whether to enable code switching (multiple languages in the same audio).
+ */
+ enableCodeSwitching: z.boolean().nullish(),
+
+ /**
+ * Configuration for code switching.
+ */
+ codeSwitchingConfig: z
+ .object({
+ /**
+ * Languages to consider for code switching.
+ */
+ languages: z.array(z.string()).nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Specific language for transcription.
+ */
+ language: z.string().nullish(),
+
+ /**
+ * Whether to enable callback when transcription is complete.
+ */
+ callback: z.boolean().nullish(),
+
+ /**
+ * Configuration for callback.
+ */
+ callbackConfig: z
+ .object({
+ /**
+ * URL to send the callback to.
+ */
+ url: z.string(),
+ /**
+ * HTTP method for the callback.
+ */
+ method: z.enum(['POST', 'PUT']).nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to generate subtitles.
+ */
+ subtitles: z.boolean().nullish(),
+
+ /**
+ * Configuration for subtitles generation.
+ */
+ subtitlesConfig: z
+ .object({
+ /**
+ * Subtitle file formats to generate.
+ */
+ formats: z.array(z.enum(['srt', 'vtt'])).nullish(),
+ /**
+ * Minimum duration for subtitle segments.
+ */
+ minimumDuration: z.number().nullish(),
+ /**
+ * Maximum duration for subtitle segments.
+ */
+ maximumDuration: z.number().nullish(),
+ /**
+ * Maximum characters per row in subtitles.
+ */
+ maximumCharactersPerRow: z.number().nullish(),
+ /**
+ * Maximum rows per caption in subtitles.
+ */
+ maximumRowsPerCaption: z.number().nullish(),
+ /**
+ * Style of subtitles.
+ */
+ style: z.enum(['default', 'compliance']).nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to enable speaker diarization (speaker identification).
+ */
+ diarization: z.boolean().nullish(),
+
+ /**
+ * Configuration for diarization.
+ */
+ diarizationConfig: z
+ .object({
+ /**
+ * Exact number of speakers to identify.
+ */
+ numberOfSpeakers: z.number().nullish(),
+ /**
+ * Minimum number of speakers to identify.
+ */
+ minSpeakers: z.number().nullish(),
+ /**
+ * Maximum number of speakers to identify.
+ */
+ maxSpeakers: z.number().nullish(),
+ /**
+ * Whether to use enhanced diarization.
+ */
+ enhanced: z.boolean().nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to translate the transcription.
+ */
+ translation: z.boolean().nullish(),
+
+ /**
+ * Configuration for translation.
+ */
+ translationConfig: z
+ .object({
+ /**
+ * Target languages for translation.
+ */
+ targetLanguages: z.array(z.string()),
+ /**
+ * Translation model to use.
+ */
+ model: z.enum(['base', 'enhanced']).nullish(),
+ /**
+ * Whether to match original utterances in translation.
+ */
+ matchOriginalUtterances: z.boolean().nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to generate a summary of the transcription.
+ */
+ summarization: z.boolean().nullish(),
+
+ /**
+ * Configuration for summarization.
+ */
+ summarizationConfig: z
+ .object({
+ /**
+ * Type of summary to generate.
+ */
+ type: z.enum(['general', 'bullet_points', 'concise']).nullish(),
+ })
+ .nullish(),
+
+ /**
+ * Whether to enable content moderation.
+ */
+ moderation: z.boolean().nullish(),
+
+ /**
+ * Whether to enable named entity recognition.
+ */
+ namedEntityRecognition: z.boolean().nullish(),
+
+ /**
+ * Whether to enable automatic chapter creation.
+ */
+ chapterization: z.boolean().nullish(),
+
+ /**
+ * Whether to ensure consistent naming of entities.
+ */
+ nameConsistency: z.boolean().nullish(),
+
+ /**
+ * Whether to enable custom spelling.
+ */
+ customSpelling: z.boolean().nullish(),
+
+ /**
+ * Configuration for custom spelling.
+ */
+ customSpellingConfig: z
+ .object({
+ /**
+ * Dictionary of custom spellings.
+ */
+ spellingDictionary: z.record(z.array(z.string())),
+ })
+ .nullish(),
+
+ /**
+ * Whether to extract structured data from the transcription.
+ */
+ structuredDataExtraction: z.boolean().nullish(),
+
+ /**
+ * Configuration for structured data extraction.
+ */
+ structuredDataExtractionConfig: z
+ .object({
+ /**
+ * Classes of data to extract.
+ */
+ classes: z.array(z.string()),
+ })
+ .nullish(),
+
+ /**
+ * Whether to perform sentiment analysis on the transcription.
+ */
+ sentimentAnalysis: z.boolean().nullish(),
+
+ /**
+ * Whether to send audio to a language model for processing.
+ */
+ audioToLlm: z.boolean().nullish(),
+
+ /**
+ * Configuration for audio to language model processing.
+ */
+ audioToLlmConfig: z
+ .object({
+ /**
+ * Prompts to send to the language model.
+ */
+ prompts: z.array(z.string()),
+ })
+ .nullish(),
+
+ /**
+ * Custom metadata to include with the transcription.
+ */
+ customMetadata: z.record(z.any()).nullish(),
+
+ /**
+ * Whether to include sentence-level segmentation.
+ */
+ sentences: z.boolean().nullish(),
+
+ /**
+ * Whether to enable display mode.
+ */
+ displayMode: z.boolean().nullish(),
+
+ /**
+ * Whether to enhance punctuation in the transcription.
+ */
+ punctuationEnhanced: z.boolean().nullish(),
+});
+
+export type GladiaTranscriptionCallOptions = z.infer<
+ typeof gladiaProviderOptionsSchema
+>;
+
+interface GladiaTranscriptionModelConfig extends GladiaConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class GladiaTranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: 'default',
+ private readonly config: GladiaTranscriptionModelConfig,
+ ) {}
+
+ private async getArgs({
+ providerOptions,
+  }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const gladiaOptions = parseProviderOptions({
+ provider: 'gladia',
+ providerOptions,
+ schema: gladiaProviderOptionsSchema,
+ });
+
+    const body: Omit<GladiaTranscriptionInitiateAPITypes, 'audio_url'> = {};
+
+ // Add provider-specific options
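+      // (camelCase provider options are mapped onto the snake_case fields expected by the Gladia REST API)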
+ if (gladiaOptions) {
+ body.context_prompt = gladiaOptions.contextPrompt ?? undefined;
+ body.custom_vocabulary = gladiaOptions.customVocabulary ?? undefined;
+ body.detect_language = gladiaOptions.detectLanguage ?? undefined;
+ body.enable_code_switching =
+ gladiaOptions.enableCodeSwitching ?? undefined;
+ body.language = gladiaOptions.language ?? undefined;
+ body.callback = gladiaOptions.callback ?? undefined;
+ body.subtitles = gladiaOptions.subtitles ?? undefined;
+ body.diarization = gladiaOptions.diarization ?? undefined;
+ body.translation = gladiaOptions.translation ?? undefined;
+ body.summarization = gladiaOptions.summarization ?? undefined;
+ body.moderation = gladiaOptions.moderation ?? undefined;
+ body.named_entity_recognition =
+ gladiaOptions.namedEntityRecognition ?? undefined;
+ body.chapterization = gladiaOptions.chapterization ?? undefined;
+ body.name_consistency = gladiaOptions.nameConsistency ?? undefined;
+ body.custom_spelling = gladiaOptions.customSpelling ?? undefined;
+ body.structured_data_extraction =
+ gladiaOptions.structuredDataExtraction ?? undefined;
+ body.structured_data_extraction_config =
+ gladiaOptions.structuredDataExtractionConfig ?? undefined;
+ body.sentiment_analysis = gladiaOptions.sentimentAnalysis ?? undefined;
+ body.audio_to_llm = gladiaOptions.audioToLlm ?? undefined;
+ body.audio_to_llm_config = gladiaOptions.audioToLlmConfig ?? undefined;
+ body.custom_metadata = gladiaOptions.customMetadata ?? undefined;
+ body.sentences = gladiaOptions.sentences ?? undefined;
+ body.display_mode = gladiaOptions.displayMode ?? undefined;
+ body.punctuation_enhanced =
+ gladiaOptions.punctuationEnhanced ?? undefined;
+
+ if (gladiaOptions.customVocabularyConfig) {
+ body.custom_vocabulary_config = {
+ vocabulary: gladiaOptions.customVocabularyConfig.vocabulary.map(
+ item => {
+ if (typeof item === 'string') return item;
+ return {
+ value: item.value,
+ intensity: item.intensity ?? undefined,
+ pronunciations: item.pronunciations ?? undefined,
+ language: item.language ?? undefined,
+ };
+ },
+ ),
+ default_intensity:
+ gladiaOptions.customVocabularyConfig.defaultIntensity ?? undefined,
+ };
+ }
+
+ // Handle code switching config
+ if (gladiaOptions.codeSwitchingConfig) {
+ body.code_switching_config = {
+ languages: gladiaOptions.codeSwitchingConfig.languages ?? undefined,
+ };
+ }
+
+ // Handle callback config
+ if (gladiaOptions.callbackConfig) {
+ body.callback_config = {
+ url: gladiaOptions.callbackConfig.url,
+ method: gladiaOptions.callbackConfig.method ?? undefined,
+ };
+ }
+
+ // Handle subtitles config
+ if (gladiaOptions.subtitlesConfig) {
+ body.subtitles_config = {
+ formats: gladiaOptions.subtitlesConfig.formats ?? undefined,
+ minimum_duration:
+ gladiaOptions.subtitlesConfig.minimumDuration ?? undefined,
+ maximum_duration:
+ gladiaOptions.subtitlesConfig.maximumDuration ?? undefined,
+ maximum_characters_per_row:
+ gladiaOptions.subtitlesConfig.maximumCharactersPerRow ?? undefined,
+ maximum_rows_per_caption:
+ gladiaOptions.subtitlesConfig.maximumRowsPerCaption ?? undefined,
+ style: gladiaOptions.subtitlesConfig.style ?? undefined,
+ };
+ }
+
+ // Handle diarization config
+ if (gladiaOptions.diarizationConfig) {
+ body.diarization_config = {
+ number_of_speakers:
+ gladiaOptions.diarizationConfig.numberOfSpeakers ?? undefined,
+ min_speakers:
+ gladiaOptions.diarizationConfig.minSpeakers ?? undefined,
+ max_speakers:
+ gladiaOptions.diarizationConfig.maxSpeakers ?? undefined,
+ enhanced: gladiaOptions.diarizationConfig.enhanced ?? undefined,
+ };
+ }
+
+ // Handle translation config
+ if (gladiaOptions.translationConfig) {
+ body.translation_config = {
+ target_languages: gladiaOptions.translationConfig.targetLanguages,
+ model: gladiaOptions.translationConfig.model ?? undefined,
+ match_original_utterances:
+ gladiaOptions.translationConfig.matchOriginalUtterances ??
+ undefined,
+ };
+ }
+
+ // Handle summarization config
+ if (gladiaOptions.summarizationConfig) {
+ body.summarization_config = {
+ type: gladiaOptions.summarizationConfig.type ?? undefined,
+ };
+ }
+
+ // Handle custom spelling config
+ if (gladiaOptions.customSpellingConfig) {
+ body.custom_spelling_config = {
+ spelling_dictionary:
+ gladiaOptions.customSpellingConfig.spellingDictionary,
+ };
+ }
+ }
+
+ return {
+ body,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+    options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+  ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+
+ // Create form data with base fields
+ const formData = new FormData();
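+    // The audio input may be a Uint8Array or a base64-encoded string; normalize both to a Blob before upload.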
+ const blob =
+ options.audio instanceof Uint8Array
+ ? new Blob([options.audio])
+ : new Blob([convertBase64ToUint8Array(options.audio)]);
+
+ formData.append('model', this.modelId);
+ formData.append(
+ 'audio',
+ new File([blob], 'audio', { type: options.mediaType }),
+ );
+
+ const { value: uploadResponse } = await postFormDataToApi({
+ url: this.config.url({
+ path: '/v2/upload',
+ modelId: 'default',
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ formData,
+ failedResponseHandler: gladiaFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ gladiaUploadResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
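+    // Gladia uses a two-step flow: the uploaded audio URL is sent to the
+    // pre-recorded transcription endpoint, which returns a result URL that is polled below.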
+ const { body, warnings } = await this.getArgs(options);
+
+ const { value: transcriptionInitResponse } = await postJsonToApi({
+ url: this.config.url({
+ path: '/v2/pre-recorded',
+ modelId: 'default',
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ body: {
+ ...body,
+ audio_url: uploadResponse.audio_url,
+ },
+ failedResponseHandler: gladiaFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ gladiaTranscriptionInitializeResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ // Poll the result URL until the transcription is done or an error occurs
+ const resultUrl = transcriptionInitResponse.result_url;
+ let transcriptionResult;
+ let transcriptionResultHeaders;
+ const timeoutMs = 60 * 1000; // 60 seconds timeout
+ const startTime = Date.now();
+ const pollingInterval = 1000;
+
+ while (true) {
+ // Check if we've exceeded the timeout
+ if (Date.now() - startTime > timeoutMs) {
+ throw new AISDKError({
+ message: 'Transcription job polling timed out',
+ name: 'TranscriptionJobPollingTimedOut',
+ cause: transcriptionResult,
+ });
+ }
+
+ const response = await getFromApi({
+ url: resultUrl,
+ headers: combineHeaders(this.config.headers(), options.headers),
+ failedResponseHandler: gladiaFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ gladiaTranscriptionResultResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ transcriptionResult = response.value;
+ transcriptionResultHeaders = response.responseHeaders;
+
+ if (transcriptionResult.status === 'done') {
+ break;
+ }
+
+ if (transcriptionResult.status === 'error') {
+ throw new AISDKError({
+ message: 'Transcription job failed',
+ name: 'TranscriptionJobFailed',
+ cause: transcriptionResult,
+ });
+ }
+
+ // Wait for the configured polling interval before checking again
+ await delay(pollingInterval);
+ }
+
+ if (!transcriptionResult.result) {
+ throw new AISDKError({
+ message: 'Transcription result is empty',
+ name: 'TranscriptionResultEmpty',
+ cause: transcriptionResult,
+ });
+ }
+
+ // Process the successful result
+ return {
+ text: transcriptionResult.result.transcription.full_transcript,
+ durationInSeconds: transcriptionResult.result.metadata.audio_duration,
+ language: transcriptionResult.result.transcription.languages.at(0),
+ segments: transcriptionResult.result.transcription.utterances.map(
+ utterance => ({
+ text: utterance.text,
+ startSecond: utterance.start,
+ endSecond: utterance.end,
+ }),
+ ),
+ response: {
+ timestamp: currentDate,
+ modelId: 'default',
+ headers: transcriptionResultHeaders,
+ },
+ providerMetadata: {
+ gladia: transcriptionResult,
+ },
+ warnings,
+ };
+ }
+}
+
+const gladiaUploadResponseSchema = z.object({
+ audio_url: z.string(),
+});
+
+const gladiaTranscriptionInitializeResponseSchema = z.object({
+ result_url: z.string(),
+});
+
+const gladiaTranscriptionResultResponseSchema = z.object({
+ status: z.enum(['queued', 'processing', 'done', 'error']),
+ result: z
+ .object({
+ metadata: z.object({
+ audio_duration: z.number(),
+ }),
+ transcription: z.object({
+ full_transcript: z.string(),
+ languages: z.array(z.string()),
+ utterances: z.array(
+ z.object({
+ start: z.number(),
+ end: z.number(),
+ text: z.string(),
+ }),
+ ),
+ }),
+ })
+ .nullish(),
+});
diff --git a/packages/gladia/src/index.ts b/packages/gladia/src/index.ts
new file mode 100644
index 000000000000..4e5bf690e0d8
--- /dev/null
+++ b/packages/gladia/src/index.ts
@@ -0,0 +1,2 @@
+export { createGladia, gladia } from './gladia-provider';
+export type { GladiaProvider, GladiaProviderSettings } from './gladia-provider';
diff --git a/packages/gladia/src/transcript-test.mp3 b/packages/gladia/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/gladia/src/transcript-test.mp3 differ
diff --git a/packages/gladia/tsconfig.json b/packages/gladia/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/gladia/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/gladia/tsup.config.ts b/packages/gladia/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/gladia/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/gladia/turbo.json b/packages/gladia/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/gladia/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/gladia/vitest.edge.config.js b/packages/gladia/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/gladia/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/gladia/vitest.node.config.js b/packages/gladia/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/gladia/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/google-vertex/CHANGELOG.md b/packages/google-vertex/CHANGELOG.md
index ca17793d1136..2031cce08743 100644
--- a/packages/google-vertex/CHANGELOG.md
+++ b/packages/google-vertex/CHANGELOG.md
@@ -1,5 +1,127 @@
# @ai-sdk/google-vertex
+## 2.2.22
+
+### Patch Changes
+
+- fe24216: Add reasoning token output support for gemini models via Vertex AI Provider
+- Updated dependencies [4b2e1b0]
+ - @ai-sdk/google@1.2.18
+
+## 2.2.21
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+ - @ai-sdk/anthropic@1.2.11
+ - @ai-sdk/google@1.2.17
+
+## 2.2.20
+
+### Patch Changes
+
+- Updated dependencies [0ca6f2f]
+ - @ai-sdk/google@1.2.16
+
+## 2.2.19
+
+### Patch Changes
+
+- Updated dependencies [2afd354]
+ - @ai-sdk/google@1.2.15
+
+## 2.2.18
+
+### Patch Changes
+
+- a85ae99: feat (provider/google-vertex): add imagen-3.0-generate-002
+- Updated dependencies [c695a7e]
+ - @ai-sdk/google@1.2.14
+
+## 2.2.17
+
+### Patch Changes
+
+- Updated dependencies [6183b08]
+ - @ai-sdk/google@1.2.13
+
+## 2.2.16
+
+### Patch Changes
+
+- Updated dependencies [c56331d]
+ - @ai-sdk/google@1.2.12
+
+## 2.2.15
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/anthropic@1.2.10
+ - @ai-sdk/google@1.2.11
+ - @ai-sdk/provider-utils@2.2.7
+
+## 2.2.14
+
+### Patch Changes
+
+- Updated dependencies [aeba38e]
+ - @ai-sdk/anthropic@1.2.9
+
+## 2.2.13
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/anthropic@1.2.8
+ - @ai-sdk/google@1.2.10
+ - @ai-sdk/provider-utils@2.2.6
+
+## 2.2.12
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+ - @ai-sdk/anthropic@1.2.7
+ - @ai-sdk/google@1.2.9
+
+## 2.2.11
+
+### Patch Changes
+
+- Updated dependencies [1e8e66d]
+ - @ai-sdk/google@1.2.8
+
+## 2.2.10
+
+### Patch Changes
+
+- 1789884: feat: add provider option schemas for vertex imagegen and google genai
+- Updated dependencies [1789884]
+ - @ai-sdk/google@1.2.7
+
+## 2.2.9
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+ - @ai-sdk/anthropic@1.2.6
+ - @ai-sdk/google@1.2.6
+
+## 2.2.8
+
+### Patch Changes
+
+- 292f543: fix (provider/google-vertex): fix anthropic support for image urls in messages
+- Updated dependencies [292f543]
+ - @ai-sdk/anthropic@1.2.5
+
## 2.2.7
### Patch Changes
diff --git a/packages/google-vertex/README.md b/packages/google-vertex/README.md
index 6c69c1930c38..dc12f19e8fa7 100644
--- a/packages/google-vertex/README.md
+++ b/packages/google-vertex/README.md
@@ -1,6 +1,6 @@
# AI SDK - Google Vertex AI Provider
-The **[Google Vertex provider](https://sdk.vercel.ai/providers/ai-sdk-providers/google-vertex)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the [Google Vertex AI](https://cloud.google.com/vertex-ai) APIs.
+The **[Google Vertex provider](https://ai-sdk.dev/providers/ai-sdk-providers/google-vertex)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [Google Vertex AI](https://cloud.google.com/vertex-ai) APIs.
This library includes a Google Vertex Anthropic provider. This provider closely follows the core Google Vertex library's usage patterns. See more in the [Google Vertex Anthropic Provider](#google-vertex-anthropic-provider) section below.
@@ -218,4 +218,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Google Vertex provider](https://sdk.vercel.ai/providers/ai-sdk-providers/google-vertex)** for more information.
+Please check out the **[Google Vertex provider](https://ai-sdk.dev/providers/ai-sdk-providers/google-vertex)** for more information.
diff --git a/packages/google-vertex/package.json b/packages/google-vertex/package.json
index 4d96920378f6..93a2bc1831e9 100644
--- a/packages/google-vertex/package.json
+++ b/packages/google-vertex/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/google-vertex",
- "version": "2.2.7",
+ "version": "2.2.22",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -49,10 +49,10 @@
}
},
"dependencies": {
- "@ai-sdk/anthropic": "1.2.4",
- "@ai-sdk/google": "1.2.5",
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3",
+ "@ai-sdk/anthropic": "1.2.11",
+ "@ai-sdk/google": "1.2.18",
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8",
"google-auth-library": "^9.15.0"
},
"devDependencies": {
@@ -71,7 +71,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/google-vertex/src/anthropic/google-vertex-anthropic-provider.ts b/packages/google-vertex/src/anthropic/google-vertex-anthropic-provider.ts
index 1c14853eddc0..e80447d79852 100644
--- a/packages/google-vertex/src/anthropic/google-vertex-anthropic-provider.ts
+++ b/packages/google-vertex/src/anthropic/google-vertex-anthropic-provider.ts
@@ -100,6 +100,7 @@ export function createVertexAnthropic(
baseURL,
headers: options.headers ?? {},
fetch: options.fetch,
+ supportsImageUrls: false,
buildRequestUrl: (baseURL, isStreaming) =>
`${baseURL}/${modelId}:${
isStreaming ? 'streamRawPredict' : 'rawPredict'
diff --git a/packages/google-vertex/src/google-vertex-image-model.test.ts b/packages/google-vertex/src/google-vertex-image-model.test.ts
index 0a7dab1c6fe9..79935b79ed6a 100644
--- a/packages/google-vertex/src/google-vertex-image-model.test.ts
+++ b/packages/google-vertex/src/google-vertex-image-model.test.ts
@@ -4,7 +4,7 @@ import { GoogleVertexImageModel } from './google-vertex-image-model';
const prompt = 'A cute baby sea otter';
const model = new GoogleVertexImageModel(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{},
{
provider: 'google-vertex',
@@ -14,7 +14,7 @@ const model = new GoogleVertexImageModel(
);
const server = createTestServer({
- 'https://api.example.com/models/imagen-3.0-generate-001:predict': {},
+ 'https://api.example.com/models/imagen-3.0-generate-002:predict': {},
});
describe('GoogleVertexImageModel', () => {
@@ -25,7 +25,7 @@ describe('GoogleVertexImageModel', () => {
     headers?: Record<string, string>;
} = {}) {
server.urls[
- 'https://api.example.com/models/imagen-3.0-generate-001:predict'
+ 'https://api.example.com/models/imagen-3.0-generate-002:predict'
].response = {
type: 'json-value',
headers,
@@ -38,32 +38,11 @@ describe('GoogleVertexImageModel', () => {
};
}
- it('should pass the correct parameters', async () => {
- prepareJsonResponse();
-
- await model.doGenerate({
- prompt,
- n: 2,
- size: undefined,
- aspectRatio: undefined,
- seed: undefined,
- providerOptions: { vertex: { aspectRatio: '1:1' } },
- });
-
- expect(await server.calls[0].requestBody).toStrictEqual({
- instances: [{ prompt }],
- parameters: {
- sampleCount: 2,
- aspectRatio: '1:1',
- },
- });
- });
-
it('should pass headers', async () => {
prepareJsonResponse();
const modelWithHeaders = new GoogleVertexImageModel(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{},
{
provider: 'google-vertex',
@@ -95,7 +74,7 @@ describe('GoogleVertexImageModel', () => {
it('should respect maxImagesPerCall setting', () => {
const customModel = new GoogleVertexImageModel(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{ maxImagesPerCall: 2 },
{
provider: 'google-vertex',
@@ -109,7 +88,7 @@ describe('GoogleVertexImageModel', () => {
it('should use default maxImagesPerCall when not specified', () => {
const defaultModel = new GoogleVertexImageModel(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{},
{
provider: 'google-vertex',
@@ -143,13 +122,9 @@ describe('GoogleVertexImageModel', () => {
prompt: 'test prompt',
n: 1,
size: undefined,
- aspectRatio: undefined,
+ aspectRatio: '16:9',
seed: undefined,
- providerOptions: {
- vertex: {
- aspectRatio: '16:9',
- },
- },
+ providerOptions: {},
});
expect(await server.calls[0].requestBody).toStrictEqual({
@@ -214,7 +189,7 @@ describe('GoogleVertexImageModel', () => {
seed: 42,
providerOptions: {
vertex: {
- temperature: 0.8,
+ addWatermark: false,
},
},
});
@@ -225,7 +200,7 @@ describe('GoogleVertexImageModel', () => {
sampleCount: 1,
aspectRatio: '1:1',
seed: 42,
- temperature: 0.8,
+ addWatermark: false,
},
});
});
@@ -263,7 +238,7 @@ describe('GoogleVertexImageModel', () => {
const testDate = new Date('2024-03-15T12:00:00Z');
const customModel = new GoogleVertexImageModel(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{},
{
provider: 'google-vertex',
@@ -286,7 +261,7 @@ describe('GoogleVertexImageModel', () => {
expect(result.response).toStrictEqual({
timestamp: testDate,
- modelId: 'imagen-3.0-generate-001',
+ modelId: 'imagen-3.0-generate-002',
headers: {
'content-length': '97',
'content-type': 'application/json',
@@ -302,7 +277,7 @@ describe('GoogleVertexImageModel', () => {
const result = await model.doGenerate({
prompt,
- n: 1,
+ n: 2,
size: undefined,
aspectRatio: undefined,
seed: undefined,
@@ -317,7 +292,38 @@ describe('GoogleVertexImageModel', () => {
expect(result.response.timestamp.getTime()).toBeLessThanOrEqual(
afterDate.getTime(),
);
- expect(result.response.modelId).toBe('imagen-3.0-generate-001');
+ expect(result.response.modelId).toBe('imagen-3.0-generate-002');
+ });
+
+ it('should only pass valid provider options', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ prompt,
+ n: 2,
+ size: undefined,
+ aspectRatio: '16:9',
+ seed: undefined,
+ providerOptions: {
+ vertex: {
+ addWatermark: false,
+ negativePrompt: 'negative prompt',
+ personGeneration: 'allow_all',
+ foo: 'bar',
+ },
+ },
+ });
+
+ expect(await server.calls[0].requestBody).toStrictEqual({
+ instances: [{ prompt }],
+ parameters: {
+ sampleCount: 2,
+ addWatermark: false,
+ negativePrompt: 'negative prompt',
+ personGeneration: 'allow_all',
+ aspectRatio: '16:9',
+ },
+ });
});
});
});
diff --git a/packages/google-vertex/src/google-vertex-image-model.ts b/packages/google-vertex/src/google-vertex-image-model.ts
index c94d5ddc32d5..635b50df5445 100644
--- a/packages/google-vertex/src/google-vertex-image-model.ts
+++ b/packages/google-vertex/src/google-vertex-image-model.ts
@@ -3,6 +3,7 @@ import {
Resolvable,
combineHeaders,
createJsonResponseHandler,
+ parseProviderOptions,
postJsonToApi,
resolve,
} from '@ai-sdk/provider-utils';
@@ -65,13 +66,19 @@ export class GoogleVertexImageModel implements ImageModelV1 {
});
}
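+    // parseProviderOptions validates against the schema defined below, so unknown vertex options are dropped.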
+ const vertexImageOptions = parseProviderOptions({
+ provider: 'vertex',
+ providerOptions,
+ schema: vertexImageProviderOptionsSchema,
+ });
+
const body = {
instances: [{ prompt }],
parameters: {
sampleCount: n,
...(aspectRatio != null ? { aspectRatio } : {}),
...(seed != null ? { seed } : {}),
- ...(providerOptions.vertex ?? {}),
+ ...(vertexImageOptions ?? {}),
},
};
@@ -108,3 +115,23 @@ export class GoogleVertexImageModel implements ImageModelV1 {
const vertexImageResponseSchema = z.object({
predictions: z.array(z.object({ bytesBase64Encoded: z.string() })).nullish(),
});
+
+const vertexImageProviderOptionsSchema = z.object({
+ negativePrompt: z.string().nullish(),
+ personGeneration: z
+ .enum(['dont_allow', 'allow_adult', 'allow_all'])
+ .nullish(),
+ safetySetting: z
+ .enum([
+ 'block_low_and_above',
+ 'block_medium_and_above',
+ 'block_only_high',
+ 'block_none',
+ ])
+ .nullish(),
+ addWatermark: z.boolean().nullish(),
+ storageUri: z.string().nullish(),
+});
+export type GoogleVertexImageProviderOptions = z.infer<
+ typeof vertexImageProviderOptionsSchema
+>;
diff --git a/packages/google-vertex/src/google-vertex-image-settings.ts b/packages/google-vertex/src/google-vertex-image-settings.ts
index d037657701f8..640c4bd9db20 100644
--- a/packages/google-vertex/src/google-vertex-image-settings.ts
+++ b/packages/google-vertex/src/google-vertex-image-settings.ts
@@ -1,5 +1,6 @@
export type GoogleVertexImageModelId =
| 'imagen-3.0-generate-001'
+ | 'imagen-3.0-generate-002'
| 'imagen-3.0-fast-generate-001'
| (string & {});
diff --git a/packages/google-vertex/src/google-vertex-provider.test.ts b/packages/google-vertex/src/google-vertex-provider.test.ts
index 2c34ca0a33ae..2233214e6e82 100644
--- a/packages/google-vertex/src/google-vertex-provider.test.ts
+++ b/packages/google-vertex/src/google-vertex-provider.test.ts
@@ -148,10 +148,10 @@ describe('google-vertex-provider', () => {
project: 'test-project',
location: 'test-location',
});
- provider.image('imagen-3.0-generate-001');
+ provider.image('imagen-3.0-generate-002');
expect(GoogleVertexImageModel).toHaveBeenCalledWith(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
{},
expect.objectContaining({
provider: 'google.vertex.image',
@@ -170,10 +170,10 @@ describe('google-vertex-provider', () => {
const imageSettings = {
maxImagesPerCall: 4,
};
- provider.image('imagen-3.0-generate-001', imageSettings);
+ provider.image('imagen-3.0-generate-002', imageSettings);
expect(GoogleVertexImageModel).toHaveBeenCalledWith(
- 'imagen-3.0-generate-001',
+ 'imagen-3.0-generate-002',
imageSettings,
expect.objectContaining({
provider: 'google.vertex.image',
diff --git a/packages/google-vertex/src/index.ts b/packages/google-vertex/src/index.ts
index bf1e0fad1033..e84f8e06d72f 100644
--- a/packages/google-vertex/src/index.ts
+++ b/packages/google-vertex/src/index.ts
@@ -1,3 +1,4 @@
+export type { GoogleVertexImageProviderOptions } from './google-vertex-image-model';
export { createVertex, vertex } from './google-vertex-provider-node';
export type {
GoogleVertexProvider,
diff --git a/packages/google/CHANGELOG.md b/packages/google/CHANGELOG.md
index 44da41030ded..2395a81917be 100644
--- a/packages/google/CHANGELOG.md
+++ b/packages/google/CHANGELOG.md
@@ -1,5 +1,91 @@
# @ai-sdk/google
+## 1.2.18
+
+### Patch Changes
+
+- 4b2e1b0: Add reasoning token output support for gemini models via Vertex AI Provider
+
+## 1.2.17
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.2.16
+
+### Patch Changes
+
+- 0ca6f2f: feat(providers/google): add gemini-2.5-pro-preview-05-06
+
+## 1.2.15
+
+### Patch Changes
+
+- 2afd354: fix(providers/google): accept nullish in safetyRatings
+
+## 1.2.14
+
+### Patch Changes
+
+- c695a7e: feat (provider/google): add new gemini models
+
+## 1.2.13
+
+### Patch Changes
+
+- 6183b08: feat(providers/google): Add taskType support for Text Embedding Models
+
+## 1.2.12
+
+### Patch Changes
+
+- c56331d: feat (providers/google): add thinking config to provider options
+
+## 1.2.11
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.2.10
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.2.9
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.2.8
+
+### Patch Changes
+
+- 1e8e66d: fix (provider/google): allow "OFF" for Google HarmBlockThreshold
+
+## 1.2.7
+
+### Patch Changes
+
+- 1789884: feat: add provider option schemas for vertex imagegen and google genai
+
+## 1.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 1.2.5
### Patch Changes
diff --git a/packages/google/README.md b/packages/google/README.md
index d64c30e8f9e0..eda1db8e4a8d 100644
--- a/packages/google/README.md
+++ b/packages/google/README.md
@@ -1,6 +1,6 @@
# AI SDK - Google Generative AI Provider
-The **[Google Generative AI provider](https://sdk.vercel.ai/providers/ai-sdk-providers/google-generative-ai)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the [Google Generative AI](https://ai.google/discover/generativeai/) APIs.
+The **[Google Generative AI provider](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [Google Generative AI](https://ai.google/discover/generativeai/) APIs.
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Google Generative AI provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/google-generative-ai)** for more information.
+Please check out the **[Google Generative AI provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai)** for more information.
diff --git a/packages/google/package.json b/packages/google/package.json
index 8ae46dd09ff0..b778bc36ebb1 100644
--- a/packages/google/package.json
+++ b/packages/google/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/google",
- "version": "1.2.5",
+ "version": "1.2.18",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -38,8 +38,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -57,7 +57,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/google/src/google-generative-ai-embedding-model.test.ts b/packages/google/src/google-generative-ai-embedding-model.test.ts
index 316790789f49..f199e13fe7c0 100644
--- a/packages/google/src/google-generative-ai-embedding-model.test.ts
+++ b/packages/google/src/google-generative-ai-embedding-model.test.ts
@@ -92,6 +92,22 @@ describe('GoogleGenerativeAIEmbeddingModel', () => {
});
});
+ it('should pass the taskType setting', async () => {
+ prepareJsonResponse();
+
+ await provider
+ .embedding('text-embedding-004', { taskType: 'SEMANTIC_SIMILARITY' })
+ .doEmbed({ values: testValues });
+
+ expect(await server.calls[0].requestBody).toStrictEqual({
+ requests: testValues.map(value => ({
+ model: 'models/text-embedding-004',
+ content: { role: 'user', parts: [{ text: value }] },
+ taskType: 'SEMANTIC_SIMILARITY',
+ })),
+ });
+ });
+
it('should pass headers', async () => {
prepareJsonResponse();
diff --git a/packages/google/src/google-generative-ai-embedding-model.ts b/packages/google/src/google-generative-ai-embedding-model.ts
index 395d1b23ef2f..fa69645f479b 100644
--- a/packages/google/src/google-generative-ai-embedding-model.ts
+++ b/packages/google/src/google-generative-ai-embedding-model.ts
@@ -83,6 +83,7 @@ export class GoogleGenerativeAIEmbeddingModel
model: `models/${this.modelId}`,
content: { role: 'user', parts: [{ text: value }] },
outputDimensionality: this.settings.outputDimensionality,
+ taskType: this.settings.taskType,
})),
},
failedResponseHandler: googleFailedResponseHandler,
diff --git a/packages/google/src/google-generative-ai-embedding-settings.ts b/packages/google/src/google-generative-ai-embedding-settings.ts
index d1e84b244277..b8931b12e6aa 100644
--- a/packages/google/src/google-generative-ai-embedding-settings.ts
+++ b/packages/google/src/google-generative-ai-embedding-settings.ts
@@ -8,4 +8,26 @@ export interface GoogleGenerativeAIEmbeddingSettings {
* If set, excessive values in the output embedding are truncated from the end.
*/
outputDimensionality?: number;
+
+ /**
+ * Optional. Specifies the task type for generating embeddings.
+ * Supported task types:
+ * - SEMANTIC_SIMILARITY: Optimized for text similarity.
+ * - CLASSIFICATION: Optimized for text classification.
+ * - CLUSTERING: Optimized for clustering texts based on similarity.
+ * - RETRIEVAL_DOCUMENT: Optimized for document retrieval.
+ * - RETRIEVAL_QUERY: Optimized for query-based retrieval.
+ * - QUESTION_ANSWERING: Optimized for answering questions.
+ * - FACT_VERIFICATION: Optimized for verifying factual information.
+ * - CODE_RETRIEVAL_QUERY: Optimized for retrieving code blocks based on natural language queries.
+ */
+ taskType?:
+ | 'SEMANTIC_SIMILARITY'
+ | 'CLASSIFICATION'
+ | 'CLUSTERING'
+ | 'RETRIEVAL_DOCUMENT'
+ | 'RETRIEVAL_QUERY'
+ | 'QUESTION_ANSWERING'
+ | 'FACT_VERIFICATION'
+ | 'CODE_RETRIEVAL_QUERY';
}
diff --git a/packages/google/src/google-generative-ai-language-model.test.ts b/packages/google/src/google-generative-ai-language-model.test.ts
index 4a10794a8c2a..886a94252f9c 100644
--- a/packages/google/src/google-generative-ai-language-model.test.ts
+++ b/packages/google/src/google-generative-ai-language-model.test.ts
@@ -33,6 +33,21 @@ const SAFETY_RATINGS = [
},
];
+type TestTokenDetail = { modality: string; tokenCount: number };
+
+const TEST_PROMPT_TOKENS_DETAILS: TestTokenDetail[] = [
+ { modality: 'TEXT', tokenCount: 10 },
+];
+const TEST_CACHE_TOKENS_DETAILS: TestTokenDetail[] = [
+ { modality: 'IMAGE', tokenCount: 20 },
+];
+const TEST_CANDIDATES_TOKENS_DETAILS: TestTokenDetail[] = [
+ { modality: 'AUDIO', tokenCount: 30 },
+];
+const TEST_TOOL_USE_PROMPT_TOKENS_DETAILS: TestTokenDetail[] = [
+ { modality: 'VIDEO', tokenCount: 40 },
+];
+
const provider = createGoogleGenerativeAI({
apiKey: 'test-api-key',
generateId: () => 'test-id',
@@ -194,20 +209,32 @@ describe('doGenerate', () => {
const prepareJsonResponse = ({
content = '',
- usage = {
+ usageMetadata = {
promptTokenCount: 1,
candidatesTokenCount: 2,
totalTokenCount: 3,
+ cachedContentTokenCount: null,
+ thoughtsTokenCount: null,
+ promptTokensDetails: null,
+ cacheTokensDetails: null,
+ candidatesTokensDetails: null,
+ toolUsePromptTokensDetails: null,
},
headers,
groundingMetadata,
url = TEST_URL_GEMINI_PRO,
}: {
content?: string;
- usage?: {
- promptTokenCount: number;
- candidatesTokenCount: number;
- totalTokenCount: number;
+ usageMetadata?: {
+ promptTokenCount: number | null;
+ candidatesTokenCount: number | null;
+ totalTokenCount?: number | null;
+ cachedContentTokenCount?: number | null;
+ thoughtsTokenCount?: number | null;
+ promptTokensDetails?: TestTokenDetail[] | null;
+ cacheTokensDetails?: TestTokenDetail[] | null;
+ candidatesTokensDetails?: TestTokenDetail[] | null;
+ toolUsePromptTokensDetails?: TestTokenDetail[] | null;
};
     headers?: Record<string, string>;
groundingMetadata?: GoogleGenerativeAIGroundingMetadata;
@@ -235,7 +262,7 @@ describe('doGenerate', () => {
},
],
promptFeedback: { safetyRatings: SAFETY_RATINGS },
- usageMetadata: usage,
+ usageMetadata,
},
};
};
@@ -254,7 +281,7 @@ describe('doGenerate', () => {
it('should extract usage', async () => {
prepareJsonResponse({
- usage: {
+ usageMetadata: {
promptTokenCount: 20,
candidatesTokenCount: 5,
totalTokenCount: 25,
@@ -377,7 +404,7 @@ describe('doGenerate', () => {
expect(rawResponse?.headers).toStrictEqual({
// default headers:
- 'content-length': '804',
+ 'content-length': '979',
'content-type': 'application/json',
// custom header
@@ -414,6 +441,39 @@ describe('doGenerate', () => {
});
});
+ it('should only pass valid provider options', async () => {
+ prepareJsonResponse({});
+
+ await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: [
+ { role: 'system', content: 'test system instruction' },
+ { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
+ ],
+ seed: 123,
+ temperature: 0.5,
+ providerMetadata: {
+ google: { foo: 'bar', responseModalities: ['TEXT', 'IMAGE'] },
+ },
+ });
+
+ expect(await server.calls[0].requestBody).toStrictEqual({
+ contents: [
+ {
+ role: 'user',
+ parts: [{ text: 'Hello' }],
+ },
+ ],
+ systemInstruction: { parts: [{ text: 'test system instruction' }] },
+ generationConfig: {
+ seed: 123,
+ temperature: 0.5,
+ responseModalities: ['TEXT', 'IMAGE'],
+ },
+ });
+ });
+
it('should pass tools and toolChoice', async () => {
prepareJsonResponse({});
@@ -981,6 +1041,96 @@ describe('doGenerate', () => {
});
});
+ it('should expose all token details in providerMetadata when available in doGenerate', async () => {
+ prepareJsonResponse({
+ content: 'test response',
+ usageMetadata: {
+ promptTokenCount: 1,
+ candidatesTokenCount: 1,
+ cachedContentTokenCount: 5,
+ thoughtsTokenCount: 2,
+ promptTokensDetails: TEST_PROMPT_TOKENS_DETAILS,
+ cacheTokensDetails: TEST_CACHE_TOKENS_DETAILS,
+ candidatesTokensDetails: TEST_CANDIDATES_TOKENS_DETAILS,
+ toolUsePromptTokensDetails: TEST_TOOL_USE_PROMPT_TOKENS_DETAILS,
+ },
+ });
+
+ const { providerMetadata } = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(providerMetadata?.google.cachedContentTokenCount).toBe(5);
+ expect(providerMetadata?.google.thoughtsTokenCount).toBe(2);
+ expect(providerMetadata?.google.promptTokensDetails).toEqual(
+ TEST_PROMPT_TOKENS_DETAILS,
+ );
+ expect(providerMetadata?.google.cacheTokensDetails).toEqual(
+ TEST_CACHE_TOKENS_DETAILS,
+ );
+ expect(providerMetadata?.google.candidatesTokensDetails).toEqual(
+ TEST_CANDIDATES_TOKENS_DETAILS,
+ );
+ expect(providerMetadata?.google.toolUsePromptTokensDetails).toEqual(
+ TEST_TOOL_USE_PROMPT_TOKENS_DETAILS,
+ );
+ });
+
+ it('should set token details to null in providerMetadata when absent in usageMetadata in doGenerate', async () => {
+ prepareJsonResponse({
+ content: 'test response',
+ usageMetadata: {
+ promptTokenCount: 1,
+ candidatesTokenCount: 1,
+ },
+ });
+
+ const { providerMetadata } = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(providerMetadata?.google.cachedContentTokenCount).toBeNull();
+ expect(providerMetadata?.google.thoughtsTokenCount).toBeNull();
+ expect(providerMetadata?.google.promptTokensDetails).toBeNull();
+ expect(providerMetadata?.google.cacheTokensDetails).toBeNull();
+ expect(providerMetadata?.google.candidatesTokensDetails).toBeNull();
+ expect(providerMetadata?.google.toolUsePromptTokensDetails).toBeNull();
+ });
+
+ it('should set token details to null in providerMetadata when usageMetadata itself is absent in doGenerate', async () => {
+ server.urls[TEST_URL_GEMINI_PRO].response = {
+ type: 'json-value',
+ body: {
+ candidates: [
+ {
+ content: { parts: [{ text: 'test' }], role: 'model' },
+ finishReason: 'STOP',
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ },
+ };
+
+ const { providerMetadata, usage } = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(usage.promptTokens).toBeNaN();
+ expect(usage.completionTokens).toBeNaN();
+ expect(providerMetadata?.google.cachedContentTokenCount).toBeNull();
+ expect(providerMetadata?.google.thoughtsTokenCount).toBeNull();
+ expect(providerMetadata?.google.promptTokensDetails).toBeNull();
+ expect(providerMetadata?.google.cacheTokensDetails).toBeNull();
+ expect(providerMetadata?.google.candidatesTokensDetails).toBeNull();
+ expect(providerMetadata?.google.toolUsePromptTokensDetails).toBeNull();
+ });
+
describe('search tool selection', () => {
const provider = createGoogleGenerativeAI({
apiKey: 'test-api-key',
@@ -1262,6 +1412,202 @@ describe('doGenerate', () => {
},
]);
});
+ it('should correctly parse and separate reasoning parts from text output', async () => {
+ server.urls[TEST_URL_GEMINI_PRO].response = {
+ type: 'json-value',
+ body: {
+ candidates: [
+ {
+ content: {
+ parts: [
+ { text: 'Visible text part 1. ' },
+ { text: 'This is a thought process.', thought: true },
+ { text: 'Visible text part 2.' },
+ { text: 'Another internal thought.', thought: true },
+ ],
+ role: 'model',
+ },
+ finishReason: 'STOP',
+ index: 0,
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ usageMetadata: {
+ promptTokenCount: 10,
+ candidatesTokenCount: 20,
+ totalTokenCount: 30,
+ },
+ },
+ };
+
+ const { text, reasoning } = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ expect(text).toStrictEqual('Visible text part 1. Visible text part 2.');
+ expect(reasoning).toStrictEqual([
+ { type: 'text', text: 'This is a thought process.' },
+ { type: 'text', text: 'Another internal thought.' },
+ ]);
+ });
+ describe('warnings for includeThoughts option', () => {
+ it('should generate a warning if includeThoughts is true for a non-Vertex provider', async () => {
+ prepareJsonResponse({ content: 'test' }); // Mock API response
+
+ // Manually create a model instance to control the provider string
+ const nonVertexModel = new GoogleGenerativeAILanguageModel(
+ 'gemini-pro',
+ {},
+ {
+ provider: 'google.generative-ai.chat', // Simulate non-Vertex provider
+ baseURL: 'https://generativelanguage.googleapis.com/v1beta',
+ headers: {},
+ generateId: () => 'test-id',
+ isSupportedUrl: () => false, // Dummy implementation
+ },
+ );
+
+ const { warnings } = await nonVertexModel.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ google: {
+ thinkingConfig: {
+ includeThoughts: true,
+ thinkingBudget: 500,
+ },
+ },
+ },
+ });
+
+ expect(warnings).toContainEqual({
+ type: 'other',
+ message:
+ "The 'includeThoughts' option is only supported with the Google Vertex provider " +
+ 'and might not be supported or could behave unexpectedly with the current Google provider ' +
+ '(google.generative-ai.chat).',
+ });
+ });
+
+ it('should NOT generate a warning if includeThoughts is true for a Vertex provider', async () => {
+ prepareJsonResponse({ content: 'test' }); // Mock API response
+
+ const vertexModel = new GoogleGenerativeAILanguageModel(
+ 'gemini-pro',
+ {},
+ {
+ provider: 'google.vertex.chat', // Simulate Vertex provider
+ baseURL: 'https://generativelanguage.googleapis.com/v1beta',
+ headers: {},
+ generateId: () => 'test-id',
+ isSupportedUrl: () => false,
+ },
+ );
+
+ const { warnings } = await vertexModel.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ google: {
+ thinkingConfig: {
+ includeThoughts: true,
+ thinkingBudget: 500,
+ },
+ },
+ },
+ });
+
+ const expectedWarningMessage =
+ "The 'includeThoughts' option is only supported with the Google Vertex provider " +
+ 'and might not be supported or could behave unexpectedly with the current Google provider ';
+
+ expect(
+ warnings?.some(
+ w =>
+ w.type === 'other' && w.message.startsWith(expectedWarningMessage),
+ ),
+ ).toBe(false);
+ });
+
+ it('should NOT generate a warning if includeThoughts is false for a non-Vertex provider', async () => {
+ prepareJsonResponse({ content: 'test' }); // Mock API response
+
+ const nonVertexModel = new GoogleGenerativeAILanguageModel(
+ 'gemini-pro',
+ {},
+ {
+ provider: 'google.generative-ai.chat', // Simulate non-Vertex provider
+ baseURL: 'https://generativelanguage.googleapis.com/v1beta',
+ headers: {},
+ generateId: () => 'test-id',
+ isSupportedUrl: () => false,
+ },
+ );
+
+ const { warnings } = await nonVertexModel.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ google: {
+ thinkingConfig: {
+ includeThoughts: false,
+ thinkingBudget: 500,
+ },
+ },
+ },
+ });
+
+ const expectedWarningMessage =
+ "The 'includeThoughts' option is only supported with the Google Vertex provider " +
+ 'and might not be supported or could behave unexpectedly with the current Google provider ';
+ expect(
+ warnings?.some(
+ w =>
+ w.type === 'other' && w.message.startsWith(expectedWarningMessage),
+ ),
+ ).toBe(false);
+ });
+
+ it('should NOT generate a warning if thinkingConfig is not provided for a non-Vertex provider', async () => {
+ prepareJsonResponse({ content: 'test' }); // Mock API response
+ const nonVertexModel = new GoogleGenerativeAILanguageModel(
+ 'gemini-pro',
+ {},
+ {
+ provider: 'google.generative-ai.chat', // Simulate non-Vertex provider
+ baseURL: 'https://generativelanguage.googleapis.com/v1beta',
+ headers: {},
+ generateId: () => 'test-id',
+ isSupportedUrl: () => false,
+ },
+ );
+
+ const { warnings } = await nonVertexModel.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ google: {
+ // No thinkingConfig
+ },
+ },
+ });
+ const expectedWarningMessage =
+ "The 'includeThoughts' option is only supported with the Google Vertex provider " +
+ 'and might not be supported or could behave unexpectedly with the current Google provider ';
+ expect(
+ warnings?.some(
+ w =>
+ w.type === 'other' && w.message.startsWith(expectedWarningMessage),
+ ),
+ ).toBe(false);
+ });
+ });
});
describe('doStream', () => {
@@ -1293,6 +1639,17 @@ describe('doStream', () => {
headers,
groundingMetadata,
url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:streamGenerateContent',
+ usageMetadata = {
+ promptTokenCount: 294,
+ candidatesTokenCount: 233,
+ totalTokenCount: 527,
+ cachedContentTokenCount: null,
+ thoughtsTokenCount: null,
+ promptTokensDetails: null,
+ cacheTokensDetails: null,
+ candidatesTokensDetails: null,
+ toolUsePromptTokensDetails: null,
+ },
}: {
content: string[];
headers?: Record;
@@ -1303,6 +1660,17 @@ describe('doStream', () => {
| typeof TEST_URL_GEMINI_2_0_FLASH_EXP
| typeof TEST_URL_GEMINI_1_0_PRO
| typeof TEST_URL_GEMINI_1_5_FLASH;
+ usageMetadata?: {
+ promptTokenCount: number | null;
+ candidatesTokenCount: number | null;
+ totalTokenCount?: number | null;
+ cachedContentTokenCount?: number | null;
+ thoughtsTokenCount?: number | null;
+ promptTokensDetails?: TestTokenDetail[] | null;
+ cacheTokensDetails?: TestTokenDetail[] | null;
+ candidatesTokensDetails?: TestTokenDetail[] | null;
+ toolUsePromptTokensDetails?: TestTokenDetail[] | null;
+ };
}) => {
server.urls[url].response = {
headers,
@@ -1319,13 +1687,8 @@ describe('doStream', () => {
...(groundingMetadata && { groundingMetadata }),
},
],
- // Include usage metadata only in the last chunk
...(index === content.length - 1 && {
- usageMetadata: {
- promptTokenCount: 294,
- candidatesTokenCount: 233,
- totalTokenCount: 527,
- },
+ usageMetadata,
}),
})}\n\n`,
),
@@ -1426,6 +1789,12 @@ describe('doStream', () => {
providerMetadata: {
google: {
groundingMetadata: null,
+ cacheTokensDetails: null,
+ candidatesTokensDetails: null,
+ toolUsePromptTokensDetails: null,
+ promptTokensDetails: null,
+ thoughtsTokenCount: null,
+ cachedContentTokenCount: null,
safetyRatings: [
{
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
@@ -1574,6 +1943,12 @@ describe('doStream', () => {
providerMetadata: {
google: {
groundingMetadata: null,
+ cacheTokensDetails: null,
+ candidatesTokensDetails: null,
+ toolUsePromptTokensDetails: null,
+ promptTokensDetails: null,
+ thoughtsTokenCount: null,
+ cachedContentTokenCount: null,
safetyRatings: [
{
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
@@ -1869,4 +2244,256 @@ describe('doStream', () => {
'tool-calls',
);
});
+
+ it('should only pass valid provider options', async () => {
+ prepareStreamResponse({ content: [''] });
+
+ await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ google: { foo: 'bar', responseModalities: ['TEXT', 'IMAGE'] },
+ },
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ contents: [
+ {
+ role: 'user',
+ parts: [{ text: 'Hello' }],
+ },
+ ],
+ generationConfig: {
+ responseModalities: ['TEXT', 'IMAGE'],
+ },
+ });
+ });
+
+ it('should correctly stream reasoning parts and text deltas separately', async () => {
+ server.urls[TEST_URL_GEMINI_PRO].response = {
+ type: 'stream-chunks',
+ chunks: [
+ `data: ${JSON.stringify({
+ candidates: [
+ {
+ content: { parts: [{ text: 'Text delta 1. ' }], role: 'model' },
+ index: 0,
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ })}\n\n`,
+ `data: ${JSON.stringify({
+ candidates: [
+ {
+ content: {
+ parts: [{ text: 'Reasoning delta 1.', thought: true }],
+ role: 'model',
+ },
+ index: 0,
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ })}\n\n`,
+ `data: ${JSON.stringify({
+ candidates: [
+ {
+ content: { parts: [{ text: 'Text delta 2.' }], role: 'model' },
+ index: 0,
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ })}\n\n`,
+ `data: ${JSON.stringify({
+ candidates: [
+ {
+ content: {
+ parts: [{ text: 'Reasoning delta 2.', thought: true }],
+ role: 'model',
+ },
+ finishReason: 'STOP', // Mark finish reason in a chunk that has content
+ index: 0,
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+ })}\n\n`,
+ `data: ${JSON.stringify({
+ // Final chunk for usage metadata
+ usageMetadata: {
+ promptTokenCount: 15,
+ candidatesTokenCount: 25,
+ totalTokenCount: 40,
+ },
+ })}\n\n`,
+ ],
+ };
+ const { stream } = await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ const events = await convertReadableStreamToArray(stream);
+
+ const relevantEvents = events.filter(
+ event => event.type === 'text-delta' || event.type === 'reasoning',
+ );
+
+ expect(relevantEvents).toStrictEqual([
+ { type: 'text-delta', textDelta: 'Text delta 1. ' },
+ { type: 'reasoning', textDelta: 'Reasoning delta 1.' },
+ { type: 'text-delta', textDelta: 'Text delta 2.' },
+ { type: 'reasoning', textDelta: 'Reasoning delta 2.' },
+ ]);
+
+ const finishEvent = events.find(event => event.type === 'finish');
+ expect(finishEvent).toBeDefined();
+ expect(finishEvent?.type === 'finish' && finishEvent.finishReason).toEqual(
+ 'stop',
+ );
+ expect(finishEvent?.type === 'finish' && finishEvent.usage).toStrictEqual({
+ promptTokens: 15,
+ completionTokens: 25,
+ });
+ });
+
+ it('should expose all token details in providerMetadata on finish when available in doStream', async () => {
+ prepareStreamResponse({
+ content: ['test stream chunk'],
+ usageMetadata: {
+ promptTokenCount: 10,
+ candidatesTokenCount: 20,
+ cachedContentTokenCount: 5,
+ thoughtsTokenCount: 3,
+ promptTokensDetails: TEST_PROMPT_TOKENS_DETAILS,
+ cacheTokensDetails: TEST_CACHE_TOKENS_DETAILS,
+ candidatesTokensDetails: TEST_CANDIDATES_TOKENS_DETAILS,
+ toolUsePromptTokensDetails: TEST_TOOL_USE_PROMPT_TOKENS_DETAILS,
+ },
+ });
+
+ const { stream } = await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ const events = await convertReadableStreamToArray(stream);
+ const finishEvent = events.find(event => event.type === 'finish');
+
+ expect(finishEvent).toBeDefined();
+ if (finishEvent && finishEvent.type === 'finish') {
+ expect(finishEvent.providerMetadata?.google.cachedContentTokenCount).toBe(
+ 5,
+ );
+ expect(finishEvent.providerMetadata?.google.thoughtsTokenCount).toBe(3);
+ expect(finishEvent.providerMetadata?.google.promptTokensDetails).toEqual(
+ TEST_PROMPT_TOKENS_DETAILS,
+ );
+ expect(finishEvent.providerMetadata?.google.cacheTokensDetails).toEqual(
+ TEST_CACHE_TOKENS_DETAILS,
+ );
+ expect(
+ finishEvent.providerMetadata?.google.candidatesTokensDetails,
+ ).toEqual(TEST_CANDIDATES_TOKENS_DETAILS);
+ expect(
+ finishEvent.providerMetadata?.google.toolUsePromptTokensDetails,
+ ).toEqual(TEST_TOOL_USE_PROMPT_TOKENS_DETAILS);
+ }
+ });
+
+ it('should set token details to null in providerMetadata on finish when absent in usageMetadata in doStream', async () => {
+ prepareStreamResponse({
+ content: ['test stream chunk'],
+ usageMetadata: {
+ promptTokenCount: 10,
+ candidatesTokenCount: 20,
+ },
+ });
+
+ const { stream } = await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ const events = await convertReadableStreamToArray(stream);
+ const finishEvent = events.find(event => event.type === 'finish');
+
+ expect(finishEvent).toBeDefined();
+ if (finishEvent && finishEvent.type === 'finish') {
+ expect(
+ finishEvent.providerMetadata?.google.cachedContentTokenCount,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.thoughtsTokenCount,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.promptTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.cacheTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.candidatesTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.toolUsePromptTokensDetails,
+ ).toBeNull();
+ }
+ });
+
+ it('should set token details to null in providerMetadata on finish when usageMetadata itself is absent in last chunk of doStream', async () => {
+ server.urls[
+ 'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:streamGenerateContent'
+ ].response = {
+ type: 'stream-chunks',
+ chunks: [
+ `data: ${JSON.stringify({
+ candidates: [
+ {
+ content: { parts: [{ text: 'final chunk' }], role: 'model' },
+ finishReason: 'STOP',
+ safetyRatings: SAFETY_RATINGS,
+ },
+ ],
+        })}\n\n`,
+ ],
+ };
+
+ const { stream } = await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ const events = await convertReadableStreamToArray(stream);
+ const finishEvent = events.find(event => event.type === 'finish');
+
+ expect(finishEvent).toBeDefined();
+ if (finishEvent && finishEvent.type === 'finish') {
+ expect(finishEvent.usage.promptTokens).toBeNaN();
+ expect(finishEvent.usage.completionTokens).toBeNaN();
+ expect(
+ finishEvent.providerMetadata?.google.cachedContentTokenCount,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.thoughtsTokenCount,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.promptTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.cacheTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.candidatesTokensDetails,
+ ).toBeNull();
+ expect(
+ finishEvent.providerMetadata?.google.toolUsePromptTokensDetails,
+ ).toBeNull();
+ }
+ });
});
diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts
index 8f2a9bf1c356..daaf95d174c8 100644
--- a/packages/google/src/google-generative-ai-language-model.ts
+++ b/packages/google/src/google-generative-ai-language-model.ts
@@ -88,11 +88,23 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
const googleOptions = parseProviderOptions({
provider: 'google',
providerOptions: providerMetadata,
- schema: z.object({
- responseModalities: z.array(z.enum(['TEXT', 'IMAGE'])).nullish(),
- }),
+ schema: googleGenerativeAIProviderOptionsSchema,
});
+ // Add warning if includeThoughts is used with a non-Vertex Google provider
+ if (
+ googleOptions?.thinkingConfig?.includeThoughts === true &&
+ !this.config.provider.startsWith('google.vertex.')
+ ) {
+ warnings.push({
+ type: 'other',
+ message:
+ "The 'includeThoughts' option is only supported with the Google Vertex provider " +
+ 'and might not be supported or could behave unexpectedly with the current Google provider ' +
+ `(${this.config.provider}).`,
+ });
+ }
+
const generationConfig = {
// standardized settings:
maxOutputTokens: maxTokens,
@@ -117,10 +129,12 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
: undefined,
...(this.settings.audioTimestamp && {
audioTimestamp: this.settings.audioTimestamp,
+ mediaResolution: this.settings.mediaResolution,
}),
// provider options:
responseModalities: googleOptions?.responseModalities,
+ thinkingConfig: googleOptions?.thinkingConfig,
};
const { contents, systemInstruction } =
@@ -128,7 +142,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
switch (type) {
case 'regular': {
- const { tools, toolConfig, toolWarnings } = prepareTools(
+ const preparedTools = prepareTools(
mode,
this.settings.useSearchGrounding ?? false,
this.settings.dynamicRetrievalConfig,
@@ -137,15 +151,21 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
return {
args: {
+ // Conditionally include tools, toolConfig, and systemInstruction
+ // only if cachedContent is not being used.
+ ...(!this.settings.cachedContent
+ ? {
+ tools: preparedTools.tools,
+ toolConfig: preparedTools.toolConfig,
+ systemInstruction,
+ }
+ : {}),
generationConfig,
contents,
- systemInstruction,
safetySettings: this.settings.safetySettings,
- tools,
- toolConfig,
cachedContent: this.settings.cachedContent,
},
- warnings: [...warnings, ...toolWarnings],
+ warnings: [...warnings, ...preparedTools.toolWarnings],
};
}
@@ -164,7 +184,11 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
: undefined,
},
contents,
- systemInstruction,
+ ...(!this.settings.cachedContent
+ ? {
+ systemInstruction,
+ }
+ : {}),
safetySettings: this.settings.safetySettings,
cachedContent: this.settings.cachedContent,
},
@@ -245,7 +269,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
: candidate.content.parts;
const toolCalls = getToolCallsFromParts({
- parts,
+        parts: parts, // the selected candidate's content parts (may be empty)
generateId: this.config.generateId,
});
@@ -253,6 +277,7 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
return {
text: getTextFromParts(parts),
+ reasoning: getReasoningDetailsFromParts(parts),
files: getInlineDataParts(parts)?.map(part => ({
data: part.inlineData.data,
mimeType: part.inlineData.mimeType,
@@ -273,6 +298,15 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
google: {
groundingMetadata: candidate.groundingMetadata ?? null,
safetyRatings: candidate.safetyRatings ?? null,
+ cachedContentTokenCount:
+ usageMetadata?.cachedContentTokenCount ?? null,
+ thoughtsTokenCount: usageMetadata?.thoughtsTokenCount ?? null,
+ promptTokensDetails: usageMetadata?.promptTokensDetails ?? null,
+ cacheTokensDetails: usageMetadata?.cacheTokensDetails ?? null,
+ candidatesTokensDetails:
+ usageMetadata?.candidatesTokensDetails ?? null,
+ toolUsePromptTokensDetails:
+ usageMetadata?.toolUsePromptTokensDetails ?? null,
},
},
sources: extractSources({
@@ -361,6 +395,18 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
});
}
+ const reasoningDeltaText = getReasoningDetailsFromParts(
+ content.parts,
+ );
+ if (reasoningDeltaText != null) {
+ for (const part of reasoningDeltaText) {
+ controller.enqueue({
+ type: 'reasoning',
+ textDelta: part.text,
+ });
+ }
+ }
+
const inlineDataParts = getInlineDataParts(content.parts);
if (inlineDataParts != null) {
for (const part of inlineDataParts) {
@@ -420,6 +466,20 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV1 {
google: {
groundingMetadata: candidate.groundingMetadata ?? null,
safetyRatings: candidate.safetyRatings ?? null,
+ cachedContentTokenCount:
+ usageMetadata?.cachedContentTokenCount ?? null,
+ thoughtsTokenCount:
+ usageMetadata?.thoughtsTokenCount ??
+ (args.generationConfig.thinkingConfig?.thinkingBudget === 0
+ ? 0
+ : null),
+ promptTokensDetails:
+ usageMetadata?.promptTokensDetails ?? null,
+ cacheTokensDetails: usageMetadata?.cacheTokensDetails ?? null,
+ candidatesTokensDetails:
+ usageMetadata?.candidatesTokensDetails ?? null,
+ toolUsePromptTokensDetails:
+ usageMetadata?.toolUsePromptTokensDetails ?? null,
},
};
}
@@ -469,15 +529,29 @@ function getToolCallsFromParts({
}
function getTextFromParts(parts: z.infer<typeof contentSchema>['parts']) {
- const textParts = parts?.filter(part => 'text' in part) as Array<
- GoogleGenerativeAIContentPart & { text: string }
- >;
+ const textParts = parts?.filter(
+ part => 'text' in part && (part as any).thought !== true, // Exclude thought parts
+  ) as Array<GoogleGenerativeAIContentPart & { text: string }>;
return textParts == null || textParts.length === 0
? undefined
: textParts.map(part => part.text).join('');
}
+function getReasoningDetailsFromParts(
+  parts: z.infer<typeof contentSchema>['parts'],
+): Array<{ type: 'text'; text: string }> | undefined {
+ const reasoningParts = parts?.filter(
+ part => 'text' in part && (part as any).thought === true,
+ ) as Array<
+ GoogleGenerativeAIContentPart & { text: string; thought?: boolean }
+ >;
+
+ return reasoningParts == null || reasoningParts.length === 0
+ ? undefined
+ : reasoningParts.map(part => ({ type: 'text', text: part.text }));
+}
+
function getInlineDataParts(parts: z.infer<typeof contentSchema>['parts']) {
return parts?.filter(
(
@@ -518,6 +592,7 @@ const contentSchema = z.object({
z.union([
z.object({
text: z.string(),
+ thought: z.boolean().nullish(),
}),
z.object({
functionCall: z.object({
@@ -576,14 +651,28 @@ export const groundingMetadataSchema = z.object({
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters
export const safetyRatingSchema = z.object({
- category: z.string(),
- probability: z.string(),
+ category: z.string().nullish(),
+ probability: z.string().nullish(),
probabilityScore: z.number().nullish(),
severity: z.string().nullish(),
severityScore: z.number().nullish(),
blocked: z.boolean().nullish(),
});
+const modalityEnum = z.enum([
+ 'TEXT',
+ 'IMAGE',
+ 'AUDIO',
+ 'VIDEO',
+ 'DOCUMENT',
+ 'MODALITY_UNSPECIFIED',
+]);
+
+export const tokensDetailsSchema = z.object({
+ modality: modalityEnum,
+ tokenCount: z.number(),
+});
+
const responseSchema = z.object({
candidates: z.array(
z.object({
@@ -598,6 +687,12 @@ const responseSchema = z.object({
promptTokenCount: z.number().nullish(),
candidatesTokenCount: z.number().nullish(),
totalTokenCount: z.number().nullish(),
+ cachedContentTokenCount: z.number().nullish(),
+ thoughtsTokenCount: z.number().nullish(),
+ promptTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ cacheTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ candidatesTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ toolUsePromptTokensDetails: z.array(tokensDetailsSchema).nullish(),
})
.nullish(),
});
@@ -620,6 +715,25 @@ const chunkSchema = z.object({
promptTokenCount: z.number().nullish(),
candidatesTokenCount: z.number().nullish(),
totalTokenCount: z.number().nullish(),
+ cachedContentTokenCount: z.number().nullish(),
+ thoughtsTokenCount: z.number().nullish(),
+ promptTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ cacheTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ candidatesTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ toolUsePromptTokensDetails: z.array(tokensDetailsSchema).nullish(),
+ })
+ .nullish(),
+});
+
+const googleGenerativeAIProviderOptionsSchema = z.object({
+ responseModalities: z.array(z.enum(['TEXT', 'IMAGE'])).nullish(),
+ thinkingConfig: z
+ .object({
+ thinkingBudget: z.number().nullish(),
+ includeThoughts: z.boolean().nullish(),
})
.nullish(),
});
+export type GoogleGenerativeAIProviderOptions = z.infer<
+ typeof googleGenerativeAIProviderOptionsSchema
+>;
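For reviewers, a minimal usage sketch of the new provider option and metadata fields (not part of the patch; the model id and the `providerOptions.google` → `providerMetadata` plumbing through `generateText` are assumptions based on how the schema above is parsed, AI SDK ≥ 4.2):

```ts
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';

// Sketch only: request thinking output and read the extended token details.
const result = await generateText({
  // assumed model id with thinking support (added to the settings union below)
  model: google('gemini-2.5-flash-preview-04-17'),
  prompt: 'Explain HTTP caching in two sentences.',
  providerOptions: {
    google: {
      thinkingConfig: {
        includeThoughts: true, // triggers the non-Vertex warning added above
        thinkingBudget: 2048,
      },
    },
  },
});

const googleMeta = result.providerMetadata?.google;
console.log(googleMeta?.thoughtsTokenCount); // number | null
console.log(googleMeta?.promptTokensDetails); // per-modality token details | null
```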
diff --git a/packages/google/src/google-generative-ai-prompt.ts b/packages/google/src/google-generative-ai-prompt.ts
index bd7c3e2cb4c8..2b8e4e4b2601 100644
--- a/packages/google/src/google-generative-ai-prompt.ts
+++ b/packages/google/src/google-generative-ai-prompt.ts
@@ -1,6 +1,7 @@
import {
groundingMetadataSchema,
safetyRatingSchema,
+ tokensDetailsSchema,
} from './google-generative-ai-language-model';
import { z } from 'zod';
@@ -31,7 +32,17 @@ export type GoogleGenerativeAIGroundingMetadata = z.infer<
export type GoogleGenerativeAISafetyRating = z.infer<typeof safetyRatingSchema>;
+export type GoogleGenerativeAITokensDetails = z.infer<
+ typeof tokensDetailsSchema
+>;
+
export interface GoogleGenerativeAIProviderMetadata {
groundingMetadata: GoogleGenerativeAIGroundingMetadata | null;
safetyRatings: GoogleGenerativeAISafetyRating[] | null;
+ cachedContentTokenCount: number | null;
+ thoughtsTokenCount: number | null;
+ promptTokensDetails: GoogleGenerativeAITokensDetails[] | null;
+ cacheTokensDetails: GoogleGenerativeAITokensDetails[] | null;
+ candidatesTokensDetails: GoogleGenerativeAITokensDetails[] | null;
+ toolUsePromptTokensDetails: GoogleGenerativeAITokensDetails[] | null;
}
diff --git a/packages/google/src/google-generative-ai-settings.ts b/packages/google/src/google-generative-ai-settings.ts
index 02f57d68e797..759416039ced 100644
--- a/packages/google/src/google-generative-ai-settings.ts
+++ b/packages/google/src/google-generative-ai-settings.ts
@@ -1,7 +1,6 @@
export type GoogleGenerativeAIModelId =
// Stable models
// https://ai.google.dev/gemini-api/docs/models/gemini
- | 'gemini-2.0-flash-001'
| 'gemini-1.5-flash'
| 'gemini-1.5-flash-latest'
| 'gemini-1.5-flash-001'
@@ -13,13 +12,18 @@ export type GoogleGenerativeAIModelId =
| 'gemini-1.5-pro-latest'
| 'gemini-1.5-pro-001'
| 'gemini-1.5-pro-002'
- // Experimental models
- // https://ai.google.dev/gemini-api/docs/models/experimental-models
- | 'gemini-2.5-pro-exp-03-25'
- | 'gemini-2.0-flash-lite-preview-02-05'
+ | 'gemini-2.0-flash'
+ | 'gemini-2.0-flash-001'
+ | 'gemini-2.0-flash-live-001'
+ | 'gemini-2.0-flash-lite'
| 'gemini-2.0-pro-exp-02-05'
| 'gemini-2.0-flash-thinking-exp-01-21'
| 'gemini-2.0-flash-exp'
+ // Experimental models
+ // https://ai.google.dev/gemini-api/docs/models/experimental-models
+ | 'gemini-2.5-pro-exp-03-25'
+ | 'gemini-2.5-pro-preview-05-06'
+ | 'gemini-2.5-flash-preview-04-17'
| 'gemini-exp-1206'
| 'gemma-3-27b-it'
| 'learnlm-1.5-pro-experimental'
@@ -55,6 +59,17 @@ Format: cachedContents/{cachedContent}
*/
structuredOutputs?: boolean;
+ /**
+ * Optional. Media resolution used for vision capabilities.
+ *
+ * @see https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/GenerationConfig#FIELDS.media_resolution
+ */
+ mediaResolution?:
+ | 'MEDIA_RESOLUTION_UNSPECIFIED'
+ | 'MEDIA_RESOLUTION_LOW'
+ | 'MEDIA_RESOLUTION_MEDIUM'
+ | 'MEDIA_RESOLUTION_HIGH';
+
/**
Optional. A list of unique safety settings for blocking unsafe content.
*/
@@ -72,7 +87,8 @@ Optional. A list of unique safety settings for blocking unsafe content.
| 'BLOCK_LOW_AND_ABOVE'
| 'BLOCK_MEDIUM_AND_ABOVE'
| 'BLOCK_ONLY_HIGH'
- | 'BLOCK_NONE';
+ | 'BLOCK_NONE'
+ | 'OFF';
}>;
/**
* Optional. Enables timestamp understanding for audio-only files.
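A similarly hedged sketch of the expanded settings surface (assumed usage, not in the diff; the harm category value comes from the existing settings union and is not changed by this patch):

```ts
import { createGoogleGenerativeAI } from '@ai-sdk/google';

const google = createGoogleGenerativeAI({
  apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY,
});

// Sketch only: one of the newly listed model ids combined with the new
// mediaResolution setting and the new 'OFF' safety threshold.
const model = google('gemini-2.0-flash', {
  mediaResolution: 'MEDIA_RESOLUTION_MEDIUM',
  safetySettings: [
    { category: 'HARM_CATEGORY_HARASSMENT', threshold: 'OFF' },
  ],
});
```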
diff --git a/packages/google/src/index.ts b/packages/google/src/index.ts
index bf9c08c46346..8220c6c80ba0 100644
--- a/packages/google/src/index.ts
+++ b/packages/google/src/index.ts
@@ -1,6 +1,7 @@
-export { createGoogleGenerativeAI, google } from './google-provider';
export type { GoogleErrorData } from './google-error';
+export type { GoogleGenerativeAIProviderOptions } from './google-generative-ai-language-model';
export type { GoogleGenerativeAIProviderMetadata } from './google-generative-ai-prompt';
+export { createGoogleGenerativeAI, google } from './google-provider';
export type {
GoogleGenerativeAIProvider,
GoogleGenerativeAIProviderSettings,
diff --git a/packages/google/turbo.json b/packages/google/turbo.json
index 620b8380e744..3e50dc890ace 100644
--- a/packages/google/turbo.json
+++ b/packages/google/turbo.json
@@ -1,12 +1,8 @@
{
- "extends": [
- "//"
- ],
+ "extends": ["//"],
"tasks": {
"build": {
- "outputs": [
- "**/dist/**"
- ]
+ "outputs": ["**/dist/**"]
}
}
}
diff --git a/packages/groq/CHANGELOG.md b/packages/groq/CHANGELOG.md
index 104016e417dc..ef6036c09c42 100644
--- a/packages/groq/CHANGELOG.md
+++ b/packages/groq/CHANGELOG.md
@@ -1,5 +1,50 @@
# @ai-sdk/groq
+## 1.2.9
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.2.8
+
+### Patch Changes
+
+- 74688db: feat(providers/groq): add transcribe
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.2.7
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.2.6
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.2.5
+
+### Patch Changes
+
+- 8fdc8fe: feat (provider/groq): add llama 4 model
+
+## 1.2.4
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 1.2.3
### Patch Changes
diff --git a/packages/groq/README.md b/packages/groq/README.md
index dc41b3d613ff..973b6cc5cbf7 100644
--- a/packages/groq/README.md
+++ b/packages/groq/README.md
@@ -1,6 +1,6 @@
# AI SDK - Groq Provider
-The **[Groq provider](https://sdk.vercel.ai/providers/ai-sdk-providers/groq)** for the [AI SDK](https://sdk.vercel.ai/docs)
+The **[Groq provider](https://ai-sdk.dev/providers/ai-sdk-providers/groq)** for the [AI SDK](https://ai-sdk.dev/docs)
contains language model support for the Groq chat and completion APIs and embedding model support for the Groq embeddings API.
## Setup
@@ -33,4 +33,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Groq provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/groq)** for more information.
+Please check out the **[Groq provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/groq)** for more information.
diff --git a/packages/groq/package.json b/packages/groq/package.json
index c7875a77e49d..2be2451cd627 100644
--- a/packages/groq/package.json
+++ b/packages/groq/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/groq",
- "version": "1.2.3",
+ "version": "1.2.9",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -31,8 +31,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/groq/src/groq-api-types.ts b/packages/groq/src/groq-api-types.ts
index f76a055c97b2..febbd8331485 100644
--- a/packages/groq/src/groq-api-types.ts
+++ b/packages/groq/src/groq-api-types.ts
@@ -48,3 +48,52 @@ export interface GroqToolMessage {
content: string;
tool_call_id: string;
}
+
+export interface GroqTranscriptionAPITypes {
+ /**
+ * The audio file object for direct upload to translate/transcribe.
+ * Required unless using url instead.
+ */
+ file?: string;
+
+ /**
+ * The audio URL to translate/transcribe (supports Base64URL).
+ * Required unless using file instead.
+ */
+ url?: string;
+
+ /**
+   * The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en', 'tr') will improve accuracy and latency.
+ * The translations endpoint only supports 'en' as a parameter option.
+ */
+ language?: string;
+
+ /**
+ * ID of the model to use.
+ */
+ model: string;
+
+ /**
+ * Prompt to guide the model's style or specify how to spell unfamiliar words. (limited to 224 tokens)
+ */
+ prompt?: string;
+
+ /**
+ * Define the output response format.
+ * Set to verbose_json to receive timestamps for audio segments.
+ * Set to text to return a text response.
+ */
+ response_format?: string;
+
+ /**
+ * The temperature between 0 and 1. For translations and transcriptions, we recommend the default value of 0.
+ */
+ temperature?: number;
+
+ /**
+   * The timestamp granularities to populate for this transcription. response_format must be set to verbose_json to use timestamp granularities.
+ * Either or both of word and segment are supported.
+ * segment returns full metadata and word returns only word, start, and end timestamps. To get both word-level timestamps and full segment metadata, include both values in the array.
+ */
+  timestamp_granularities?: Array<'word' | 'segment'>;
+}
diff --git a/packages/groq/src/groq-chat-settings.ts b/packages/groq/src/groq-chat-settings.ts
index ed70c1ec7527..2d8104d52cdc 100644
--- a/packages/groq/src/groq-chat-settings.ts
+++ b/packages/groq/src/groq-chat-settings.ts
@@ -9,6 +9,7 @@ export type GroqChatModelId =
| 'llama3-8b-8192'
| 'mixtral-8x7b-32768'
// preview models (selection)
+ | 'meta-llama/llama-4-scout-17b-16e-instruct'
| 'qwen-qwq-32b'
| 'mistral-saba-24b'
| 'qwen-2.5-32b'
diff --git a/packages/groq/src/groq-config.ts b/packages/groq/src/groq-config.ts
new file mode 100644
index 000000000000..403fa7a5bcd5
--- /dev/null
+++ b/packages/groq/src/groq-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type GroqConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+  headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/groq/src/groq-provider.ts b/packages/groq/src/groq-provider.ts
index 048288910414..b112e2132768 100644
--- a/packages/groq/src/groq-provider.ts
+++ b/packages/groq/src/groq-provider.ts
@@ -2,6 +2,7 @@ import {
LanguageModelV1,
NoSuchModelError,
ProviderV1,
+ TranscriptionModelV1,
} from '@ai-sdk/provider';
import {
FetchFunction,
@@ -10,6 +11,8 @@ import {
} from '@ai-sdk/provider-utils';
import { GroqChatLanguageModel } from './groq-chat-language-model';
import { GroqChatModelId, GroqChatSettings } from './groq-chat-settings';
+import { GroqTranscriptionModelId } from './groq-transcription-settings';
+import { GroqTranscriptionModel } from './groq-transcription-model';
export interface GroqProvider extends ProviderV1 {
/**
@@ -24,6 +27,11 @@ Creates an Groq chat model for text generation.
modelId: GroqChatModelId,
settings?: GroqChatSettings,
): LanguageModelV1;
+
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: GroqTranscriptionModelId): TranscriptionModelV1;
}
export interface GroqProviderSettings {
@@ -89,6 +97,15 @@ export function createGroq(options: GroqProviderSettings = {}): GroqProvider {
return createChatModel(modelId, settings);
};
+ const createTranscriptionModel = (modelId: GroqTranscriptionModelId) => {
+ return new GroqTranscriptionModel(modelId, {
+ provider: 'groq.transcription',
+ url: ({ path }) => `${baseURL}${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+ };
+
const provider = function (
modelId: GroqChatModelId,
settings?: GroqChatSettings,
@@ -101,6 +118,7 @@ export function createGroq(options: GroqProviderSettings = {}): GroqProvider {
provider.textEmbeddingModel = (modelId: string) => {
throw new NoSuchModelError({ modelId, modelType: 'textEmbeddingModel' });
};
+ provider.transcription = createTranscriptionModel;
return provider;
}
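To make the new `transcription` factory concrete, here is a rough usage sketch (assumptions: the experimental `transcribe` helper from the `ai` package, plus placeholder file and option values):

```ts
import { groq } from '@ai-sdk/groq';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'node:fs/promises';

// Sketch only: transcribe a local file with the new Groq transcription model.
const result = await transcribe({
  model: groq.transcription('whisper-large-v3-turbo'),
  audio: await readFile('audio.mp3'), // placeholder path
  providerOptions: {
    groq: {
      language: 'en', // ISO-639-1
      responseFormat: 'verbose_json', // required for segment timestamps
      timestampGranularities: ['segment'],
    },
  },
});

console.log(result.text);
console.log(result.segments); // [{ text, startSecond, endSecond }, ...]
```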
diff --git a/packages/groq/src/groq-transcription-model.test.ts b/packages/groq/src/groq-transcription-model.test.ts
new file mode 100644
index 000000000000..006b28d75e57
--- /dev/null
+++ b/packages/groq/src/groq-transcription-model.test.ts
@@ -0,0 +1,157 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { GroqTranscriptionModel } from './groq-transcription-model';
+import { createGroq } from './groq-provider';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+
+const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+const provider = createGroq({ apiKey: 'test-api-key' });
+const model = provider.transcription('whisper-large-v3-turbo');
+
+const server = createTestServer({
+ 'https://api.groq.com/openai/v1/audio/transcriptions': {},
+});
+
+describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+    headers?: Record<string, string>;
+ } = {}) {
+ server.urls[
+ 'https://api.groq.com/openai/v1/audio/transcriptions'
+ ].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ task: 'transcribe',
+ language: 'English',
+ duration: 2.5,
+ text: 'Hello world!',
+ segments: [
+ {
+ id: 0,
+ seek: 0,
+ start: 0,
+ end: 2.48,
+ text: 'Hello world!',
+ tokens: [50365, 2425, 490, 264],
+ temperature: 0,
+ avg_logprob: -0.29010406,
+ compression_ratio: 0.7777778,
+ no_speech_prob: 0.032802984,
+ },
+ ],
+ x_groq: { id: 'req_01jrh9nn61f24rydqq1r4b3yg5' },
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[0].requestBodyMultipart).toMatchObject({
+ model: 'whisper-large-v3-turbo',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createGroq({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('whisper-large-v3-turbo').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'Bearer test-api-key',
+ 'content-type': expect.stringMatching(
+ /^multipart\/form-data; boundary=----formdata-undici-\d+$/,
+ ),
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe('Hello world!');
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new GroqTranscriptionModel('whisper-large-v3-turbo', {
+ provider: 'test-provider',
+ url: () => 'https://api.groq.com/openai/v1/audio/transcriptions',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'whisper-large-v3-turbo',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new GroqTranscriptionModel('whisper-large-v3-turbo', {
+ provider: 'test-provider',
+ url: () => 'https://api.groq.com/openai/v1/audio/transcriptions',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('whisper-large-v3-turbo');
+ });
+});
diff --git a/packages/groq/src/groq-transcription-model.ts b/packages/groq/src/groq-transcription-model.ts
new file mode 100644
index 000000000000..9167b66313bb
--- /dev/null
+++ b/packages/groq/src/groq-transcription-model.ts
@@ -0,0 +1,172 @@
+import {
+ TranscriptionModelV1,
+ TranscriptionModelV1CallWarning,
+} from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ convertBase64ToUint8Array,
+ createJsonResponseHandler,
+ parseProviderOptions,
+ postFormDataToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { GroqConfig } from './groq-config';
+import { groqFailedResponseHandler } from './groq-error';
+import { GroqTranscriptionModelId } from './groq-transcription-settings';
+import { GroqTranscriptionAPITypes } from './groq-api-types';
+
+// https://console.groq.com/docs/speech-to-text
+const groqProviderOptionsSchema = z.object({
+ language: z.string().nullish(),
+ prompt: z.string().nullish(),
+ responseFormat: z.string().nullish(),
+ temperature: z.number().min(0).max(1).nullish(),
+ timestampGranularities: z.array(z.string()).nullish(),
+});
+
+export type GroqTranscriptionCallOptions = z.infer<
+ typeof groqProviderOptionsSchema
+>;
+
+interface GroqTranscriptionModelConfig extends GroqConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class GroqTranscriptionModel implements TranscriptionModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: GroqTranscriptionModelId,
+ private readonly config: GroqTranscriptionModelConfig,
+ ) {}
+
+ private getArgs({
+ audio,
+ mediaType,
+ providerOptions,
+  }: Parameters<TranscriptionModelV1['doGenerate']>[0]) {
+ const warnings: TranscriptionModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const groqOptions = parseProviderOptions({
+ provider: 'groq',
+ providerOptions,
+ schema: groqProviderOptionsSchema,
+ });
+
+ // Create form data with base fields
+ const formData = new FormData();
+ const blob =
+ audio instanceof Uint8Array
+ ? new Blob([audio])
+ : new Blob([convertBase64ToUint8Array(audio)]);
+
+ formData.append('model', this.modelId);
+ formData.append('file', new File([blob], 'audio', { type: mediaType }));
+
+ // Add provider-specific options
+ if (groqOptions) {
+ const transcriptionModelOptions: Omit<
+ GroqTranscriptionAPITypes,
+ 'model'
+ > = {
+ language: groqOptions.language ?? undefined,
+ prompt: groqOptions.prompt ?? undefined,
+ response_format: groqOptions.responseFormat ?? undefined,
+ temperature: groqOptions.temperature ?? undefined,
+ timestamp_granularities:
+ groqOptions.timestampGranularities ?? undefined,
+ };
+
+ for (const key in transcriptionModelOptions) {
+ const value =
+ transcriptionModelOptions[
+            key as keyof Omit<GroqTranscriptionAPITypes, 'model'>
+ ];
+ if (value !== undefined) {
+ formData.append(key, String(value));
+ }
+ }
+ }
+
+ return {
+ formData,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+    options: Parameters<TranscriptionModelV1['doGenerate']>[0],
+  ): Promise<Awaited<ReturnType<TranscriptionModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { formData, warnings } = this.getArgs(options);
+
+ const {
+ value: response,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postFormDataToApi({
+ url: this.config.url({
+ path: '/audio/transcriptions',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ formData,
+ failedResponseHandler: groqFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ groqTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ text: response.text,
+ segments:
+ response.segments?.map(segment => ({
+ text: segment.text,
+ startSecond: segment.start,
+ endSecond: segment.end,
+ })) ?? [],
+ language: response.language,
+ durationInSeconds: response.duration,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
+
+const groqTranscriptionResponseSchema = z.object({
+ task: z.string(),
+ language: z.string(),
+ duration: z.number(),
+ text: z.string(),
+ segments: z.array(
+ z.object({
+ id: z.number(),
+ seek: z.number(),
+ start: z.number(),
+ end: z.number(),
+ text: z.string(),
+ tokens: z.array(z.number()),
+ temperature: z.number(),
+ avg_logprob: z.number(),
+ compression_ratio: z.number(),
+ no_speech_prob: z.number(),
+ }),
+ ),
+ x_groq: z.object({
+ id: z.string(),
+ }),
+});
diff --git a/packages/groq/src/groq-transcription-settings.ts b/packages/groq/src/groq-transcription-settings.ts
new file mode 100644
index 000000000000..14d34c1c4d08
--- /dev/null
+++ b/packages/groq/src/groq-transcription-settings.ts
@@ -0,0 +1,5 @@
+export type GroqTranscriptionModelId =
+ | 'whisper-large-v3-turbo'
+ | 'distil-whisper-large-v3-en'
+ | 'whisper-large-v3'
+ | (string & {});
diff --git a/packages/groq/src/transcript-test.mp3 b/packages/groq/src/transcript-test.mp3
new file mode 100644
index 000000000000..6a4cf7b67483
Binary files /dev/null and b/packages/groq/src/transcript-test.mp3 differ
diff --git a/packages/hume/CHANGELOG.md b/packages/hume/CHANGELOG.md
new file mode 100644
index 000000000000..dff0d1d68188
--- /dev/null
+++ b/packages/hume/CHANGELOG.md
@@ -0,0 +1,14 @@
+# @ai-sdk/hume
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.0.1
+
+### Patch Changes
+
+- 69e8344: feat(providers/hume): add speech
diff --git a/packages/hume/README.md b/packages/hume/README.md
new file mode 100644
index 000000000000..9f663114806a
--- /dev/null
+++ b/packages/hume/README.md
@@ -0,0 +1,36 @@
+# AI SDK - Hume Provider
+
+The **[Hume provider](https://ai-sdk.dev/providers/ai-sdk-providers/hume)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains support for the Hume API.
+
+## Setup
+
+The Hume provider is available in the `@ai-sdk/hume` module. You can install it with
+
+```bash
+npm i @ai-sdk/hume
+```
+
+## Provider Instance
+
+You can import the default provider instance `hume` from `@ai-sdk/hume`:
+
+```ts
+import { hume } from '@ai-sdk/hume';
+```
+
+## Example
+
+```ts
+import { hume } from '@ai-sdk/hume';
+import { experimental_generateSpeech as generateSpeech } from 'ai';
+
+const result = await generateSpeech({
+  model: hume.speech(),
+ text: 'Hello, world!',
+});
+```
+
+## Documentation
+
+Please check out the **[Hume provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/hume)** for more information.
diff --git a/packages/hume/package.json b/packages/hume/package.json
new file mode 100644
index 000000000000..a93d4e6434e1
--- /dev/null
+++ b/packages/hume/package.json
@@ -0,0 +1,64 @@
+{
+ "name": "@ai-sdk/hume",
+ "version": "0.0.2",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/hume/src/hume-api-types.ts b/packages/hume/src/hume-api-types.ts
new file mode 100644
index 000000000000..eae98acbb632
--- /dev/null
+++ b/packages/hume/src/hume-api-types.ts
@@ -0,0 +1,29 @@
+type HumeSpeechAPIUtterances = Array<{
+ text: string;
+ description?: string;
+ speed?: number;
+ trailing_silence?: number;
+ voice?:
+ | {
+ id: string;
+ provider?: 'HUME_AI' | 'CUSTOM_VOICE';
+ }
+ | {
+ name: string;
+ provider?: 'HUME_AI' | 'CUSTOM_VOICE';
+ };
+}>;
+
+export type HumeSpeechAPITypes = {
+ utterances: HumeSpeechAPIUtterances;
+ context?:
+ | {
+ generation_id: string;
+ }
+ | {
+ utterances: HumeSpeechAPIUtterances;
+ };
+ format: {
+ type: 'mp3' | 'pcm' | 'wav';
+ };
+};
diff --git a/packages/hume/src/hume-config.ts b/packages/hume/src/hume-config.ts
new file mode 100644
index 000000000000..7e0d4b7b91e2
--- /dev/null
+++ b/packages/hume/src/hume-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type HumeConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+  headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/hume/src/hume-error.test.ts b/packages/hume/src/hume-error.test.ts
new file mode 100644
index 000000000000..3cd03ccce308
--- /dev/null
+++ b/packages/hume/src/hume-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { humeErrorDataSchema } from './hume-error';
+
+describe('humeErrorDataSchema', () => {
+ it('should parse Hume resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: humeErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/hume/src/hume-error.ts b/packages/hume/src/hume-error.ts
new file mode 100644
index 000000000000..63aba8ecdbbf
--- /dev/null
+++ b/packages/hume/src/hume-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const humeErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type HumeErrorData = z.infer<typeof humeErrorDataSchema>;
+
+export const humeFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: humeErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/hume/src/hume-provider.ts b/packages/hume/src/hume-provider.ts
new file mode 100644
index 000000000000..0be2f92d7a26
--- /dev/null
+++ b/packages/hume/src/hume-provider.ts
@@ -0,0 +1,70 @@
+import { SpeechModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { HumeSpeechModel } from './hume-speech-model';
+
+export interface HumeProvider extends Pick<ProviderV1, 'speechModel'> {
+ (settings?: {}): {
+ speech: HumeSpeechModel;
+ };
+
+ /**
+Creates a model for speech synthesis.
+ */
+ speech(): SpeechModelV1;
+}
+
+export interface HumeProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+  headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create a Hume provider instance.
+ */
+export function createHume(options: HumeProviderSettings = {}): HumeProvider {
+ const getHeaders = () => ({
+ 'X-Hume-Api-Key': loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'HUME_API_KEY',
+ description: 'Hume',
+ }),
+ ...options.headers,
+ });
+
+ const createSpeechModel = () =>
+ new HumeSpeechModel('', {
+ provider: `hume.speech`,
+ url: ({ path }) => `https://api.hume.ai${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function () {
+ return {
+ speech: createSpeechModel(),
+ };
+ };
+
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
+
+ return provider as HumeProvider;
+}
+
+/**
+Default Hume provider instance.
+ */
+export const hume = createHume();
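A short sketch of creating a customized provider instance with the settings above (the header name and value are placeholders); by default `createHume` reads `HUME_API_KEY` from the environment and sends it as `X-Hume-Api-Key`:

```ts
import { createHume } from '@ai-sdk/hume';

// Sketch only: custom instance with an explicit key and an extra header.
const hume = createHume({
  apiKey: process.env.HUME_API_KEY,
  headers: { 'X-Example-Header': 'value' }, // placeholder header
});

const speechModel = hume.speech();
```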
diff --git a/packages/hume/src/hume-speech-model.test.ts b/packages/hume/src/hume-speech-model.test.ts
new file mode 100644
index 000000000000..f61e750f7d75
--- /dev/null
+++ b/packages/hume/src/hume-speech-model.test.ts
@@ -0,0 +1,206 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { HumeSpeechModel } from './hume-speech-model';
+import { createHume } from './hume-provider';
+
+const provider = createHume({ apiKey: 'test-api-key' });
+const model = provider.speech();
+
+const server = createTestServer({
+ 'https://api.hume.ai/v0/tts/file': {},
+});
+
+describe('doGenerate', () => {
+ function prepareAudioResponse({
+ headers,
+ format = 'mp3',
+ }: {
+    headers?: Record<string, string>;
+ format?: 'mp3' | 'pcm' | 'wav';
+ } = {}) {
+ const audioBuffer = new Uint8Array(100); // Mock audio data
+ server.urls['https://api.hume.ai/v0/tts/file'].response = {
+ type: 'binary',
+ headers: {
+ 'content-type': `audio/${format}`,
+ ...headers,
+ },
+ body: Buffer.from(audioBuffer),
+ };
+ return audioBuffer;
+ }
+
+ it('should pass the model and text', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ utterances: [
+ {
+ text: 'Hello from the AI SDK!',
+ voice: {
+ id: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
+ provider: 'HUME_AI',
+ },
+ },
+ ],
+ format: {
+ type: 'mp3',
+ },
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareAudioResponse();
+
+ const provider = createHume({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.speech().doGenerate({
+ text: 'Hello from the AI SDK!',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ 'x-hume-api-key': 'test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should pass options', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ voice: 'test-voice',
+ outputFormat: 'mp3',
+ speed: 1.5,
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ utterances: [
+ {
+ text: 'Hello from the AI SDK!',
+ voice: {
+ id: 'test-voice',
+ provider: 'HUME_AI',
+ },
+ speed: 1.5,
+ },
+ ],
+ format: {
+ type: 'mp3',
+ },
+ });
+ });
+
+ it('should return audio data with correct content type', async () => {
+ const audio = new Uint8Array(100); // Mock audio data
+ prepareAudioResponse({
+ format: 'mp3',
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ outputFormat: 'mp3',
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareAudioResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new HumeSpeechModel('', {
+ provider: 'test-provider',
+ url: () => 'https://api.hume.ai/v0/tts/file',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ headers: {
+ 'content-type': 'audio/mp3',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareAudioResponse();
+
+ const testDate = new Date(0);
+ const customModel = new HumeSpeechModel('', {
+ provider: 'test-provider',
+ url: () => 'https://api.hume.ai/v0/tts/file',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('');
+ });
+
+ it('should handle different audio formats', async () => {
+ const formats = ['mp3', 'pcm', 'wav'] as const;
+
+ for (const format of formats) {
+ const audio = prepareAudioResponse({ format });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ providerOptions: {
+        hume: {
+ format,
+ },
+ },
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ }
+ });
+
+ it('should include warnings if any are generated', async () => {
+ prepareAudioResponse();
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.warnings).toEqual([]);
+ });
+});
diff --git a/packages/hume/src/hume-speech-model.ts b/packages/hume/src/hume-speech-model.ts
new file mode 100644
index 000000000000..1e82fc643e46
--- /dev/null
+++ b/packages/hume/src/hume-speech-model.ts
@@ -0,0 +1,229 @@
+import { SpeechModelV1, SpeechModelV1CallWarning } from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ createBinaryResponseHandler,
+ parseProviderOptions,
+ postJsonToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { HumeConfig } from './hume-config';
+import { humeFailedResponseHandler } from './hume-error';
+import { HumeSpeechAPITypes } from './hume-api-types';
+
+// https://dev.hume.ai/reference/text-to-speech-tts/synthesize-file
+const humeSpeechCallOptionsSchema = z.object({
+ /**
+ * Context for the speech synthesis request.
+ * Can be either a generationId for retrieving a previous generation,
+ * or a list of utterances to synthesize.
+ */
+ context: z
+ .object({
+ /**
+ * ID of a previously generated speech synthesis to retrieve.
+ */
+ generationId: z.string(),
+ })
+ .or(
+ z.object({
+ /**
+ * List of utterances to synthesize into speech.
+ */
+ utterances: z.array(
+ z.object({
+ /**
+ * The text content to convert to speech.
+ */
+ text: z.string(),
+ /**
+ * Optional description or instructions for how the text should be spoken.
+ */
+ description: z.string().optional(),
+ /**
+ * Optional speech rate multiplier.
+ */
+ speed: z.number().optional(),
+ /**
+ * Optional duration of silence to add after the utterance in seconds.
+ */
+ trailingSilence: z.number().optional(),
+ /**
+ * Voice configuration for the utterance.
+ * Can be specified by ID or name.
+ */
+ voice: z
+ .object({
+ /**
+ * ID of the voice to use.
+ */
+ id: z.string(),
+ /**
+ * Provider of the voice, either Hume's built-in voices or a custom voice.
+ */
+ provider: z.enum(['HUME_AI', 'CUSTOM_VOICE']).optional(),
+ })
+ .or(
+ z.object({
+ /**
+ * Name of the voice to use.
+ */
+ name: z.string(),
+ /**
+ * Provider of the voice, either Hume's built-in voices or a custom voice.
+ */
+ provider: z.enum(['HUME_AI', 'CUSTOM_VOICE']).optional(),
+ }),
+ )
+ .optional(),
+ }),
+ ),
+ }),
+ )
+ .nullish(),
+});
+
+export type HumeSpeechCallOptions = z.infer<typeof humeSpeechCallOptionsSchema>;
+
+interface HumeSpeechModelConfig extends HumeConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class HumeSpeechModel implements SpeechModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: '',
+ private readonly config: HumeSpeechModelConfig,
+ ) {}
+
+ private getArgs({
+ text,
+ voice = 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
+ outputFormat = 'mp3',
+ speed,
+ instructions,
+ providerOptions,
+  }: Parameters<SpeechModelV1['doGenerate']>[0]) {
+ const warnings: SpeechModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const humeOptions = parseProviderOptions({
+ provider: 'hume',
+ providerOptions,
+ schema: humeSpeechCallOptionsSchema,
+ });
+
+ // Create request body
+ const requestBody: HumeSpeechAPITypes = {
+ utterances: [
+ {
+ text,
+ speed,
+ description: instructions,
+ voice: {
+ id: voice,
+ provider: 'HUME_AI',
+ },
+ },
+ ],
+ format: { type: 'mp3' },
+ };
+
+ if (outputFormat) {
+ if (['mp3', 'pcm', 'wav'].includes(outputFormat)) {
+ requestBody.format = { type: outputFormat as 'mp3' | 'pcm' | 'wav' };
+ } else {
+ warnings.push({
+ type: 'unsupported-setting',
+ setting: 'outputFormat',
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`,
+ });
+ }
+ }
+
+ // Add provider-specific options
+ if (humeOptions) {
+ const speechModelOptions: Omit<
+ HumeSpeechAPITypes,
+ 'utterances' | 'format'
+ > = {};
+
+ if (humeOptions.context) {
+ if ('generationId' in humeOptions.context) {
+ speechModelOptions.context = {
+ generation_id: humeOptions.context.generationId,
+ };
+ } else {
+ speechModelOptions.context = {
+ utterances: humeOptions.context.utterances.map(utterance => ({
+ text: utterance.text,
+ description: utterance.description,
+ speed: utterance.speed,
+ trailing_silence: utterance.trailingSilence,
+ voice: utterance.voice,
+ })),
+ };
+ }
+ }
+
+ for (const key in speechModelOptions) {
+ const value =
+ speechModelOptions[
+            key as keyof Omit<HumeSpeechAPITypes, 'utterances' | 'format'>
+ ];
+ if (value !== undefined) {
+          (requestBody as Record<string, unknown>)[key] = value;
+ }
+ }
+ }
+
+ return {
+ requestBody,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+    options: Parameters<SpeechModelV1['doGenerate']>[0],
+  ): Promise<Awaited<ReturnType<SpeechModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { requestBody, warnings } = this.getArgs(options);
+
+ const {
+ value: audio,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postJsonToApi({
+ url: this.config.url({
+ path: '/v0/tts/file',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ body: requestBody,
+ failedResponseHandler: humeFailedResponseHandler,
+ successfulResponseHandler: createBinaryResponseHandler(),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ audio,
+ warnings,
+ request: {
+ body: JSON.stringify(requestBody),
+ },
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
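And a hedged sketch of passing the hume-specific `context` option parsed above from application code (the generation id is a placeholder; assumes the experimental `generateSpeech` helper from the `ai` package):

```ts
import { hume } from '@ai-sdk/hume';
import { experimental_generateSpeech as generateSpeech } from 'ai';

// Sketch only: continue a previous Hume generation via the `context` option.
const { audio } = await generateSpeech({
  model: hume.speech(),
  text: 'And this sentence continues the earlier narration.',
  outputFormat: 'wav', // mp3 | pcm | wav, per the format handling above
  providerOptions: {
    hume: {
      context: { generationId: 'previous-generation-id' }, // placeholder id
    },
  },
});

// `audio` exposes the synthesized bytes (e.g. audio.uint8Array).
```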
diff --git a/packages/hume/src/index.ts b/packages/hume/src/index.ts
new file mode 100644
index 000000000000..2fd4e0d07b2c
--- /dev/null
+++ b/packages/hume/src/index.ts
@@ -0,0 +1,2 @@
+export { createHume, hume } from './hume-provider';
+export type { HumeProvider, HumeProviderSettings } from './hume-provider';
diff --git a/packages/hume/tsconfig.json b/packages/hume/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/hume/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/hume/tsup.config.ts b/packages/hume/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/hume/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/hume/turbo.json b/packages/hume/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/hume/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/hume/vitest.edge.config.js b/packages/hume/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/hume/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/hume/vitest.node.config.js b/packages/hume/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/hume/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/lmnt/CHANGELOG.md b/packages/lmnt/CHANGELOG.md
new file mode 100644
index 000000000000..0498686b868c
--- /dev/null
+++ b/packages/lmnt/CHANGELOG.md
@@ -0,0 +1,14 @@
+# @ai-sdk/lmnt
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.0.1
+
+### Patch Changes
+
+- 0d7291a: feat(providers/lmnt): add speech
diff --git a/packages/lmnt/README.md b/packages/lmnt/README.md
new file mode 100644
index 000000000000..3fd39d20e268
--- /dev/null
+++ b/packages/lmnt/README.md
@@ -0,0 +1,36 @@
+# AI SDK - LMNT Provider
+
+The **[LMNT provider](https://ai-sdk.dev/providers/ai-sdk-providers/lmnt)** for the [AI SDK](https://ai-sdk.dev/docs)
+contains speech model support for the LMNT API.
+
+## Setup
+
+The LMNT provider is available in the `@ai-sdk/lmnt` module. You can install it with
+
+```bash
+npm i @ai-sdk/lmnt
+```
+
+## Provider Instance
+
+You can import the default provider instance `lmnt` from `@ai-sdk/lmnt`:
+
+```ts
+import { lmnt } from '@ai-sdk/lmnt';
+```
+
+## Example
+
+```ts
+import { lmnt } from '@ai-sdk/lmnt';
+import { experimental_generateSpeech as generateSpeech } from 'ai';
+
+const result = await generateSpeech({
+ model: lmnt.speech('aurora'),
+ text: 'Hello, world!',
+});
+```
+
+## Documentation
+
+Please check out the **[LMNT provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/lmnt)** for more information.
diff --git a/packages/lmnt/package.json b/packages/lmnt/package.json
new file mode 100644
index 000000000000..842da8f84333
--- /dev/null
+++ b/packages/lmnt/package.json
@@ -0,0 +1,64 @@
+{
+ "name": "@ai-sdk/lmnt",
+ "version": "0.0.2",
+ "license": "Apache-2.0",
+ "sideEffects": false,
+ "main": "./dist/index.js",
+ "module": "./dist/index.mjs",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist/**/*",
+ "CHANGELOG.md"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "build:watch": "tsup --watch",
+ "clean": "rm -rf dist",
+ "lint": "eslint \"./**/*.ts*\"",
+ "type-check": "tsc --noEmit",
+ "prettier-check": "prettier --check \"./**/*.ts*\"",
+ "test": "pnpm test:node && pnpm test:edge",
+ "test:edge": "vitest --config vitest.edge.config.js --run",
+ "test:node": "vitest --config vitest.node.config.js --run",
+ "test:node:watch": "vitest --config vitest.node.config.js --watch"
+ },
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.mjs",
+ "require": "./dist/index.js"
+ }
+ },
+ "dependencies": {
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
+ },
+ "devDependencies": {
+ "@types/node": "20.17.24",
+ "@vercel/ai-tsconfig": "workspace:*",
+ "tsup": "^8",
+ "typescript": "5.6.3",
+ "zod": "3.23.8"
+ },
+ "peerDependencies": {
+ "zod": "^3.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "homepage": "https://ai-sdk.dev/docs",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/vercel/ai.git"
+ },
+ "bugs": {
+ "url": "https://github.com/vercel/ai/issues"
+ },
+ "keywords": [
+ "ai"
+ ]
+}
diff --git a/packages/lmnt/src/index.ts b/packages/lmnt/src/index.ts
new file mode 100644
index 000000000000..879f4bf9eb70
--- /dev/null
+++ b/packages/lmnt/src/index.ts
@@ -0,0 +1,2 @@
+export { createLMNT, lmnt } from './lmnt-provider';
+export type { LMNTProvider, LMNTProviderSettings } from './lmnt-provider';
diff --git a/packages/lmnt/src/lmnt-api-types.ts b/packages/lmnt/src/lmnt-api-types.ts
new file mode 100644
index 000000000000..aeae35e1c3c1
--- /dev/null
+++ b/packages/lmnt/src/lmnt-api-types.ts
@@ -0,0 +1,39 @@
+export type LMNTSpeechAPITypes = {
+ /** The voice id of the voice to use; voice ids can be retrieved by calls to List voices or Voice info. */
+ voice: string;
+ /** The text to synthesize; max 5000 characters per request (including spaces) */
+ text: string;
+ /** The model to use for synthesis. One of aurora (default) or blizzard. */
+ model?: 'aurora' | 'blizzard';
+ /** The desired language. Two letter ISO 639-1 code. Does not work with professional clones. Not all languages work with all models. Defaults to auto language detection. */
+ language?:
+ | 'auto'
+ | 'en'
+ | 'es'
+ | 'pt'
+ | 'fr'
+ | 'de'
+ | 'zh'
+ | 'ko'
+ | 'hi'
+ | 'ja'
+ | 'ru'
+ | 'it'
+ | 'tr';
+ /** The file format of the audio output */
+ format?: 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
+ /** The desired output sample rate in Hz */
+ sample_rate?: 8000 | 16000 | 24000;
+ /** The talking speed of the generated speech, a floating point value between 0.25 (slow) and 2.0 (fast). */
+ speed?: number;
+ /** Seed used to specify a different take; defaults to random */
+ seed?: number;
+ /** Set this to true to generate conversational-style speech rather than reading-style speech. Does not work with the blizzard model. */
+ conversational?: boolean;
+ /** Produce speech of this length in seconds; maximum 300.0 (5 minutes). Does not work with the blizzard model. */
+ length?: number;
+ /** Controls the stability of the generated speech. A lower value (like 0.3) produces more consistent, reliable speech. A higher value (like 0.9) gives more flexibility in how words are spoken, but might occasionally produce unusual intonations or speech patterns. */
+ top_p?: number;
+ /** Influences how expressive and emotionally varied the speech becomes. Lower values (like 0.3) create more neutral, consistent speaking styles. Higher values (like 1.0) allow for more dynamic emotional range and speaking styles. */
+ temperature?: number;
+};
diff --git a/packages/lmnt/src/lmnt-config.ts b/packages/lmnt/src/lmnt-config.ts
new file mode 100644
index 000000000000..a852d5b8c8da
--- /dev/null
+++ b/packages/lmnt/src/lmnt-config.ts
@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type LMNTConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+  headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+};
diff --git a/packages/lmnt/src/lmnt-error.test.ts b/packages/lmnt/src/lmnt-error.test.ts
new file mode 100644
index 000000000000..abb9f46d6072
--- /dev/null
+++ b/packages/lmnt/src/lmnt-error.test.ts
@@ -0,0 +1,33 @@
+import { safeParseJSON } from '@ai-sdk/provider-utils';
+import { lmntErrorDataSchema } from './lmnt-error';
+
+describe('lmntErrorDataSchema', () => {
+ it('should parse LMNT resource exhausted error', () => {
+ const error = `
+{"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+`;
+
+ const result = safeParseJSON({
+ text: error,
+ schema: lmntErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+});
diff --git a/packages/lmnt/src/lmnt-error.ts b/packages/lmnt/src/lmnt-error.ts
new file mode 100644
index 000000000000..40540a8bdd90
--- /dev/null
+++ b/packages/lmnt/src/lmnt-error.ts
@@ -0,0 +1,16 @@
+import { z } from 'zod';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const lmntErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+});
+
+export type LMNTErrorData = z.infer<typeof lmntErrorDataSchema>;
+
+export const lmntFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: lmntErrorDataSchema,
+ errorToMessage: data => data.error.message,
+});
diff --git a/packages/lmnt/src/lmnt-provider.ts b/packages/lmnt/src/lmnt-provider.ts
new file mode 100644
index 000000000000..ec8e2ef1d6d1
--- /dev/null
+++ b/packages/lmnt/src/lmnt-provider.ts
@@ -0,0 +1,74 @@
+import { SpeechModelV1, ProviderV1 } from '@ai-sdk/provider';
+import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
+import { LMNTSpeechModel } from './lmnt-speech-model';
+import { LMNTSpeechModelId } from './lmnt-speech-settings';
+
+export interface LMNTProvider extends Pick<ProviderV1, 'speechModel'> {
+ (
+ modelId: 'aurora',
+ settings?: {},
+ ): {
+ speech: LMNTSpeechModel;
+ };
+
+ /**
+Creates a model for speech synthesis.
+ */
+ speech(modelId: LMNTSpeechModelId): SpeechModelV1;
+}
+
+export interface LMNTProviderSettings {
+ /**
+API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+Custom fetch implementation. You can use it as a middleware to intercept requests,
+or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+}
+
+/**
+Create an LMNT provider instance.
+ */
+export function createLMNT(options: LMNTProviderSettings = {}): LMNTProvider {
+ const getHeaders = () => ({
+ 'x-api-key': loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'LMNT_API_KEY',
+ description: 'LMNT',
+ }),
+ ...options.headers,
+ });
+
+ const createSpeechModel = (modelId: LMNTSpeechModelId) =>
+ new LMNTSpeechModel(modelId, {
+ provider: `lmnt.speech`,
+ url: ({ path }) => `https://api.lmnt.com${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function (modelId: LMNTSpeechModelId) {
+ return {
+ speech: createSpeechModel(modelId),
+ };
+ };
+
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
+
+ return provider as LMNTProvider;
+}
+
+/**
+Default LMNT provider instance.
+ */
+export const lmnt = createLMNT();
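A minimal usage sketch of the provider defined above, assuming the `experimental_generateSpeech` function from the `ai` package (referenced in the changelog entries later in this diff) and that the package index re-exports `lmnt`:

```ts
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';

// Generate speech with the default LMNT provider instance.
const { audio } = await generateSpeech({
  model: lmnt.speech('aurora'),
  text: 'Hello from the AI SDK!',
});
```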
diff --git a/packages/lmnt/src/lmnt-speech-model.test.ts b/packages/lmnt/src/lmnt-speech-model.test.ts
new file mode 100644
index 000000000000..54e51bf4f485
--- /dev/null
+++ b/packages/lmnt/src/lmnt-speech-model.test.ts
@@ -0,0 +1,189 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { LMNTSpeechModel } from './lmnt-speech-model';
+import { createLMNT } from './lmnt-provider';
+
+const provider = createLMNT({ apiKey: 'test-api-key' });
+const model = provider.speech('aurora');
+
+const server = createTestServer({
+ 'https://api.lmnt.com/v1/ai/speech/bytes': {},
+});
+
+describe('doGenerate', () => {
+ function prepareAudioResponse({
+ headers,
+ format = 'mp3',
+ }: {
+ headers?: Record<string, string>;
+ format?: 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
+ } = {}) {
+ const audioBuffer = new Uint8Array(100); // Mock audio data
+ server.urls['https://api.lmnt.com/v1/ai/speech/bytes'].response = {
+ type: 'binary',
+ headers: {
+ 'content-type': `audio/${format}`,
+ ...headers,
+ },
+ body: Buffer.from(audioBuffer),
+ };
+ return audioBuffer;
+ }
+
+ it('should pass the model and text', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ model: 'aurora',
+ text: 'Hello from the AI SDK!',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareAudioResponse();
+
+ const provider = createLMNT({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.speech('aurora').doGenerate({
+ text: 'Hello from the AI SDK!',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ 'x-api-key': 'test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+ });
+
+ it('should pass options', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ voice: 'nova',
+ outputFormat: 'mp3',
+ speed: 1.5,
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ model: 'aurora',
+ text: 'Hello from the AI SDK!',
+ voice: 'nova',
+ speed: 1.5,
+ response_format: 'mp3',
+ });
+ });
+
+ it('should return audio data with correct content type', async () => {
+ const audio = new Uint8Array(100); // Mock audio data
+ prepareAudioResponse({
+ format: 'mp3',
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ outputFormat: 'mp3',
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareAudioResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new LMNTSpeechModel('aurora', {
+ provider: 'test-provider',
+ url: () => 'https://api.lmnt.com/v1/ai/speech/bytes',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'aurora',
+ headers: {
+ 'content-type': 'audio/mp3',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareAudioResponse();
+
+ const testDate = new Date(0);
+ const customModel = new LMNTSpeechModel('aurora', {
+ provider: 'test-provider',
+ url: () => 'https://api.lmnt.com/v1/ai/speech/bytes',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('aurora');
+ });
+
+ it('should handle different audio formats', async () => {
+ const formats = ['aac', 'mp3', 'mulaw', 'raw', 'wav'] as const;
+
+ for (const format of formats) {
+ const audio = prepareAudioResponse({ format });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ providerOptions: {
+ lmnt: {
+ format,
+ },
+ },
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ }
+ });
+
+ it('should include warnings if any are generated', async () => {
+ prepareAudioResponse();
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.warnings).toEqual([]);
+ });
+});
diff --git a/packages/lmnt/src/lmnt-speech-model.ts b/packages/lmnt/src/lmnt-speech-model.ts
new file mode 100644
index 000000000000..e0fff2e8f7aa
--- /dev/null
+++ b/packages/lmnt/src/lmnt-speech-model.ts
@@ -0,0 +1,210 @@
+import { SpeechModelV1, SpeechModelV1CallWarning } from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ createBinaryResponseHandler,
+ parseProviderOptions,
+ postJsonToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { LMNTConfig } from './lmnt-config';
+import { lmntFailedResponseHandler } from './lmnt-error';
+import { LMNTSpeechModelId } from './lmnt-speech-settings';
+import { LMNTSpeechAPITypes } from './lmnt-api-types';
+
+// https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes
+const lmntSpeechCallOptionsSchema = z.object({
+ /**
+ * The model to use for speech synthesis e.g. 'aurora' or 'blizzard'.
+ * @default 'aurora'
+ */
+ model: z
+ .union([z.enum(['aurora', 'blizzard']), z.string()])
+ .nullish()
+ .default('aurora'),
+
+ /**
+ * The language of the input text.
+ * @default 'auto'
+ */
+ language: z
+ .union([z.enum(['auto', 'en']), z.string()])
+ .nullish()
+ .default('auto'),
+
+ /**
+ * The audio format of the output.
+ * @default 'mp3'
+ */
+ format: z
+ .enum(['aac', 'mp3', 'mulaw', 'raw', 'wav'])
+ .nullish()
+ .default('mp3'),
+
+ /**
+ * The sample rate of the output audio in Hz.
+ * @default 24000
+ */
+ sampleRate: z
+ .union([z.literal(8000), z.literal(16000), z.literal(24000)])
+ .nullish()
+ .default(24000),
+
+ /**
+ * The speed of the speech. Range: 0.25 to 2.
+ * @default 1
+ */
+ speed: z.number().min(0.25).max(2).nullish().default(1),
+
+ /**
+ * A seed value for deterministic generation.
+ */
+ seed: z.number().int().nullish(),
+
+ /**
+ * Whether to use a conversational style.
+ * @default false
+ */
+ conversational: z.boolean().nullish().default(false),
+
+ /**
+ * Maximum length of the output in seconds (up to 300).
+ */
+ length: z.number().max(300).nullish(),
+
+ /**
+ * Top-p sampling parameter. Range: 0 to 1.
+ * @default 1
+ */
+ topP: z.number().min(0).max(1).nullish().default(1),
+
+ /**
+ * Temperature for sampling. Higher values increase randomness.
+ * @default 1
+ */
+ temperature: z.number().min(0).nullish().default(1),
+});
+
+export type LMNTSpeechCallOptions = z.infer<typeof lmntSpeechCallOptionsSchema>;
+
+interface LMNTSpeechModelConfig extends LMNTConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class LMNTSpeechModel implements SpeechModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: LMNTSpeechModelId,
+ private readonly config: LMNTSpeechModelConfig,
+ ) {}
+
+ private getArgs({
+ text,
+ voice = 'ava',
+ outputFormat = 'mp3',
+ speed,
+ providerOptions,
+ }: Parameters<SpeechModelV1['doGenerate']>[0]) {
+ const warnings: SpeechModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const lmntOptions = parseProviderOptions({
+ provider: 'lmnt',
+ providerOptions,
+ schema: lmntSpeechCallOptionsSchema,
+ });
+
+ // Create request body
+ const requestBody: Record<string, unknown> = {
+ model: this.modelId,
+ text,
+ voice,
+ response_format: 'mp3',
+ speed,
+ };
+
+ if (outputFormat) {
+ if (['mp3', 'aac', 'mulaw', 'raw', 'wav'].includes(outputFormat)) {
+ requestBody.response_format = outputFormat;
+ } else {
+ warnings.push({
+ type: 'unsupported-setting',
+ setting: 'outputFormat',
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`,
+ });
+ }
+ }
+
+ // Add provider-specific options
+ if (lmntOptions) {
+ const speechModelOptions: Omit<LMNTSpeechAPITypes, 'voice' | 'text'> = {
+ conversational: lmntOptions.conversational ?? undefined,
+ length: lmntOptions.length ?? undefined,
+ seed: lmntOptions.seed ?? undefined,
+ speed: lmntOptions.speed ?? undefined,
+ temperature: lmntOptions.temperature ?? undefined,
+ top_p: lmntOptions.topP ?? undefined,
+ sample_rate: lmntOptions.sampleRate ?? undefined,
+ };
+
+ for (const key in speechModelOptions) {
+ const value =
+ speechModelOptions[
+ key as keyof Omit<LMNTSpeechAPITypes, 'voice' | 'text'>
+ ];
+ if (value !== undefined) {
+ requestBody[key] = value;
+ }
+ }
+ }
+
+ return {
+ requestBody,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<SpeechModelV1['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<SpeechModelV1['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { requestBody, warnings } = this.getArgs(options);
+
+ const {
+ value: audio,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await postJsonToApi({
+ url: this.config.url({
+ path: '/v1/ai/speech/bytes',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ body: requestBody,
+ failedResponseHandler: lmntFailedResponseHandler,
+ successfulResponseHandler: createBinaryResponseHandler(),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ return {
+ audio,
+ warnings,
+ request: {
+ body: JSON.stringify(requestBody),
+ },
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+}
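Note how `getArgs` maps the camelCase provider options onto the snake_case fields LMNT expects. A sketch (assuming the package index re-exports `lmnt`; values are illustrative):

```ts
import { lmnt } from '@ai-sdk/lmnt';

const model = lmnt.speech('aurora');

// camelCase provider options are converted to snake_case request fields:
await model.doGenerate({
  text: 'Hello from the AI SDK!',
  providerOptions: {
    lmnt: { sampleRate: 16000, topP: 0.8, conversational: true },
  },
});
// -> request body includes { sample_rate: 16000, top_p: 0.8, conversational: true }
```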
diff --git a/packages/lmnt/src/lmnt-speech-settings.ts b/packages/lmnt/src/lmnt-speech-settings.ts
new file mode 100644
index 000000000000..377c029f980f
--- /dev/null
+++ b/packages/lmnt/src/lmnt-speech-settings.ts
@@ -0,0 +1 @@
+export type LMNTSpeechModelId = 'aurora' | 'blizzard' | (string & {});
diff --git a/packages/lmnt/tsconfig.json b/packages/lmnt/tsconfig.json
new file mode 100644
index 000000000000..8eee8f9f6a82
--- /dev/null
+++ b/packages/lmnt/tsconfig.json
@@ -0,0 +1,5 @@
+{
+ "extends": "./node_modules/@vercel/ai-tsconfig/ts-library.json",
+ "include": ["."],
+ "exclude": ["*/dist", "dist", "build", "node_modules"]
+}
diff --git a/packages/lmnt/tsup.config.ts b/packages/lmnt/tsup.config.ts
new file mode 100644
index 000000000000..3f92041b987c
--- /dev/null
+++ b/packages/lmnt/tsup.config.ts
@@ -0,0 +1,10 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig([
+ {
+ entry: ['src/index.ts'],
+ format: ['cjs', 'esm'],
+ dts: true,
+ sourcemap: true,
+ },
+]);
diff --git a/packages/lmnt/turbo.json b/packages/lmnt/turbo.json
new file mode 100644
index 000000000000..620b8380e744
--- /dev/null
+++ b/packages/lmnt/turbo.json
@@ -0,0 +1,12 @@
+{
+ "extends": [
+ "//"
+ ],
+ "tasks": {
+ "build": {
+ "outputs": [
+ "**/dist/**"
+ ]
+ }
+ }
+}
diff --git a/packages/lmnt/vitest.edge.config.js b/packages/lmnt/vitest.edge.config.js
new file mode 100644
index 000000000000..700660e913f5
--- /dev/null
+++ b/packages/lmnt/vitest.edge.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'edge-runtime',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/lmnt/vitest.node.config.js b/packages/lmnt/vitest.node.config.js
new file mode 100644
index 000000000000..b1d14b21fc11
--- /dev/null
+++ b/packages/lmnt/vitest.node.config.js
@@ -0,0 +1,10 @@
+import { defineConfig } from 'vite';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+ test: {
+ environment: 'node',
+ globals: true,
+ include: ['**/*.test.ts', '**/*.test.tsx'],
+ },
+});
diff --git a/packages/luma/CHANGELOG.md b/packages/luma/CHANGELOG.md
index f9efba8cf43a..222c898e138f 100644
--- a/packages/luma/CHANGELOG.md
+++ b/packages/luma/CHANGELOG.md
@@ -1,5 +1,43 @@
# @ai-sdk/luma
+## 0.1.8
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.1.7
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.1.6
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.1.5
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 0.1.4
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 0.1.3
### Patch Changes
diff --git a/packages/luma/README.md b/packages/luma/README.md
index 0c854631cf43..6b117e7cc636 100644
--- a/packages/luma/README.md
+++ b/packages/luma/README.md
@@ -1,6 +1,6 @@
# AI SDK - Luma Provider
-The **Luma provider** for the [AI SDK](https://sdk.vercel.ai/docs) contains support for Luma AI's state-of-the-art image generation models - Photon and Photon Flash.
+The **Luma provider** for the [AI SDK](https://ai-sdk.dev/docs) contains support for Luma AI's state-of-the-art image generation models - Photon and Photon Flash.
## About Luma Photon Models
@@ -49,4 +49,4 @@ console.log(`Image saved to ${filename}`);
## Documentation
-Please check out the **[Luma provider](https://sdk.vercel.ai/providers/ai-sdk-providers/luma)** for more information.
+Please check out the **[Luma provider](https://ai-sdk.dev/providers/ai-sdk-providers/luma)** for more information.
diff --git a/packages/luma/package.json b/packages/luma/package.json
index 71644bd83c33..2dadbd5e6025 100644
--- a/packages/luma/package.json
+++ b/packages/luma/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/luma",
- "version": "0.1.3",
+ "version": "0.1.8",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -30,8 +30,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -49,7 +49,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/mistral/CHANGELOG.md b/packages/mistral/CHANGELOG.md
index ab6df6bd1df0..b076961c3d1f 100644
--- a/packages/mistral/CHANGELOG.md
+++ b/packages/mistral/CHANGELOG.md
@@ -1,5 +1,43 @@
# @ai-sdk/mistral
+## 1.2.8
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.2.7
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.2.6
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.2.5
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.2.4
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 1.2.3
### Patch Changes
diff --git a/packages/mistral/README.md b/packages/mistral/README.md
index d2be472a8459..519a3eb2c466 100644
--- a/packages/mistral/README.md
+++ b/packages/mistral/README.md
@@ -1,6 +1,6 @@
# AI SDK - Mistral Provider
-The **[Mistral provider](https://sdk.vercel.ai/providers/ai-sdk-providers/mistral)** for the [AI SDK](https://sdk.vercel.ai/docs) contains language model support for the Mistral chat API.
+The **[Mistral provider](https://ai-sdk.dev/providers/ai-sdk-providers/mistral)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the Mistral chat API.
## Setup
@@ -32,4 +32,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[Mistral provider](https://sdk.vercel.ai/providers/ai-sdk-providers/mistral)** for more information.
+Please check out the **[Mistral provider](https://ai-sdk.dev/providers/ai-sdk-providers/mistral)** for more information.
diff --git a/packages/mistral/package.json b/packages/mistral/package.json
index 8186d77e0c6a..4de4dd51a66b 100644
--- a/packages/mistral/package.json
+++ b/packages/mistral/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/mistral",
- "version": "1.2.3",
+ "version": "1.2.8",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -31,8 +31,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -50,7 +50,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/openai-compatible/CHANGELOG.md b/packages/openai-compatible/CHANGELOG.md
index 864481d86a8b..27d12a78e4e5 100644
--- a/packages/openai-compatible/CHANGELOG.md
+++ b/packages/openai-compatible/CHANGELOG.md
@@ -1,5 +1,67 @@
# @ai-sdk/openai-compatible
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 0.2.13
+
+### Patch Changes
+
+- 23571c9: feat(providers/xai): add reasoningEffort provider option
+
+## 0.2.12
+
+### Patch Changes
+
+- 13492fe: fix(providers/xai): return actual usage when streaming instead of NaN
+
+## 0.2.11
+
+### Patch Changes
+
+- b5c9cd4: fix (provider/openai-compatible): change tool_call type schema to nullish
+
+## 0.2.10
+
+### Patch Changes
+
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 0.2.9
+
+### Patch Changes
+
+- 1bbc698: chore(openai-compatible): deprecate simulateStreaming
+
+## 0.2.8
+
+### Patch Changes
+
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 0.2.7
+
+### Patch Changes
+
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 0.2.6
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 0.2.5
### Patch Changes
diff --git a/packages/openai-compatible/README.md b/packages/openai-compatible/README.md
index 82d9365b407d..98535048a5c0 100644
--- a/packages/openai-compatible/README.md
+++ b/packages/openai-compatible/README.md
@@ -97,4 +97,4 @@ const { text } = await generateText({
});
```
-For more examples, see the [OpenAI Compatible Providers](https://sdk.vercel.ai/providers/openai-compatible-providers) documentation.
+For more examples, see the [OpenAI Compatible Providers](https://ai-sdk.dev/providers/openai-compatible-providers) documentation.
diff --git a/packages/openai-compatible/package.json b/packages/openai-compatible/package.json
index e6c197f2747a..25035941d289 100644
--- a/packages/openai-compatible/package.json
+++ b/packages/openai-compatible/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/openai-compatible",
- "version": "0.2.5",
+ "version": "0.2.14",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -38,8 +38,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -57,7 +57,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/openai-compatible/src/openai-compatible-chat-language-model.test.ts b/packages/openai-compatible/src/openai-compatible-chat-language-model.test.ts
index 9a3d06bd5e95..9dfce5e77911 100644
--- a/packages/openai-compatible/src/openai-compatible-chat-language-model.test.ts
+++ b/packages/openai-compatible/src/openai-compatible-chat-language-model.test.ts
@@ -646,6 +646,35 @@ describe('doGenerate', () => {
expect(warnings).toEqual([]);
});
+ it('should respect the reasoningEffort provider option', async () => {
+ prepareJsonResponse({ content: '{"value":"test"}' });
+
+ const model = new OpenAICompatibleChatLanguageModel(
+ 'gpt-4o-2024-08-06',
+ {},
+ {
+ provider: 'test-provider',
+ url: () => 'https://my.api.com/v1/chat/completions',
+ headers: () => ({}),
+ },
+ );
+
+ await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ providerMetadata: {
+ 'openai-compatible': {
+ reasoningEffort: 'low',
+ },
+ },
+ });
+
+ const body = await server.calls[0].requestBody;
+
+ expect(body.reasoning_effort).toBe('low');
+ });
+
it('should use json_schema & strict in object-json mode when structuredOutputs are enabled', async () => {
prepareJsonResponse({ content: '{"value":"Spark"}' });
@@ -984,6 +1013,37 @@ describe('doStream', () => {
};
}
+ it('should respect the includeUsage option', async () => {
+ prepareStreamResponse({
+ content: ['Hello', ', ', 'World!'],
+ finish_reason: 'stop',
+ });
+
+ const model = new OpenAICompatibleChatLanguageModel(
+ 'gpt-4o-2024-08-06',
+ {},
+ {
+ provider: 'test-provider',
+ url: () => 'https://my.api.com/v1/chat/completions',
+ headers: () => ({}),
+ includeUsage: true,
+ },
+ );
+
+ const { warnings } = await model.doStream({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ });
+
+ const body = await server.calls[0].requestBody;
+
+ expect(body.stream).toBe(true);
+ expect(body.stream_options).toStrictEqual({ include_usage: true });
+
+ expect(warnings).toEqual([]);
+ });
+
it('should stream text deltas', async () => {
prepareStreamResponse({
content: ['Hello', ', ', 'World!'],
diff --git a/packages/openai-compatible/src/openai-compatible-chat-language-model.ts b/packages/openai-compatible/src/openai-compatible-chat-language-model.ts
index ff1874fa16c2..20e53eccea9e 100644
--- a/packages/openai-compatible/src/openai-compatible-chat-language-model.ts
+++ b/packages/openai-compatible/src/openai-compatible-chat-language-model.ts
@@ -40,6 +40,7 @@ export type OpenAICompatibleChatConfig = {
headers: () => Record<string, string | undefined>;
url: (options: { modelId: string; path: string }) => string;
fetch?: FetchFunction;
+ includeUsage?: boolean;
errorStructure?: ProviderErrorStructure<any>;
metadataExtractor?: MetadataExtractor;
@@ -170,6 +171,10 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV1 {
seed,
...providerMetadata?.[this.providerOptionsName],
+ reasoning_effort:
+ providerMetadata?.[this.providerOptionsName]?.reasoningEffort ??
+ providerMetadata?.['openai-compatible']?.reasoningEffort,
+
// messages:
messages: convertToOpenAICompatibleChatMessages(prompt),
};
@@ -375,7 +380,16 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV1 {
const { args, warnings } = this.getArgs({ ...options });
- const body = JSON.stringify({ ...args, stream: true });
+ const body = {
+ ...args,
+ stream: true,
+
+ // only include stream_options when includeUsage is enabled (usage reporting while streaming):
+ stream_options: this.config.includeUsage
+ ? { include_usage: true }
+ : undefined,
+ };
+
const metadataExtractor =
this.config.metadataExtractor?.createStreamExtractor();
@@ -385,10 +399,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV1 {
modelId: this.modelId,
}),
headers: combineHeaders(this.config.headers(), options.headers),
- body: {
- ...args,
- stream: true,
- },
+ body,
failedResponseHandler: this.failedResponseHandler,
successfulResponseHandler: createEventSourceResponseHandler(
this.chunkSchema,
@@ -683,7 +694,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV1 {
rawCall: { rawPrompt, rawSettings },
rawResponse: { headers: responseHeaders },
warnings,
- request: { body },
+ request: { body: JSON.stringify(body) },
};
}
}
@@ -760,7 +771,7 @@ const createOpenAICompatibleChatChunkSchema = (
z.object({
index: z.number(),
id: z.string().nullish(),
- type: z.literal('function').optional(),
+ type: z.literal('function').nullish(),
function: z.object({
name: z.string().nullish(),
arguments: z.string().nullish(),
diff --git a/packages/openai-compatible/src/openai-compatible-chat-settings.ts b/packages/openai-compatible/src/openai-compatible-chat-settings.ts
index 38e9bf767e4a..0e73f560872f 100644
--- a/packages/openai-compatible/src/openai-compatible-chat-settings.ts
+++ b/packages/openai-compatible/src/openai-compatible-chat-settings.ts
@@ -12,6 +12,7 @@ Simulates streaming by using a normal generate call and returning it as a stream
Enable this if the model that you are using does not support streaming.
Defaults to `false`.
+@deprecated Use `simulateStreamingMiddleware` instead.
*/
simulateStreaming?: boolean;
}
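Since `simulateStreaming` is now deprecated in favor of `simulateStreamingMiddleware`, a replacement sketch (assuming `wrapLanguageModel` and `simulateStreamingMiddleware` from the `ai` package; the provider name, base URL, and model id are illustrative):

```ts
import { simulateStreamingMiddleware, wrapLanguageModel } from 'ai';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';

const provider = createOpenAICompatible({
  name: 'example',
  baseURL: 'https://api.example.com/v1',
});

// Instead of `simulateStreaming: true`, wrap the model with the middleware:
const model = wrapLanguageModel({
  model: provider('example-model-id'),
  middleware: simulateStreamingMiddleware(),
});
```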
diff --git a/packages/openai-compatible/src/openai-compatible-completion-language-model.ts b/packages/openai-compatible/src/openai-compatible-completion-language-model.ts
index 1ba1b6dfe6d2..555019ee1764 100644
--- a/packages/openai-compatible/src/openai-compatible-completion-language-model.ts
+++ b/packages/openai-compatible/src/openai-compatible-completion-language-model.ts
@@ -31,6 +31,7 @@ import {
type OpenAICompatibleCompletionConfig = {
provider: string;
+ includeUsage?: boolean;
headers: () => Record<string, string | undefined>;
url: (options: { modelId: string; path: string }) => string;
fetch?: FetchFunction;
@@ -227,6 +228,11 @@ export class OpenAICompatibleCompletionLanguageModel
const body = {
...args,
stream: true,
+
+ // only include stream_options when includeUsage is enabled (usage reporting while streaming):
+ stream_options: this.config.includeUsage
+ ? { include_usage: true }
+ : undefined,
};
const { responseHeaders, value: response } = await postJsonToApi({
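The new `includeUsage` config flag adds `stream_options: { include_usage: true }` to streaming requests for both the chat and completion models. A sketch mirroring the test earlier in this diff (provider name, URL, and model id are illustrative; the class import assumes it is re-exported from the package index):

```ts
import { OpenAICompatibleChatLanguageModel } from '@ai-sdk/openai-compatible';

// With includeUsage enabled, doStream requests include
// stream_options: { include_usage: true } so token usage is reported while streaming.
const model = new OpenAICompatibleChatLanguageModel(
  'example-model-id',
  {},
  {
    provider: 'example.chat',
    url: ({ path }) => `https://api.example.com/v1${path}`,
    headers: () => ({}),
    includeUsage: true,
  },
);
```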
diff --git a/packages/openai/CHANGELOG.md b/packages/openai/CHANGELOG.md
index ec4bb3a9da6a..7d53746d0513 100644
--- a/packages/openai/CHANGELOG.md
+++ b/packages/openai/CHANGELOG.md
@@ -1,5 +1,114 @@
# @ai-sdk/openai
+## 1.3.22
+
+### Patch Changes
+
+- Updated dependencies [d87b9d1]
+ - @ai-sdk/provider-utils@2.2.8
+
+## 1.3.21
+
+### Patch Changes
+
+- 5caac29: fix(providers/openai): zod parse error with function
+
+## 1.3.20
+
+### Patch Changes
+
+- dd5450e: feat(provider/openai): add o3 & o4-mini with developer systemMessageMode
+
+## 1.3.19
+
+### Patch Changes
+
+- 3cabda9: feat (providers/openai): add gpt-image-1 model id to image settings
+
+## 1.3.18
+
+### Patch Changes
+
+- 74cd391: feat (providers/openai): support gpt-image-1 image generation
+
+## 1.3.17
+
+### Patch Changes
+
+- ca7bce3: feat (providers/openai): add support for reasoning summaries
+
+## 1.3.16
+
+### Patch Changes
+
+- bd6e457: feat (provider/openai): o4 updates for responses api
+
+## 1.3.15
+
+### Patch Changes
+
+- 98d954e: feat (providers/openai): add o3 and o4-mini models
+
+## 1.3.14
+
+### Patch Changes
+
+- 980141c: fix (openai): structure output for responses model
+
+## 1.3.13
+
+### Patch Changes
+
+- 75b9849: adding support for gpt-4o-search-preview and handling unsupported parameters
+
+## 1.3.12
+
+### Patch Changes
+
+- 575339f: feat (providers/openai): add gpt-4.1 models
+
+## 1.3.11
+
+### Patch Changes
+
+- beef951: feat: add speech with experimental_generateSpeech
+- Updated dependencies [beef951]
+ - @ai-sdk/provider@1.1.3
+ - @ai-sdk/provider-utils@2.2.7
+
+## 1.3.10
+
+### Patch Changes
+
+- dbe53e7: adding support for gpt-4o-search-preview and handling unsupported parameters
+- 84ffaba: fix: propagate openai transcription fixes
+
+## 1.3.9
+
+### Patch Changes
+
+- 013faa8: core (ai): change transcription model mimeType to mediaType
+- 013faa8: fix (provider/openai): increase transcription model resilience
+- Updated dependencies [013faa8]
+ - @ai-sdk/provider@1.1.2
+ - @ai-sdk/provider-utils@2.2.6
+
+## 1.3.8
+
+### Patch Changes
+
+- c21fa6d: feat: add transcription with experimental_transcribe
+- Updated dependencies [c21fa6d]
+ - @ai-sdk/provider-utils@2.2.5
+ - @ai-sdk/provider@1.1.1
+
+## 1.3.7
+
+### Patch Changes
+
+- Updated dependencies [2c19b9a]
+ - @ai-sdk/provider-utils@2.2.4
+
## 1.3.6
### Patch Changes
diff --git a/packages/openai/README.md b/packages/openai/README.md
index 75c49ae432d7..5ed225fcd735 100644
--- a/packages/openai/README.md
+++ b/packages/openai/README.md
@@ -1,6 +1,6 @@
# AI SDK - OpenAI Provider
-The **[OpenAI provider](https://sdk.vercel.ai/providers/ai-sdk-providers/openai)** for the [AI SDK](https://sdk.vercel.ai/docs)
+The **[OpenAI provider](https://ai-sdk.dev/providers/ai-sdk-providers/openai)** for the [AI SDK](https://ai-sdk.dev/docs)
contains language model support for the OpenAI chat and completion APIs and embedding model support for the OpenAI embeddings API.
## Setup
@@ -33,4 +33,4 @@ const { text } = await generateText({
## Documentation
-Please check out the **[OpenAI provider documentation](https://sdk.vercel.ai/providers/ai-sdk-providers/openai)** for more information.
+Please check out the **[OpenAI provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/openai)** for more information.
diff --git a/packages/openai/package.json b/packages/openai/package.json
index 66d1a887adf6..f8776d8d3ed4 100644
--- a/packages/openai/package.json
+++ b/packages/openai/package.json
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk/openai",
- "version": "1.3.6",
+ "version": "1.3.22",
"license": "Apache-2.0",
"sideEffects": false,
"main": "./dist/index.js",
@@ -38,8 +38,8 @@
}
},
"dependencies": {
- "@ai-sdk/provider": "1.1.0",
- "@ai-sdk/provider-utils": "2.2.3"
+ "@ai-sdk/provider": "1.1.3",
+ "@ai-sdk/provider-utils": "2.2.8"
},
"devDependencies": {
"@types/node": "20.17.24",
@@ -57,7 +57,7 @@
"publishConfig": {
"access": "public"
},
- "homepage": "https://sdk.vercel.ai/docs",
+ "homepage": "https://ai-sdk.dev/docs",
"repository": {
"type": "git",
"url": "git+https://github.com/vercel/ai.git"
diff --git a/packages/openai/src/internal/index.ts b/packages/openai/src/internal/index.ts
index b2fca011ffb8..97f1736fccd6 100644
--- a/packages/openai/src/internal/index.ts
+++ b/packages/openai/src/internal/index.ts
@@ -6,4 +6,8 @@ export * from '../openai-embedding-model';
export * from '../openai-embedding-settings';
export * from '../openai-image-model';
export * from '../openai-image-settings';
+export * from '../openai-transcription-model';
+export * from '../openai-transcription-settings';
+export * from '../openai-speech-model';
+export * from '../openai-speech-settings';
export * from '../responses/openai-responses-language-model';
diff --git a/packages/openai/src/openai-api-types.ts b/packages/openai/src/openai-api-types.ts
new file mode 100644
index 000000000000..6dd14415f2d9
--- /dev/null
+++ b/packages/openai/src/openai-api-types.ts
@@ -0,0 +1,38 @@
+export type OpenAISpeechAPITypes = {
+ /**
+ * The voice to use when generating the audio.
+ * Supported voices are alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse.
+ * @default 'alloy'
+ */
+ voice?:
+ | 'alloy'
+ | 'ash'
+ | 'ballad'
+ | 'coral'
+ | 'echo'
+ | 'fable'
+ | 'onyx'
+ | 'nova'
+ | 'sage'
+ | 'shimmer'
+ | 'verse';
+
+ /**
+ * The speed of the generated audio.
+ * Select a value from 0.25 to 4.0.
+ * @default 1.0
+ */
+ speed?: number;
+
+ /**
+ * The format of the generated audio.
+ * @default 'mp3'
+ */
+ response_format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
+
+ /**
+ * Instructions for the speech generation e.g. "Speak in a slow and steady tone".
+ * Does not work with tts-1 or tts-1-hd.
+ */
+ instructions?: string;
+};
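These options flow through `providerOptions.openai` (or the top-level speech call options), as exercised by the speech model tests later in this diff. A sketch (model id and values are illustrative):

```ts
import { createOpenAI } from '@ai-sdk/openai';

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Speech options can be passed directly or via providerOptions.openai:
await openai.speech('tts-1').doGenerate({
  text: 'Hello from the AI SDK!',
  voice: 'nova',
  outputFormat: 'opus',
  providerOptions: { openai: { speed: 1.25 } },
});
```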
diff --git a/packages/openai/src/openai-chat-language-model.test.ts b/packages/openai/src/openai-chat-language-model.test.ts
index c8bf86f7d45d..c0404ca8291e 100644
--- a/packages/openai/src/openai-chat-language-model.test.ts
+++ b/packages/openai/src/openai-chat-language-model.test.ts
@@ -1357,6 +1357,78 @@ describe('doGenerate', () => {
},
});
});
+
+ it('should remove temperature setting for gpt-4o-search-preview and add warning', async () => {
+ prepareJsonResponse();
+
+ const model = provider.chat('gpt-4o-search-preview');
+
+ const result = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ temperature: 0.7,
+ });
+
+ const requestBody = await server.calls[0].requestBody;
+ expect(requestBody.model).toBe('gpt-4o-search-preview');
+ expect(requestBody.temperature).toBeUndefined();
+
+ expect(result.warnings).toContainEqual({
+ type: 'unsupported-setting',
+ setting: 'temperature',
+ details:
+ 'temperature is not supported for the search preview models and has been removed.',
+ });
+ });
+
+ it('should remove temperature setting for gpt-4o-mini-search-preview and add warning', async () => {
+ prepareJsonResponse();
+
+ const model = provider.chat('gpt-4o-mini-search-preview');
+
+ const result = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ temperature: 0.7,
+ });
+
+ const requestBody = await server.calls[0].requestBody;
+ expect(requestBody.model).toBe('gpt-4o-mini-search-preview');
+ expect(requestBody.temperature).toBeUndefined();
+
+ expect(result.warnings).toContainEqual({
+ type: 'unsupported-setting',
+ setting: 'temperature',
+ details:
+ 'temperature is not supported for the search preview models and has been removed.',
+ });
+ });
+
+ it('should remove temperature setting for gpt-4o-mini-search-preview-2025-03-11 and add warning', async () => {
+ prepareJsonResponse();
+
+ const model = provider.chat('gpt-4o-mini-search-preview-2025-03-11');
+
+ const result = await model.doGenerate({
+ inputFormat: 'prompt',
+ mode: { type: 'regular' },
+ prompt: TEST_PROMPT,
+ temperature: 0.7,
+ });
+
+ const requestBody = await server.calls[0].requestBody;
+ expect(requestBody.model).toBe('gpt-4o-mini-search-preview-2025-03-11');
+ expect(requestBody.temperature).toBeUndefined();
+
+ expect(result.warnings).toContainEqual({
+ type: 'unsupported-setting',
+ setting: 'temperature',
+ details:
+ 'temperature is not supported for the search preview models and has been removed.',
+ });
+ });
});
describe('doStream', () => {
diff --git a/packages/openai/src/openai-chat-language-model.ts b/packages/openai/src/openai-chat-language-model.ts
index 247d9d4a44fb..e12f5d308947 100644
--- a/packages/openai/src/openai-chat-language-model.ts
+++ b/packages/openai/src/openai-chat-language-model.ts
@@ -266,8 +266,20 @@ export class OpenAIChatLanguageModel implements LanguageModelV1 {
}
baseArgs.max_tokens = undefined;
}
+ } else if (
+ this.modelId.startsWith('gpt-4o-search-preview') ||
+ this.modelId.startsWith('gpt-4o-mini-search-preview')
+ ) {
+ if (baseArgs.temperature != null) {
+ baseArgs.temperature = undefined;
+ warnings.push({
+ type: 'unsupported-setting',
+ setting: 'temperature',
+ details:
+ 'temperature is not supported for the search preview models and has been removed.',
+ });
+ }
}
-
switch (type) {
case 'regular': {
const { tools, tool_choice, functions, function_call, toolWarnings } =
@@ -877,7 +889,7 @@ const openaiChatChunkSchema = z.union([
z.object({
index: z.number(),
id: z.string().nullish(),
- type: z.literal('function').optional(),
+ type: z.literal('function').nullish(),
function: z.object({
name: z.string().nullish(),
arguments: z.string().nullish(),
@@ -905,7 +917,7 @@ const openaiChatChunkSchema = z.union([
.nullable(),
})
.nullish(),
- finish_reason: z.string().nullable().optional(),
+ finish_reason: z.string().nullish(),
index: z.number(),
}),
),
@@ -915,12 +927,7 @@ const openaiChatChunkSchema = z.union([
]);
function isReasoningModel(modelId: string) {
- return (
- modelId === 'o1' ||
- modelId.startsWith('o1-') ||
- modelId === 'o3' ||
- modelId.startsWith('o3-')
- );
+ return modelId.startsWith('o');
}
function isAudioModel(modelId: string) {
@@ -951,10 +958,22 @@ const reasoningModels = {
'o1-preview-2024-09-12': {
systemMessageMode: 'remove',
},
+ o3: {
+ systemMessageMode: 'developer',
+ },
+ 'o3-2025-04-16': {
+ systemMessageMode: 'developer',
+ },
'o3-mini': {
systemMessageMode: 'developer',
},
'o3-mini-2025-01-31': {
systemMessageMode: 'developer',
},
+ 'o4-mini': {
+ systemMessageMode: 'developer',
+ },
+ 'o4-mini-2025-04-16': {
+ systemMessageMode: 'developer',
+ },
} as const;
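With `systemMessageMode: 'developer'`, the system prompt for the o3 / o4-mini entries above is forwarded to the API as a `developer` message rather than a `system` message. A usage sketch (assuming `generateText` from the `ai` package; prompt content is illustrative):

```ts
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

// The system prompt below is sent as a `developer` message for o4-mini:
const { text } = await generateText({
  model: openai('o4-mini'),
  system: 'You are a terse release-notes assistant.',
  prompt: 'Summarize this change in one sentence.',
});
```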
diff --git a/packages/openai/src/openai-chat-settings.ts b/packages/openai/src/openai-chat-settings.ts
index c37d73589c0b..b2a33d6861af 100644
--- a/packages/openai/src/openai-chat-settings.ts
+++ b/packages/openai/src/openai-chat-settings.ts
@@ -8,6 +8,16 @@ export type OpenAIChatModelId =
| 'o1-preview-2024-09-12'
| 'o3-mini'
| 'o3-mini-2025-01-31'
+ | 'o3'
+ | 'o3-2025-04-16'
+ | 'o4-mini'
+ | 'o4-mini-2025-04-16'
+ | 'gpt-4.1'
+ | 'gpt-4.1-2025-04-14'
+ | 'gpt-4.1-mini'
+ | 'gpt-4.1-mini-2025-04-14'
+ | 'gpt-4.1-nano'
+ | 'gpt-4.1-nano-2025-04-14'
| 'gpt-4o'
| 'gpt-4o-2024-05-13'
| 'gpt-4o-2024-08-06'
@@ -15,6 +25,10 @@ export type OpenAIChatModelId =
| 'gpt-4o-audio-preview'
| 'gpt-4o-audio-preview-2024-10-01'
| 'gpt-4o-audio-preview-2024-12-17'
+ | 'gpt-4o-search-preview'
+ | 'gpt-4o-search-preview-2025-03-11'
+ | 'gpt-4o-mini-search-preview'
+ | 'gpt-4o-mini-search-preview-2025-03-11'
| 'gpt-4o-mini'
| 'gpt-4o-mini-2024-07-18'
| 'gpt-4-turbo'
diff --git a/packages/openai/src/openai-image-model.test.ts b/packages/openai/src/openai-image-model.test.ts
index 5e5a4aa5d834..f0404087ed0c 100644
--- a/packages/openai/src/openai-image-model.test.ts
+++ b/packages/openai/src/openai-image-model.test.ts
@@ -213,4 +213,44 @@ describe('doGenerate', () => {
);
expect(result.response.modelId).toBe('dall-e-3');
});
+
+ it('should not include response_format for gpt-image-1', async () => {
+ prepareJsonResponse();
+
+ const gptImageModel = provider.image('gpt-image-1');
+ await gptImageModel.doGenerate({
+ prompt,
+ n: 1,
+ size: '1024x1024',
+ aspectRatio: undefined,
+ seed: undefined,
+ providerOptions: {},
+ });
+
+ const requestBody = await server.calls[server.calls.length - 1].requestBody;
+ expect(requestBody).toStrictEqual({
+ model: 'gpt-image-1',
+ prompt,
+ n: 1,
+ size: '1024x1024',
+ });
+
+ expect(requestBody).not.toHaveProperty('response_format');
+ });
+
+ it('should include response_format for dall-e-3', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ prompt,
+ n: 1,
+ size: '1024x1024',
+ aspectRatio: undefined,
+ seed: undefined,
+ providerOptions: {},
+ });
+
+ const requestBody = await server.calls[server.calls.length - 1].requestBody;
+ expect(requestBody).toHaveProperty('response_format', 'b64_json');
+ });
});
diff --git a/packages/openai/src/openai-image-model.ts b/packages/openai/src/openai-image-model.ts
index 7696e9c01ff2..df2afcb16f5d 100644
--- a/packages/openai/src/openai-image-model.ts
+++ b/packages/openai/src/openai-image-model.ts
@@ -11,6 +11,7 @@ import {
OpenAIImageModelId,
OpenAIImageSettings,
modelMaxImagesPerCall,
+ hasDefaultResponseFormat,
} from './openai-image-settings';
interface OpenAIImageModelConfig extends OpenAIConfig {
@@ -78,7 +79,9 @@ export class OpenAIImageModel implements ImageModelV1 {
n,
size,
...(providerOptions.openai ?? {}),
- response_format: 'b64_json',
+ ...(!hasDefaultResponseFormat.has(this.modelId)
+ ? { response_format: 'b64_json' }
+ : {}),
},
failedResponseHandler: openaiFailedResponseHandler,
successfulResponseHandler: createJsonResponseHandler(
diff --git a/packages/openai/src/openai-image-settings.ts b/packages/openai/src/openai-image-settings.ts
index cc0f212a133e..19fa72632783 100644
--- a/packages/openai/src/openai-image-settings.ts
+++ b/packages/openai/src/openai-image-settings.ts
@@ -1,11 +1,18 @@
-export type OpenAIImageModelId = 'dall-e-3' | 'dall-e-2' | (string & {});
+export type OpenAIImageModelId =
+ | 'gpt-image-1'
+ | 'dall-e-3'
+ | 'dall-e-2'
+ | (string & {});
// https://platform.openai.com/docs/guides/images
export const modelMaxImagesPerCall: Record<OpenAIImageModelId, number> = {
'dall-e-3': 1,
'dall-e-2': 10,
+ 'gpt-image-1': 10,
};
+export const hasDefaultResponseFormat = new Set(['gpt-image-1']);
+
export interface OpenAIImageSettings {
/**
Override the maximum number of images per call (default is dependent on the
diff --git a/packages/openai/src/openai-provider.ts b/packages/openai/src/openai-provider.ts
index 8b403073d94a..84dc64bf2513 100644
--- a/packages/openai/src/openai-provider.ts
+++ b/packages/openai/src/openai-provider.ts
@@ -1,8 +1,10 @@
import {
EmbeddingModelV1,
ImageModelV1,
+ TranscriptionModelV1,
LanguageModelV1,
ProviderV1,
+ SpeechModelV1,
} from '@ai-sdk/provider';
import {
FetchFunction,
@@ -26,9 +28,13 @@ import {
OpenAIImageModelId,
OpenAIImageSettings,
} from './openai-image-settings';
+import { OpenAITranscriptionModel } from './openai-transcription-model';
+import { OpenAITranscriptionModelId } from './openai-transcription-settings';
import { OpenAIResponsesLanguageModel } from './responses/openai-responses-language-model';
import { OpenAIResponsesModelId } from './responses/openai-responses-settings';
import { openaiTools } from './openai-tools';
+import { OpenAISpeechModel } from './openai-speech-model';
+import { OpenAISpeechModelId } from './openai-speech-settings';
export interface OpenAIProvider extends ProviderV1 {
(
@@ -112,6 +118,16 @@ Creates a model for image generation.
settings?: OpenAIImageSettings,
): ImageModelV1;
+ /**
+Creates a model for transcription.
+ */
+ transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
+
+ /**
+Creates a model for speech generation.
+ */
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
+
/**
OpenAI-specific tools.
*/
@@ -234,6 +250,22 @@ export function createOpenAI(
fetch: options.fetch,
});
+ const createTranscriptionModel = (modelId: OpenAITranscriptionModelId) =>
+ new OpenAITranscriptionModel(modelId, {
+ provider: `${providerName}.transcription`,
+ url: ({ path }) => `${baseURL}${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const createSpeechModel = (modelId: OpenAISpeechModelId) =>
+ new OpenAISpeechModel(modelId, {
+ provider: `${providerName}.speech`,
+ url: ({ path }) => `${baseURL}${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
const createLanguageModel = (
modelId: OpenAIChatModelId | OpenAICompletionModelId,
settings?: OpenAIChatSettings | OpenAICompletionSettings,
@@ -281,6 +313,12 @@ export function createOpenAI(
provider.image = createImageModel;
provider.imageModel = createImageModel;
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
+
provider.tools = openaiTools;
return provider as OpenAIProvider;
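The new `transcription` and `speech` factory methods pair with the `experimental_transcribe` and `experimental_generateSpeech` functions mentioned in the changelog above. A round-trip sketch (the model ids `whisper-1` and `tts-1` and the file path are illustrative):

```ts
import {
  experimental_generateSpeech as generateSpeech,
  experimental_transcribe as transcribe,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'node:fs/promises';

// Transcribe an audio file, then synthesize the transcript back to speech.
const { text } = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('recording.mp3'),
});

const { audio } = await generateSpeech({
  model: openai.speech('tts-1'),
  text,
  voice: 'alloy',
});
```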
diff --git a/packages/openai/src/openai-speech-model.test.ts b/packages/openai/src/openai-speech-model.test.ts
new file mode 100644
index 000000000000..69ebe5304056
--- /dev/null
+++ b/packages/openai/src/openai-speech-model.test.ts
@@ -0,0 +1,193 @@
+import { createTestServer } from '@ai-sdk/provider-utils/test';
+import { OpenAISpeechModel } from './openai-speech-model';
+import { createOpenAI } from './openai-provider';
+
+const provider = createOpenAI({ apiKey: 'test-api-key' });
+const model = provider.speech('tts-1');
+
+const server = createTestServer({
+ 'https://api.openai.com/v1/audio/speech': {},
+});
+
+describe('doGenerate', () => {
+ function prepareAudioResponse({
+ headers,
+ format = 'mp3',
+ }: {
+ headers?: Record<string, string>;
+ format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
+ } = {}) {
+ const audioBuffer = new Uint8Array(100); // Mock audio data
+ server.urls['https://api.openai.com/v1/audio/speech'].response = {
+ type: 'binary',
+ headers: {
+ 'content-type': `audio/${format}`,
+ ...headers,
+ },
+ body: Buffer.from(audioBuffer),
+ };
+ return audioBuffer;
+ }
+
+ it('should pass the model and text', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ model: 'tts-1',
+ input: 'Hello from the AI SDK!',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareAudioResponse();
+
+ const provider = createOpenAI({
+ apiKey: 'test-api-key',
+ organization: 'test-organization',
+ project: 'test-project',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.speech('tts-1').doGenerate({
+ text: 'Hello from the AI SDK!',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'Bearer test-api-key',
+ 'content-type': 'application/json',
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ 'openai-organization': 'test-organization',
+ 'openai-project': 'test-project',
+ });
+ });
+
+ it('should pass options', async () => {
+ prepareAudioResponse();
+
+ await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ voice: 'nova',
+ outputFormat: 'opus',
+ speed: 1.5,
+ });
+
+ expect(await server.calls[0].requestBody).toMatchObject({
+ model: 'tts-1',
+ input: 'Hello from the AI SDK!',
+ voice: 'nova',
+ speed: 1.5,
+ response_format: 'opus',
+ });
+ });
+
+ it('should return audio data with correct content type', async () => {
+ const audio = new Uint8Array(100); // Mock audio data
+ prepareAudioResponse({
+ format: 'opus',
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ outputFormat: 'opus',
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareAudioResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new OpenAISpeechModel('tts-1', {
+ provider: 'test-provider',
+ url: () => 'https://api.openai.com/v1/audio/speech',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'tts-1',
+ headers: {
+ 'content-type': 'audio/mp3',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareAudioResponse();
+
+ const testDate = new Date(0);
+ const customModel = new OpenAISpeechModel('tts-1', {
+ provider: 'test-provider',
+ url: () => 'https://api.openai.com/v1/audio/speech',
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('tts-1');
+ });
+
+ it('should handle different audio formats', async () => {
+ const formats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] as const;
+
+ for (const format of formats) {
+ const audio = prepareAudioResponse({ format });
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ providerOptions: {
+ openai: {
+ response_format: format,
+ },
+ },
+ });
+
+ expect(result.audio).toStrictEqual(audio);
+ }
+ });
+
+ it('should include warnings if any are generated', async () => {
+ prepareAudioResponse();
+
+ const result = await model.doGenerate({
+ text: 'Hello from the AI SDK!',
+ });
+
+ expect(result.warnings).toEqual([]);
+ });
+});
diff --git a/packages/openai/src/openai-speech-model.ts b/packages/openai/src/openai-speech-model.ts
new file mode 100644
index 000000000000..522bd89b759e
--- /dev/null
+++ b/packages/openai/src/openai-speech-model.ts
@@ -0,0 +1,136 @@
+import { SpeechModelV1, SpeechModelV1CallWarning } from '@ai-sdk/provider';
+import {
+ combineHeaders,
+ createBinaryResponseHandler,
+ parseProviderOptions,
+ postJsonToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod';
+import { OpenAIConfig } from './openai-config';
+import { openaiFailedResponseHandler } from './openai-error';
+import { OpenAISpeechModelId } from './openai-speech-settings';
+import { OpenAISpeechAPITypes } from './openai-api-types';
+
+// https://platform.openai.com/docs/api-reference/audio/createSpeech
+const OpenAIProviderOptionsSchema = z.object({
+ instructions: z.string().nullish(),
+ speed: z.number().min(0.25).max(4.0).default(1.0).nullish(),
+});
+
+export type OpenAISpeechCallOptions = z.infer<
+ typeof OpenAIProviderOptionsSchema
+>;
+
+interface OpenAISpeechModelConfig extends OpenAIConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+}
+
+export class OpenAISpeechModel implements SpeechModelV1 {
+ readonly specificationVersion = 'v1';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: OpenAISpeechModelId,
+ private readonly config: OpenAISpeechModelConfig,
+ ) {}
+
+ private getArgs({
+ text,
+ voice = 'alloy',
+ outputFormat = 'mp3',
+ speed,
+ instructions,
+ providerOptions,
+ }: Parameters<SpeechModelV1['doGenerate']>[0]) {
+ const warnings: SpeechModelV1CallWarning[] = [];
+
+ // Parse provider options
+ const openAIOptions = parseProviderOptions({
+ provider: 'openai',
+ providerOptions,
+ schema: OpenAIProviderOptionsSchema,
+ });
+
+ // Create request body
+ const requestBody: Record<string, unknown> = {