diff --git a/packages/ai/__tests__/chat-session-helpers.test.ts b/packages/ai/__tests__/chat-session-helpers.test.ts index 8bc81f4eab..215dd6f4c8 100644 --- a/packages/ai/__tests__/chat-session-helpers.test.ts +++ b/packages/ai/__tests__/chat-session-helpers.test.ts @@ -139,6 +139,43 @@ describe('chat-session-helpers', () => { ], isValid: false, }, + { + history: [ + { role: 'user', parts: [{ text: 'hi' }] }, + { + role: 'model', + parts: [ + { text: 'hi' }, + { + text: 'thought about hi', + thought: true, + thoughtSignature: 'thought signature', + }, + ], + }, + ], + isValid: true, + }, + { + history: [ + { + role: 'user', + parts: [{ text: 'hi', thought: true, thoughtSignature: 'sig' }], + }, + { + role: 'model', + parts: [ + { text: 'hi' }, + { + text: 'thought about hi', + thought: true, + thoughtSignature: 'thought signature', + }, + ], + }, + ], + isValid: false, + }, ]; TCS.forEach(tc => { diff --git a/packages/ai/__tests__/response-helpers.test.ts b/packages/ai/__tests__/response-helpers.test.ts index cc0fddc658..23077b8ec1 100644 --- a/packages/ai/__tests__/response-helpers.test.ts +++ b/packages/ai/__tests__/response-helpers.test.ts @@ -15,9 +15,21 @@ * limitations under the License. */ import { describe, expect, it, jest, afterEach } from '@jest/globals'; -import { addHelpers, formatBlockErrorMessage } from '../lib/requests/response-helpers'; +import { + addHelpers, + formatBlockErrorMessage, + handlePredictResponse, +} from '../lib/requests/response-helpers'; -import { BlockReason, Content, FinishReason, GenerateContentResponse } from '../lib/types'; +import { + BlockReason, + Content, + FinishReason, + GenerateContentResponse, + ImagenInlineImage, + ImagenGCSImage, +} from '../lib/types'; +import { getMockResponse, BackendName } from './test-utils/mock-response'; const fakeResponseText: GenerateContentResponse = { candidates: [ @@ -31,6 +43,18 @@ const fakeResponseText: GenerateContentResponse = { ], }; +const fakeResponseThoughts: GenerateContentResponse = { + candidates: [ + { + index: 0, + content: { + role: 'model', + parts: [{ text: 'Some text' }, { text: 'and some thoughts', thought: true }], + }, + }, + ], +}; + const functionCallPart1 = { functionCall: { name: 'find_theaters', @@ -129,12 +153,14 @@ describe('response-helpers methods', () => { const enhancedResponse = addHelpers(fakeResponseText); expect(enhancedResponse.text()).toBe('Some text and some more text'); expect(enhancedResponse.functionCalls()).toBeUndefined(); + expect(enhancedResponse.thoughtSummary()).toBeUndefined(); }); it('good response functionCall', () => { const enhancedResponse = addHelpers(fakeResponseFunctionCall); expect(enhancedResponse.text()).toBe(''); expect(enhancedResponse.functionCalls()).toEqual([functionCallPart1.functionCall]); + expect(enhancedResponse.thoughtSummary()).toBeUndefined(); }); it('good response functionCalls', () => { @@ -144,29 +170,41 @@ describe('response-helpers methods', () => { functionCallPart1.functionCall, functionCallPart2.functionCall, ]); + expect(enhancedResponse.thoughtSummary()).toBeUndefined(); }); it('good response text/functionCall', () => { const enhancedResponse = addHelpers(fakeResponseMixed1); expect(enhancedResponse.functionCalls()).toEqual([functionCallPart2.functionCall]); expect(enhancedResponse.text()).toBe('some text'); + expect(enhancedResponse.thoughtSummary()).toBeUndefined(); }); it('good response functionCall/text', () => { const enhancedResponse = addHelpers(fakeResponseMixed2); 
expect(enhancedResponse.functionCalls()).toEqual([functionCallPart1.functionCall]);
    expect(enhancedResponse.text()).toBe('some text');
+    expect(enhancedResponse.thoughtSummary()).toBeUndefined();
  });

  it('good response text/functionCall/text', () => {
    const enhancedResponse = addHelpers(fakeResponseMixed3);
    expect(enhancedResponse.functionCalls()).toEqual([functionCallPart1.functionCall]);
    expect(enhancedResponse.text()).toBe('some text and more text');
+    expect(enhancedResponse.thoughtSummary()).toBeUndefined();
+  });
+
+  it('good response text/thought', () => {
+    const enhancedResponse = addHelpers(fakeResponseThoughts);
+    expect(enhancedResponse.text()).toBe('Some text');
+    expect(enhancedResponse.thoughtSummary()).toBe('and some thoughts');
+    expect(enhancedResponse.functionCalls()).toBeUndefined();
  });

  it('bad response safety', () => {
    const enhancedResponse = addHelpers(badFakeResponse);
    expect(() => enhancedResponse.text()).toThrow('SAFETY');
+    expect(() => enhancedResponse.thoughtSummary()).toThrow('SAFETY');
  });
});

@@ -233,4 +271,80 @@ describe('response-helpers methods', () => {
      expect(message).toContain('Candidate was blocked due to SAFETY: unsafe candidate');
    });
  });
+
+  describe('handlePredictResponse', () => {
+    it('returns base64 images', async () => {
+      const mockResponse = getMockResponse(
+        BackendName.VertexAI,
+        'unary-success-generate-images-base64.json',
+      ) as Response;
+      const res = await handlePredictResponse<ImagenInlineImage>(mockResponse);
+      expect(res.filteredReason).toBeUndefined();
+      expect(res.images.length).toBe(4);
+      res.images.forEach(image => {
+        expect(image.mimeType).toBe('image/png');
+        expect(image.bytesBase64Encoded.length).toBeGreaterThan(0);
+      });
+    });
+
+    it('returns GCS images', async () => {
+      const mockResponse = getMockResponse(
+        BackendName.VertexAI,
+        'unary-success-generate-images-gcs.json',
+      ) as Response;
+      const res = await handlePredictResponse<ImagenGCSImage>(mockResponse);
+      expect(res.filteredReason).toBeUndefined();
+      expect(res.images.length).toBe(4);
+      res.images.forEach((image, i) => {
+        expect(image.mimeType).toBe('image/jpeg');
+        expect(image.gcsURI).toBe(
+          `gs://test-project-id-1234.firebasestorage.app/images/1234567890123/sample_${i}.jpg`,
+        );
+      });
+    });
+
+    it('has filtered reason and no images if all images were filtered', async () => {
+      const mockResponse = getMockResponse(
+        BackendName.VertexAI,
+        'unary-failure-generate-images-all-filtered.json',
+      ) as Response;
+      const res = await handlePredictResponse(mockResponse);
+      expect(res.filteredReason).toBe(
+        "Unable to show generated images. All images were filtered out because they violated Vertex AI's usage guidelines. You will not be charged for blocked images. Try rephrasing the prompt. If you think this was an error, send feedback. Support codes: 39322892, 29310472",
+      );
+      expect(res.images.length).toBe(0);
+    });
+
+    it('has filtered reason and some images if some base64 images were filtered', async () => {
+      const mockResponse = getMockResponse(
+        BackendName.VertexAI,
+        'unary-failure-generate-images-base64-some-filtered.json',
+      ) as Response;
+      const res = await handlePredictResponse<ImagenInlineImage>(mockResponse);
+      expect(res.filteredReason).toBe(
+        'Your current safety filter threshold filtered out 2 generated images. You will not be charged for blocked images. Try rephrasing the prompt. 
If you think this was an error, send feedback.',
+      );
+      expect(res.images.length).toBe(2);
+      res.images.forEach(image => {
+        expect(image.mimeType).toBe('image/png');
+        expect(image.bytesBase64Encoded.length).toBeGreaterThan(0);
+      });
+    });
+
+    it('has filtered reason and some images if some GCS images were filtered', async () => {
+      const mockResponse = getMockResponse(
+        BackendName.VertexAI,
+        'unary-failure-generate-images-gcs-some-filtered.json',
+      ) as Response;
+      const res = await handlePredictResponse<ImagenGCSImage>(mockResponse);
+      expect(res.filteredReason).toBe(
+        'Your current safety filter threshold filtered out 2 generated images. You will not be charged for blocked images. Try rephrasing the prompt. If you think this was an error, send feedback.',
+      );
+      expect(res.images.length).toBe(2);
+      res.images.forEach(image => {
+        expect(image.mimeType).toBe('image/jpeg');
+        expect(image.gcsURI.length).toBeGreaterThan(0);
+      });
+    });
+  });
});
diff --git a/packages/ai/lib/constants.ts b/packages/ai/lib/constants.ts
index a0cffa49ad..24c14ff58f 100644
--- a/packages/ai/lib/constants.ts
+++ b/packages/ai/lib/constants.ts
@@ -21,15 +21,12 @@ export const AI_TYPE = 'AI';

 export const DEFAULT_LOCATION = 'us-central1';

-export const DEFAULT_BASE_URL = 'https://firebasevertexai.googleapis.com';
+export const DEFAULT_DOMAIN = 'firebasevertexai.googleapis.com';

-// This is the default API version for the VertexAI API. At some point, should be able to change when the feature becomes available.
-// `v1beta` & `stable` available: https://cloud.google.com/vertex-ai/docs/reference#versions
 export const DEFAULT_API_VERSION = 'v1beta';

 export const PACKAGE_VERSION = version;

 export const LANGUAGE_TAG = 'gl-rn';

-// Timeout is 180s by default
 export const DEFAULT_FETCH_TIMEOUT_MS = 180 * 1000;
diff --git a/packages/ai/lib/index.ts b/packages/ai/lib/index.ts
index 61e2c50e84..7c8c476de4 100644
--- a/packages/ai/lib/index.ts
+++ b/packages/ai/lib/index.ts
@@ -17,7 +17,7 @@
 import './polyfills';
 import { getApp, ReactNativeFirebase } from '@react-native-firebase/app';
-import { GoogleAIBackend, VertexAIBackend } from './backend';
+import { Backend, GoogleAIBackend, VertexAIBackend } from './backend';
 import { AIErrorCode, ModelParams, RequestOptions } from './types';
 import { AI, AIOptions, ImagenModelParams } from './public-types';
 import { AIError } from './errors';
@@ -27,8 +27,9 @@ import { AIModel, ImagenModel } from './models';
 export * from './public-types';
 export { ChatSession } from './methods/chat-session';
 export * from './requests/schema-builder';
-export { GoogleAIBackend, VertexAIBackend } from './backend';
-export { GenerativeModel, AIError, AIModel };
+export { ImagenImageFormat } from './requests/imagen-image-format';
+export { Backend, GoogleAIBackend, VertexAIBackend } from './backend';
+export { GenerativeModel, AIError, AIModel, ImagenModel };

 /**
  * Returns the default {@link AI} instance that is associated with the provided
 *
 * @public
 */
-export function getAI(
-  app: ReactNativeFirebase.FirebaseApp = getApp(),
-  options: AIOptions = { backend: new GoogleAIBackend() },
-): AI {
+export function getAI(app: ReactNativeFirebase.FirebaseApp = getApp(), options?: AIOptions): AI {
+  const backend: Backend = options?.backend ?? new GoogleAIBackend();
+
+  const finalOptions: Omit<AIOptions, 'backend'> = {
+    useLimitedUseAppCheckTokens: options?.useLimitedUseAppCheckTokens ?? 
false,
+    appCheck: options?.appCheck || null,
+    auth: options?.auth || null,
+  };
+
   return {
     app,
-    backend: options.backend,
-    location: (options.backend as VertexAIBackend)?.location || '',
-    appCheck: options.appCheck || null,
-    auth: options.auth || null,
+    backend,
+    options: finalOptions,
+    location: (backend as VertexAIBackend)?.location || '',
+    appCheck: options?.appCheck || null,
+    auth: options?.auth || null,
   } as AI;
 }
diff --git a/packages/ai/lib/methods/chat-session-helpers.ts b/packages/ai/lib/methods/chat-session-helpers.ts
index ea8cd826b9..0bf988ae63 100644
--- a/packages/ai/lib/methods/chat-session-helpers.ts
+++ b/packages/ai/lib/methods/chat-session-helpers.ts
@@ -25,12 +25,14 @@ const VALID_PART_FIELDS: Array<keyof Part> = [
   'inlineData',
   'functionCall',
   'functionResponse',
+  'thought',
+  'thoughtSignature',
 ];

 const VALID_PARTS_PER_ROLE: { [key in Role]: Array<keyof Part> } = {
   user: ['text', 'inlineData'],
   function: ['functionResponse'],
-  model: ['text', 'functionCall'],
+  model: ['text', 'functionCall', 'thought', 'thoughtSignature'],
   // System instructions shouldn't be in history anyway.
   system: ['text'],
 };
@@ -78,6 +80,8 @@ export function validateChatHistory(history: Content[]): void {
       inlineData: 0,
       functionCall: 0,
       functionResponse: 0,
+      thought: 0,
+      thoughtSignature: 0,
     };

     for (const part of parts) {
diff --git a/packages/ai/lib/public-types.ts b/packages/ai/lib/public-types.ts
index b64832df37..e17cb281db 100644
--- a/packages/ai/lib/public-types.ts
+++ b/packages/ai/lib/public-types.ts
@@ -18,8 +18,10 @@
 import { ReactNativeFirebase } from '@react-native-firebase/app';
 import { FirebaseAuthTypes } from '@react-native-firebase/auth';
 import { FirebaseAppCheckTypes } from '@react-native-firebase/app-check';
+import { Backend } from './backend';

 export * from './types';
+export { Backend };

 /**
  * Options for initializing the AI service using {@link getAI | getAI()}.
 *
@@ -31,33 +33,15 @@ export * from './types';
 export interface AIOptions {
   /**
    * The backend configuration to use for the AI service instance.
+   * Defaults to the Gemini Developer API backend ({@link GoogleAIBackend}).
    */
-  backend: Backend;
-  appCheck?: FirebaseAppCheckTypes.Module | null;
-  auth?: FirebaseAuthTypes.Module | null;
-}
-
-/**
- * Abstract base class representing the configuration for an AI service backend.
- * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
- * the Gemini Developer API (via {@link https://ai.google/ | Google AI}), and
- * {@link VertexAIBackend} for the Vertex AI Gemini API.
- *
- * @public
- */
-export abstract class Backend {
-  /**
-   * Specifies the backend type.
-   */
-  readonly backendType: BackendType;
-
+  backend?: Backend;
   /**
-   * Protected constructor for use by subclasses.
-   * @param type - The backend type.
+   * Whether to use App Check limited use tokens. Defaults to false.
    */
-  protected constructor(type: BackendType) {
-    this.backendType = type;
-  }
+  useLimitedUseAppCheckTokens?: boolean;
+  appCheck?: FirebaseAppCheckTypes.Module | null;
+  auth?: FirebaseAuthTypes.Module | null;
 }

 /**
@@ -94,20 +78,6 @@ export const BackendType = {
  */
 export type BackendType = (typeof BackendType)[keyof typeof BackendType];

-/**
- * Options for initializing the AI service using {@link getAI | getAI()}.
- * This allows specifying which backend to use (Vertex AI Gemini API or Gemini Developer API)
- * and configuring its specific options (like location for Vertex AI). 
- *
- * @public
- */
-export interface AIOptions {
-  /**
-   * The backend configuration to use for the AI service instance.
-   */
-  backend: Backend;
-}
-
 /**
  * An instance of the Firebase AI SDK.
  *
  * @public
@@ -128,6 +98,10 @@
   * Vertex AI Gemini API (using {@link VertexAIBackend}).
   */
  backend: Backend;
+  /**
+   * Options applied to this {@link AI} instance.
+   */
+  options?: AIOptions;
  /**
   * @deprecated use `AI.backend.location` instead.
   *
diff --git a/packages/ai/lib/requests/request.ts b/packages/ai/lib/requests/request.ts
index 5c5a6d7160..0b1c668ece 100644
--- a/packages/ai/lib/requests/request.ts
+++ b/packages/ai/lib/requests/request.ts
@@ -20,7 +20,7 @@ import { AIError } from '../errors';
 import { ApiSettings } from '../types/internal';
 import {
   DEFAULT_API_VERSION,
-  DEFAULT_BASE_URL,
+  DEFAULT_DOMAIN,
   DEFAULT_FETCH_TIMEOUT_MS,
   LANGUAGE_TAG,
   PACKAGE_VERSION,
@@ -75,7 +75,7 @@ export class RequestUrl {
   }

   private get baseUrl(): string {
-    return this.requestOptions?.baseUrl || DEFAULT_BASE_URL;
+    return this.requestOptions?.baseUrl || `https://${DEFAULT_DOMAIN}`;
   }

   private get apiVersion(): string {
diff --git a/packages/ai/lib/requests/response-helpers.ts b/packages/ai/lib/requests/response-helpers.ts
index 670a671b71..c2667477d0 100644
--- a/packages/ai/lib/requests/response-helpers.ts
+++ b/packages/ai/lib/requests/response-helpers.ts
@@ -23,6 +23,7 @@ import {
   GenerateContentResponse,
   AIErrorCode,
   InlineDataPart,
+  Part,
   ImagenInlineImage,
   ImagenGCSImage,
 } from '../types';
@@ -30,6 +31,36 @@ import { AIError } from '../errors';
 import { logger } from '../logger';
 import { ImagenResponseInternal } from '../types/imagen/internal';

+/**
+ * Checks that the response contains at least one candidate. Throws if the
+ * first candidate has a bad finish reason. Warns if multiple candidates exist.
+ */
+function hasValidCandidates(response: GenerateContentResponse): boolean {
+  if (response.candidates && response.candidates.length > 0) {
+    if (response.candidates.length > 1) {
+      logger.warn(
+        `This response had ${response.candidates.length} ` +
+          `candidates. Returning text from the first candidate only. ` +
+          `Access response.candidates directly to use the other candidates.`,
+      );
+    }
+    if (hadBadFinishReason(response.candidates[0]!)) {
+      throw new AIError(
+        AIErrorCode.RESPONSE_ERROR,
+        `Response error: ${formatBlockErrorMessage(
+          response,
+        )}. Response body stored in error.response`,
+        {
+          response,
+        },
+      );
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
 /**
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
@@ -57,26 +88,8 @@
  */
 export function addHelpers(response: GenerateContentResponse): EnhancedGenerateContentResponse {
   (response as EnhancedGenerateContentResponse).text = () => {
-    if (response.candidates && response.candidates.length > 0) {
-      if (response.candidates.length > 1) {
-        logger.warn(
-          `This response had ${response.candidates.length} ` +
-            `candidates. Returning text from the first candidate only. ` +
-            `Access response.candidates directly to use the other candidates.`,
-        );
-      }
-      if (hadBadFinishReason(response.candidates[0]!)) {
-        throw new AIError(
-          AIErrorCode.RESPONSE_ERROR,
-          `Response error: ${formatBlockErrorMessage(
-            response,
-          )}. 
Response body stored in error.response`, - { - response, - }, - ); - } - return getText(response); + if (hasValidCandidates(response)) { + return getText(response, part => !part.thought); } else if (response.promptFeedback) { throw new AIError( AIErrorCode.RESPONSE_ERROR, @@ -88,28 +101,25 @@ export function addHelpers(response: GenerateContentResponse): EnhancedGenerateC } return ''; }; + (response as EnhancedGenerateContentResponse).thoughtSummary = () => { + if (hasValidCandidates(response)) { + const result = getText(response, part => !!part.thought); + return result === '' ? undefined : result; + } else if (response.promptFeedback) { + throw new AIError( + AIErrorCode.RESPONSE_ERROR, + `Thought summary not available. ${formatBlockErrorMessage(response)}`, + { + response, + }, + ); + } + return undefined; + }; (response as EnhancedGenerateContentResponse).inlineDataParts = (): | InlineDataPart[] | undefined => { - if (response.candidates && response.candidates.length > 0) { - if (response.candidates.length > 1) { - logger.warn( - `This response had ${response.candidates.length} ` + - `candidates. Returning data from the first candidate only. ` + - `Access response.candidates directly to use the other candidates.`, - ); - } - if (hadBadFinishReason(response.candidates[0]!)) { - throw new AIError( - AIErrorCode.RESPONSE_ERROR, - `Response error: ${formatBlockErrorMessage( - response, - )}. Response body stored in error.response`, - { - response, - }, - ); - } + if (hasValidCandidates(response)) { return getInlineDataParts(response); } else if (response.promptFeedback) { throw new AIError( @@ -123,25 +133,7 @@ export function addHelpers(response: GenerateContentResponse): EnhancedGenerateC return undefined; }; (response as EnhancedGenerateContentResponse).functionCalls = () => { - if (response.candidates && response.candidates.length > 0) { - if (response.candidates.length > 1) { - logger.warn( - `This response had ${response.candidates.length} ` + - `candidates. Returning function calls from the first candidate only. ` + - `Access response.candidates directly to use the other candidates.`, - ); - } - if (hadBadFinishReason(response.candidates[0]!)) { - throw new AIError( - AIErrorCode.RESPONSE_ERROR, - `Response error: ${formatBlockErrorMessage( - response, - )}. Response body stored in error.response`, - { - response, - }, - ); - } + if (hasValidCandidates(response)) { return getFunctionCalls(response); } else if (response.promptFeedback) { throw new AIError( @@ -158,13 +150,20 @@ export function addHelpers(response: GenerateContentResponse): EnhancedGenerateC } /** - * Returns all text found in all parts of first candidate. + * Returns all text from the first candidate's parts, filtering by whether + * `partFilter()` returns true. + * + * @param response - The `GenerateContentResponse` from which to extract text. + * @param partFilter - Only return `Part`s for which this returns true */ -export function getText(response: GenerateContentResponse): string { +export function getText( + response: GenerateContentResponse, + partFilter: (part: Part) => boolean, +): string { const textStrings = []; if (response.candidates?.[0]?.content?.parts) { for (const part of response.candidates?.[0]?.content?.parts) { - if (part.text) { + if (part.text && partFilter(part)) { textStrings.push(part.text); } } @@ -177,12 +176,12 @@ export function getText(response: GenerateContentResponse): string { } /** - * Returns {@link FunctionCall}s associated with first candidate. 
+ * Returns every {@link FunctionCall} associated with first candidate.
  */
 export function getFunctionCalls(response: GenerateContentResponse): FunctionCall[] | undefined {
   const functionCalls: FunctionCall[] = [];
   if (response.candidates?.[0]?.content?.parts) {
-    for (const part of response.candidates?.[0].content?.parts) {
+    for (const part of response.candidates?.[0]?.content?.parts) {
       if (part.functionCall) {
         functionCalls.push(part.functionCall);
       }
@@ -196,7 +195,7 @@
 }

 /**
- * Returns {@link InlineDataPart}s in the first candidate if present.
+ * Returns every {@link InlineDataPart} in the first candidate if present.
 *
 * @internal
 */
@@ -223,7 +222,9 @@
 const badFinishReasons = [FinishReason.RECITATION, FinishReason.SAFETY];

 function hadBadFinishReason(candidate: GenerateContentCandidate): boolean {
-  return !!candidate.finishReason && badFinishReasons.includes(candidate.finishReason);
+  return (
+    !!candidate.finishReason && badFinishReasons.some(reason => reason === candidate.finishReason)
+  );
 }

 export function formatBlockErrorMessage(response: GenerateContentResponse): string {
@@ -267,7 +268,7 @@ export async function handlePredictResponse<T extends ImagenInlineImage | ImagenGCSImage>(
diff --git a/packages/ai/lib/types/content.ts b/packages/ai/lib/types/content.ts
--- a/packages/ai/lib/types/content.ts
+++ b/packages/ai/lib/types/content.ts
 /**
- * Content part interface if the part represents {@link FunctionCall}.
+ * Content part interface if the part represents a {@link FunctionCall}.
  * @public
  */
 export interface FunctionCallPart {
@@ -90,10 +100,15 @@ export interface FunctionCallPart {
   inlineData?: never;
   functionCall: FunctionCall;
   functionResponse?: never;
+  thought?: boolean;
+  /**
+   * @internal
+   */
+  thoughtSignature?: never;
 }

 /**
- * Content part interface if the part represents {@link FunctionResponse}.
+ * Content part interface if the part represents a {@link FunctionResponse}.
  * @public
  */
 export interface FunctionResponsePart {
@@ -101,10 +116,15 @@ export interface FunctionResponsePart {
   text?: never;
   inlineData?: never;
   functionCall?: never;
   functionResponse: FunctionResponse;
+  thought?: boolean;
+  /**
+   * @internal
+   */
+  thoughtSignature?: never;
 }

 /**
- * Content part interface if the part represents {@link FileData}
+ * Content part interface if the part represents a {@link FileData}
  * @public
  */
 export interface FileDataPart {
@@ -113,29 +133,51 @@ export interface FileDataPart {
   text?: never;
   inlineData?: never;
   functionCall?: never;
   functionResponse?: never;
   fileData: FileData;
+  thought?: boolean;
+  /**
+   * @internal
+   */
+  thoughtSignature?: never;
 }

 /**
- * A predicted {@link FunctionCall} returned from the model
+ * A predicted {@link FunctionCall} returned from the model
  * that contains a string representing the {@link FunctionDeclaration.name}
  * and a structured JSON object containing the parameters and their values.
  * @public
  */
 export interface FunctionCall {
+  /**
+   * The id of the function call. This must be sent back in the associated {@link FunctionResponse}.
+   *
+   * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
+   * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
+   * `undefined`.
+   */
+  id?: string;
   name: string;
   args: object;
 }

 /**
- * The result output from a {@link FunctionCall} that contains a string
+ * The result output from a {@link FunctionCall} that contains a string
  * representing the {@link FunctionDeclaration.name}
  * and a structured JSON object containing any output
 * from the function is used as context to the model. 
- * This should contain the result of a {@link FunctionCall}
+ * This should contain the result of a {@link FunctionCall}
  * made based on model prediction.
  * @public
  */
 export interface FunctionResponse {
+  /**
+   * The id of the {@link FunctionCall}.
+   *
+   * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
+   * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
+   * `undefined`.
+   */
+  id?: string;
   name: string;
   response: object;
 }
diff --git a/packages/ai/lib/types/enums.ts b/packages/ai/lib/types/enums.ts
index c2c5f909a6..73aaef699c 100644
--- a/packages/ai/lib/types/enums.ts
+++ b/packages/ai/lib/types/enums.ts
@@ -273,6 +273,11 @@ export const ResponseModality = {
    * @beta
    */
   IMAGE: 'IMAGE',
+  /**
+   * Audio.
+   * @beta
+   */
+  AUDIO: 'AUDIO',
 } as const;

 /**
@@ -281,3 +286,21 @@ export const ResponseModality = {
  * @beta
  */
 export type ResponseModality = (typeof ResponseModality)[keyof typeof ResponseModality];
+
+/**
+ * (EXPERIMENTAL)
+ * Determines whether inference happens on-device or in-cloud.
+ * @public
+ */
+export const InferenceMode = {
+  PREFER_ON_DEVICE: 'prefer_on_device',
+  ONLY_ON_DEVICE: 'only_on_device',
+  ONLY_IN_CLOUD: 'only_in_cloud',
+} as const;
+
+/**
+ * (EXPERIMENTAL)
+ * Determines whether inference happens on-device or in-cloud.
+ * @public
+ */
+export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
diff --git a/packages/ai/lib/types/imagen/internal.ts b/packages/ai/lib/types/imagen/internal.ts
index 8e72402c12..e5fa91012d 100644
--- a/packages/ai/lib/types/imagen/internal.ts
+++ b/packages/ai/lib/types/imagen/internal.ts
@@ -61,6 +61,10 @@ export interface ImagenResponseInternal {
    * The reason why the image was filtered.
    */
   raiFilteredReason?: string;
+  /**
+   * Safety attributes for the prediction.
+   */
+  safetyAttributes?: unknown;
 }>;
 }
diff --git a/packages/ai/lib/types/requests.ts b/packages/ai/lib/types/requests.ts
index 53b35b1196..b6047d186a 100644
--- a/packages/ai/lib/types/requests.ts
+++ b/packages/ai/lib/types/requests.ts
@@ -113,6 +113,10 @@ export interface GenerationConfig {
    * @beta
    */
   responseModalities?: ResponseModality[];
+  /**
+   * Configuration for "thinking" behavior of compatible Gemini models.
+   */
+  thinkingConfig?: ThinkingConfig;
 }

 /**
@@ -165,7 +169,7 @@ export interface RequestOptions {
  * Defines a tool that model can call to access external knowledge.
  * @public
  */
-export declare type Tool = FunctionDeclarationsTool;
+export type Tool = FunctionDeclarationsTool | GoogleSearchTool;

 /**
  * Structured representation of a function declaration as defined by the
@@ -176,7 +180,7 @@
  * as a Tool by the model and executed by the client.
  * @public
  */
-export declare interface FunctionDeclaration {
+export interface FunctionDeclaration {
   /**
    * The name of the function to call. Must start with a letter or an
    * underscore. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with
@@ -196,13 +200,49 @@
   parameters?: ObjectSchemaInterface;
 }

+/**
+ * A tool that allows a Gemini model to connect to Google Search to access and incorporate
+ * up-to-date information from the web into its responses. 
+ *
+ * Important: If using Grounding with Google Search, you are required to comply with the
+ * "Grounding with Google Search" usage requirements for your chosen API provider: {@link https://ai.google.dev/gemini-api/terms#grounding-with-google-search | Gemini Developer API}
+ * or Vertex AI Gemini API (see {@link https://cloud.google.com/terms/service-terms | Service Terms}
+ * section within the Service Specific Terms).
+ *
+ * @public
+ */
+export interface GoogleSearchTool {
+  /**
+   * Specifies the Google Search configuration. Currently, this is an empty object, but it's
+   * reserved for future configuration options.
+   *
+   * When using this feature, you are required to comply with the "Grounding with Google Search"
+   * usage requirements for your chosen API provider: {@link https://ai.google.dev/gemini-api/terms#grounding-with-google-search | Gemini Developer API}
+   * or Vertex AI Gemini API (see {@link https://cloud.google.com/terms/service-terms | Service Terms}
+   * section within the Service Specific Terms).
+   */
+  googleSearch: GoogleSearch;
+}
+
+/**
+ * Specifies the Google Search configuration.
+ *
+ * @remarks Currently, this is an empty object, but it's reserved for future configuration options.
+ *
+ * @public
+ */
+// eslint-disable-next-line @typescript-eslint/no-empty-object-type
+export interface GoogleSearch {}
+
 /**
  * A `FunctionDeclarationsTool` is a piece of code that enables the system to
  * interact with external systems to perform an action, or set of actions,
  * outside of knowledge and scope of the model.
  * @public
  */
-export declare interface FunctionDeclarationsTool {
+export interface FunctionDeclarationsTool {
   /**
    * Optional. One or more function declarations
    * to be passed to the model along with the current user query. Model may
@@ -231,3 +271,76 @@ export interface FunctionCallingConfig {
   mode?: FunctionCallingMode;
   allowedFunctionNames?: string[];
 }
+
+/**
+ * Configuration for "thinking" behavior of compatible Gemini models.
+ *
+ * Certain models utilize a thinking process before generating a response. This allows them to
+ * reason through complex problems and plan a more coherent and accurate answer.
+ *
+ * @public
+ */
+export interface ThinkingConfig {
+  /**
+   * The thinking budget, in tokens.
+   *
+   * This parameter sets an upper limit on the number of tokens the model can use for its internal
+   * "thinking" process. A higher budget may result in higher quality responses for complex tasks
+   * but can also increase latency and cost.
+   *
+   * If you don't specify a budget, the model will determine the appropriate amount
+   * of thinking based on the complexity of the prompt.
+   *
+   * An error will be thrown if you set a thinking budget for a model that does not support this
+   * feature or if the specified budget is not within the model's supported range.
+   */
+  thinkingBudget?: number;
+
+  /**
+   * Whether to include "thought summaries" in the model's response.
+   *
+   * @remarks
+   * Thought summaries provide a brief overview of the model's internal thinking process,
+   * offering insight into how it arrived at the final answer. This can be useful for
+   * debugging, understanding the model's reasoning, and verifying its accuracy.
+   */
+  includeThoughts?: boolean;
+}
+
+/**
+ * Configuration for a pre-built voice. 
+ *
+ * @beta
+ */
+export interface PrebuiltVoiceConfig {
+  /**
+   * The voice name to use for speech synthesis.
+   *
+   * For a full list of names and demos of what each voice sounds like, see {@link https://cloud.google.com/text-to-speech/docs/chirp3-hd | Chirp 3: HD Voices}.
+   */
+  voiceName?: string;
+}
+
+/**
+ * Configuration for the voice to be used in speech synthesis.
+ *
+ * @beta
+ */
+export interface VoiceConfig {
+  /**
+   * Configures the voice using a pre-built voice configuration.
+   */
+  prebuiltVoiceConfig?: PrebuiltVoiceConfig;
+}
+
+/**
+ * Configures speech synthesis.
+ *
+ * @beta
+ */
+export interface SpeechConfig {
+  /**
+   * Configures the voice to be used in speech synthesis.
+   */
+  voiceConfig?: VoiceConfig;
+}
diff --git a/packages/ai/lib/types/responses.ts b/packages/ai/lib/types/responses.ts
index 04550f85f2..243d220f6e 100644
--- a/packages/ai/lib/types/responses.ts
+++ b/packages/ai/lib/types/responses.ts
@@ -67,7 +67,28 @@ export interface EnhancedGenerateContentResponse extends GenerateContentResponse
   * @throws If the prompt or candidate was blocked.
   */
  inlineDataParts: () => InlineDataPart[] | undefined;
+  /**
+   * Aggregates and returns every {@link FunctionCall} from the first candidate of
+   * {@link GenerateContentResponse}.
+   *
+   * @throws If the prompt or candidate was blocked.
+   */
  functionCalls: () => FunctionCall[] | undefined;
+  /**
+   * Aggregates and returns every {@link TextPart} with its `thought` property set
+   * to `true` from the first candidate of {@link GenerateContentResponse}.
+   *
+   * @throws If the prompt or candidate was blocked.
+   *
+   * @remarks
+   * Thought summaries provide a brief overview of the model's internal thinking process,
+   * offering insight into how it arrived at the final answer. This can be useful for
+   * debugging, understanding the model's reasoning, and verifying its accuracy.
+   *
+   * Thoughts will only be included if {@link ThinkingConfig.includeThoughts} is
+   * set to `true`.
+   */
+  thoughtSummary: () => string | undefined;
 }

 /**
@@ -91,6 +112,10 @@ export interface GenerateContentResponse {
 export interface UsageMetadata {
   promptTokenCount: number;
   candidatesTokenCount: number;
+  /**
+   * The number of tokens used by the model's internal "thinking" process.
+   */
+  thoughtsTokenCount?: number;
   totalTokenCount: number;
   promptTokensDetails?: ModalityTokenCount[];
   candidatesTokensDetails?: ModalityTokenCount[];
@@ -170,16 +195,98 @@ export interface Citation {
 }

 /**
- * Metadata returned to client when grounding is enabled.
+ * Metadata returned when grounding is enabled.
+ *
+ * Currently, only Grounding with Google Search is supported (see {@link GoogleSearchTool}).
+ *
+ * Important: If using Grounding with Google Search, you are required to comply with the
+ * "Grounding with Google Search" usage requirements for your chosen API provider: {@link https://ai.google.dev/gemini-api/terms#grounding-with-google-search | Gemini Developer API}
+ * or Vertex AI Gemini API (see {@link https://cloud.google.com/terms/service-terms | Service Terms}
+ * section within the Service Specific Terms).
+ *
  * @public
  */
 export interface GroundingMetadata {
+  /**
+   * A list of {@link GroundingChunk} objects. Each chunk represents a piece of retrieved content
+   * (for example, from a web page) that the model used to ground its response.
+   */
+  groundingChunks?: GroundingChunk[];
+  /**
+   * A list of {@link GroundingSupport} objects. Each object details how specific segments of the
+   * model's response are supported by the `groundingChunks`. 
+ */ + groundingSupports?: GroundingSupport[]; + /** + * A list of web search queries that the model performed to gather the grounding information. + * These can be used to allow users to explore the search results themselves. + */ webSearchQueries?: string[]; + /** + * @deprecated Use {@link GroundingSupport} instead. + */ retrievalQueries?: string[]; +} + +/** + * Represents a chunk of retrieved data that supports a claim in the model's response. This is part + * of the grounding information provided when grounding is enabled. + * + * @public + */ +export interface GroundingChunk { /** - * @deprecated + * Contains details if the grounding chunk is from a web source. */ - groundingAttributions: GroundingAttribution[]; + web?: WebGroundingChunk; +} + +/** + * A grounding chunk from the web. + * + * Important: If using Grounding with Google Search, you are required to comply with the + * {@link https://cloud.google.com/terms/service-terms | Service Specific Terms} for "Grounding with Google Search". + * + * @public + */ +export interface WebGroundingChunk { + /** + * The URI of the retrieved web page. + */ + uri?: string; + /** + * The title of the retrieved web page. + */ + title?: string; + /** + * The domain of the original URI from which the content was retrieved. + * + * This property is only supported in the Vertex AI Gemini API ({@link VertexAIBackend}). + * When using the Gemini Developer API ({@link GoogleAIBackend}), this property will be + * `undefined`. + */ + domain?: string; +} + +/** + * Provides information about how a specific segment of the model's response is supported by the + * retrieved grounding chunks. + * + * @public + */ +export interface GroundingSupport { + /** + * Specifies the segment of the model's response content that this grounding support pertains to. + */ + segment?: Segment; + /** + * A list of indices that refer to specific {@link GroundingChunk} objects within the + * {@link GroundingMetadata.groundingChunks} array. These referenced chunks + * are the sources that support the claim made in the associated `segment` of the response. + * For example, an array `[1, 3, 4]` means that `groundingChunks[1]`, `groundingChunks[3]`, + * and `groundingChunks[4]` are the retrieved content supporting this part of the response. + */ + groundingChunkIndices?: number[]; } /** @@ -194,12 +301,34 @@ export interface GroundingAttribution { } /** + * Represents a specific segment within a {@link Content} object, often used to + * pinpoint the exact location of text or data that grounding information refers to. + * * @public */ export interface Segment { + /** + * The zero-based index of the {@link Part} object within the `parts` array + * of its parent {@link Content} object. This identifies which part of the + * content the segment belongs to. + */ partIndex: number; + /** + * The zero-based start index of the segment within the specified `Part`, + * measured in UTF-8 bytes. This offset is inclusive, starting from 0 at the + * beginning of the part's content (e.g., `Part.text`). + */ startIndex: number; + /** + * The zero-based end index of the segment within the specified `Part`, + * measured in UTF-8 bytes. This offset is exclusive, meaning the character + * at this index is not included in the segment. + */ endIndex: number; + /** + * The text corresponding to the segment from the response. + */ + text: string; } /**
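A minimal usage sketch of the API surface this diff introduces: `getAI` with optional options (now defaulting to the `GoogleAIBackend`), `thinkingConfig` on `GenerationConfig`, and the new `thoughtSummary()` helper. This is not part of the diff; the model name is an assumption for illustration, and `getGenerativeModel` is assumed to be exported by this package as in the upstream Firebase AI SDK.

```ts
import { getAI, getGenerativeModel } from '@react-native-firebase/ai';

// With no options, getAI() now falls back to the Gemini Developer API
// backend (GoogleAIBackend) instead of requiring an explicit backend.
const ai = getAI();

// 'gemini-2.5-flash' is an assumed model name, for illustration only.
const model = getGenerativeModel(ai, {
  model: 'gemini-2.5-flash',
  generationConfig: {
    thinkingConfig: {
      includeThoughts: true, // surface "thought summaries" on responses
      thinkingBudget: 1024, // upper bound, in tokens, on internal "thinking"
    },
  },
});

async function run(): Promise<void> {
  const result = await model.generateContent('Why is the sky blue?');
  // text() skips parts flagged `thought: true`; thoughtSummary() aggregates
  // exactly those parts and returns undefined when none are present.
  console.log(result.response.text());
  console.log(result.response.thoughtSummary());
}
```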