From 8ebbae43070d3cffd6bba5628712391f544a5f8f Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 11:03:59 -0700 Subject: [PATCH 01/17] feat: Add prefer_in_cloud option for inference mode This change introduces a new InferenceMode option, prefer_in_cloud. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. --- .changeset/feat-prefer-in-cloud.md | 10 + common/api-review/ai.api.md | 2 + docs-devsite/ai.chromeadapter.md | 16 ++ docs-devsite/ai.md | 1 + packages/ai/src/methods/count-tokens.test.ts | 26 +-- packages/ai/src/methods/count-tokens.ts | 13 +- packages/ai/src/methods/generate-content.ts | 38 ++-- packages/ai/src/methods/helpers.test.ts | 187 +++++++++++++++++++ packages/ai/src/methods/helpers.ts | 60 ++++++ packages/ai/src/types/chrome-adapter.ts | 6 + packages/ai/src/types/enums.ts | 3 +- 11 files changed, 317 insertions(+), 45 deletions(-) create mode 100644 .changeset/feat-prefer-in-cloud.md create mode 100644 packages/ai/src/methods/helpers.test.ts create mode 100644 packages/ai/src/methods/helpers.ts diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md new file mode 100644 index 00000000000..2ccfd0e3b67 --- /dev/null +++ b/.changeset/feat-prefer-in-cloud.md @@ -0,0 +1,10 @@ +--- +"@firebase/ai": minor +"firebase": minor +--- + +feat: Add `prefer_in_cloud` option for inference mode + +This change introduces a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. + +This also includes a refactoring of the logic for dispatching requests to either the on-device or cloud backends to improve clarity and remove duplication. 
diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index 5a8e5df6ab9..4918bf17305 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -152,6 +152,7 @@ export interface ChromeAdapter { generateContent(request: GenerateContentRequest): Promise; generateContentStream(request: GenerateContentRequest): Promise; isAvailable(request: GenerateContentRequest): Promise; + mode: InferenceMode; } // @public @@ -727,6 +728,7 @@ export const InferenceMode: { readonly PREFER_ON_DEVICE: "prefer_on_device"; readonly ONLY_ON_DEVICE: "only_on_device"; readonly ONLY_IN_CLOUD: "only_in_cloud"; + readonly PREFER_IN_CLOUD: "prefer_in_cloud"; }; // @public diff --git a/docs-devsite/ai.chromeadapter.md b/docs-devsite/ai.chromeadapter.md index e9207614992..7c01a0c2b9f 100644 --- a/docs-devsite/ai.chromeadapter.md +++ b/docs-devsite/ai.chromeadapter.md @@ -20,6 +20,12 @@ These methods should not be called directly by the user. export interface ChromeAdapter ``` +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [mode](./ai.chromeadapter.md#chromeadaptermode) | [InferenceMode](./ai.md#inferencemode) | The inference mode. | + ## Methods | Method | Description | @@ -28,6 +34,16 @@ export interface ChromeAdapter | [generateContentStream(request)](./ai.chromeadapter.md#chromeadaptergeneratecontentstream) | Generates a content stream using on-device inference. | | [isAvailable(request)](./ai.chromeadapter.md#chromeadapterisavailable) | Checks if the on-device model is capable of handling a given request. | +## ChromeAdapter.mode + +The inference mode. + +Signature: + +```typescript +mode: InferenceMode; +``` + ## ChromeAdapter.generateContent() Generates content using on-device inference. 
diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index d70b381d6fe..93c231a7324 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -631,6 +631,7 @@ InferenceMode: { readonly PREFER_ON_DEVICE: "prefer_on_device"; readonly ONLY_ON_DEVICE: "only_on_device"; readonly ONLY_IN_CLOUD: "only_in_cloud"; + readonly PREFER_IN_CLOUD: "prefer_in_cloud"; } ``` diff --git a/packages/ai/src/methods/count-tokens.test.ts b/packages/ai/src/methods/count-tokens.test.ts index 56985b4d54e..aabf06a841a 100644 --- a/packages/ai/src/methods/count-tokens.test.ts +++ b/packages/ai/src/methods/count-tokens.test.ts @@ -196,24 +196,16 @@ describe('countTokens()', () => { ); }); }); - it('on-device', async () => { - const chromeAdapter = fakeChromeAdapter; - const isAvailableStub = stub(chromeAdapter, 'isAvailable').resolves(true); - const mockResponse = getMockResponse( - 'vertexAI', - 'unary-success-total-tokens.json' - ); - const countTokensStub = stub(chromeAdapter, 'countTokens').resolves( - mockResponse as Response + it('throws if mode is ONLY_ON_DEVICE', async () => { + const chromeAdapter = new ChromeAdapterImpl( + // @ts-expect-error + undefined, + InferenceMode.ONLY_ON_DEVICE ); - const result = await countTokens( - fakeApiSettings, - 'model', - fakeRequestParams, - chromeAdapter + await expect( + countTokens(fakeApiSettings, 'model', fakeRequestParams, chromeAdapter) + ).to.be.rejectedWith( + /countTokens\(\) is not supported for on-device models/ ); - expect(result.totalTokens).eq(6); - expect(isAvailableStub).to.be.called; - expect(countTokensStub).to.be.calledWith(fakeRequestParams); }); }); diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index 00dde84ab48..c6041a0bb99 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -15,10 +15,13 @@ * limitations under the License. 
*/ +import { AIError } from '../errors'; import { CountTokensRequest, CountTokensResponse, - RequestOptions + InferenceMode, + RequestOptions, + AIErrorCode } from '../types'; import { Task, makeRequest } from '../requests/request'; import { ApiSettings } from '../types/internal'; @@ -57,9 +60,11 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - return (await chromeAdapter.countTokens(params)).json(); + if (chromeAdapter?.mode === InferenceMode.ONLY_ON_DEVICE) { + throw new AIError( + AIErrorCode.UNSUPPORTED, + 'countTokens() is not supported for on-device models.' + ); } - return countTokensOnCloud(apiSettings, model, params, requestOptions); } diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index 2c1c383641f..b07ca704d53 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -9,7 +9,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
@@ -29,6 +29,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; +import { callCloudOrDevice } from './helpers'; async function generateContentStreamOnCloud( apiSettings: ApiSettings, @@ -56,17 +57,13 @@ export async function generateContentStream( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - let response; - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - response = await chromeAdapter.generateContentStream(params); - } else { - response = await generateContentStreamOnCloud( - apiSettings, - model, - params, - requestOptions - ); - } + const response = await callCloudOrDevice( + params, + chromeAdapter, + () => chromeAdapter!.generateContentStream(params), + () => + generateContentStreamOnCloud(apiSettings, model, params, requestOptions) + ); return processStream(response, apiSettings); // TODO: Map streaming responses } @@ -96,17 +93,12 @@ export async function generateContent( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - let response; - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - response = await chromeAdapter.generateContent(params); - } else { - response = await generateContentOnCloud( - apiSettings, - model, - params, - requestOptions - ); - } + const response = await callCloudOrDevice( + params, + chromeAdapter, + () => chromeAdapter!.generateContent(params), + () => generateContentOnCloud(apiSettings, model, params, requestOptions) + ); const generateContentResponse = await processGenerateContentResponse( response, apiSettings diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts new file mode 100644 index 00000000000..cb89ea93435 --- /dev/null +++ b/packages/ai/src/methods/helpers.test.ts @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2025 Google 
LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { expect } from 'chai'; +import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; +import { callCloudOrDevice } from './helpers'; +import { + ChromeAdapter, + GenerateContentRequest, + InferenceMode, + AIErrorCode +} from '../types'; +import { AIError } from '../errors'; + +describe('callCloudOrDevice', () => { + let chromeAdapter: SinonStubbedInstance; + let onDeviceCall: SinonStub; + let inCloudCall: SinonStub; + let request: GenerateContentRequest; + + beforeEach(() => { + chromeAdapter = { + mode: InferenceMode.PREFER_ON_DEVICE, + isAvailable: stub(), + generateContent: stub(), + generateContentStream: stub(), + countTokens: stub() + }; + onDeviceCall = stub().resolves('on-device-response'); + inCloudCall = stub().resolves('in-cloud-response'); + request = { contents: [] }; + }); + + afterEach(() => { + restore(); + }); + + it('should call inCloudCall if chromeAdapter is undefined', async () => { + const result = await callCloudOrDevice( + request, + undefined, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + + describe('PREFER_ON_DEVICE mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; + }); + + it('should call onDeviceCall if available', async () => { + chromeAdapter.isAvailable.resolves(true); + 
const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(onDeviceCall).to.have.been.calledOnce; + expect(inCloudCall).to.not.have.been.called; + }); + + it('should call inCloudCall if not available', async () => { + chromeAdapter.isAvailable.resolves(false); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + describe('ONLY_ON_DEVICE mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; + }); + + it('should call onDeviceCall if available', async () => { + chromeAdapter.isAvailable.resolves(true); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(onDeviceCall).to.have.been.calledOnce; + expect(inCloudCall).to.not.have.been.called; + }); + + it('should call inCloudCall if not available', async () => { + chromeAdapter.isAvailable.resolves(false); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + describe('ONLY_IN_CLOUD mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; + }); + + it('should call inCloudCall even if on-device is available', async () => { + chromeAdapter.isAvailable.resolves(true); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + 
describe('PREFER_IN_CLOUD mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; + }); + + it('should call inCloudCall first', async () => { + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + + it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { + inCloudCall.rejects(new AIError(AIErrorCode.FETCH_ERROR, 'Network error')); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.have.been.calledOnce; + }); + + it('should re-throw other errors from inCloudCall', async () => { + const error = new Error('Some other error'); + inCloudCall.rejects(error); + await expect( + callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) + ).to.be.rejectedWith(error); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); +}); diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts new file mode 100644 index 00000000000..de0651e5da5 --- /dev/null +++ b/packages/ai/src/methods/helpers.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { AIError } from '../errors'; +import { + GenerateContentRequest, + InferenceMode +} from '../types'; +import { ChromeAdapter } from '../types/chrome-adapter'; + +/** + * Dispatches a request to the appropriate backend (on-device or in-cloud) + * based on the inference mode. + * + * @param request - The request to be sent. + * @param chromeAdapter - The on-device model adapter. + * @param onDeviceCall - The function to call for on-device inference. + * @param inCloudCall - The function to call for in-cloud inference. + * @returns The response from the backend. + */ +export async function callCloudOrDevice( + request: GenerateContentRequest, + chromeAdapter: ChromeAdapter | undefined, + onDeviceCall: () => Promise, + inCloudCall: () => Promise +): Promise { + if (!chromeAdapter) { + return inCloudCall(); + } + switch (chromeAdapter.mode) { + case InferenceMode.PREFER_IN_CLOUD: + try { + return await inCloudCall(); + } catch (e) { + if (e instanceof AIError) { + return onDeviceCall(); + } + throw e; + } + default: + if (await chromeAdapter.isAvailable(request)) { + return onDeviceCall(); + } + return inCloudCall(); + } +} diff --git a/packages/ai/src/types/chrome-adapter.ts b/packages/ai/src/types/chrome-adapter.ts index 9ec0dc2a0ab..6702a227fd8 100644 --- a/packages/ai/src/types/chrome-adapter.ts +++ b/packages/ai/src/types/chrome-adapter.ts @@ -16,6 +16,7 @@ */ import { CountTokensRequest, GenerateContentRequest } from './requests'; +import { InferenceMode } from './enums'; /** * (EXPERIMENTAL) Defines an inference "backend" that uses Chrome's on-device model, @@ -27,6 +28,11 @@ import { CountTokensRequest, GenerateContentRequest } from './requests'; * @public */ export interface ChromeAdapter { + /** + * The inference mode. + */ + mode: InferenceMode; + /** * Checks if the on-device model is capable of handling a given * request. 
diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index 701cd4a695d..a161185b470 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -352,7 +352,8 @@ export type ResponseModality = export const InferenceMode = { 'PREFER_ON_DEVICE': 'prefer_on_device', 'ONLY_ON_DEVICE': 'only_on_device', - 'ONLY_IN_CLOUD': 'only_in_cloud' + 'ONLY_IN_CLOUD': 'only_in_cloud', + 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; /** From 4e393902ec1a6766029aa4debd0c62839f215795 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 13:59:29 -0700 Subject: [PATCH 02/17] test: Add comprehensive dispatch logic tests for GenerativeModel This commit adds a new test suite to verify that the GenerativeModel's methods correctly dispatch requests to either the on-device or cloud backends based on the selected InferenceMode. It covers generateContent, generateContentStream, and countTokens. --- packages/ai/src/methods/helpers.test.ts | 4 +- packages/ai/src/methods/helpers.ts | 5 +- .../ai/src/models/generative-model.test.ts | 250 +++++++++++++++++- 3 files changed, 252 insertions(+), 7 deletions(-) diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index cb89ea93435..e45f9ba82c4 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -162,7 +162,9 @@ describe('callCloudOrDevice', () => { }); it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { - inCloudCall.rejects(new AIError(AIErrorCode.FETCH_ERROR, 'Network error')); + inCloudCall.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); const result = await callCloudOrDevice( request, chromeAdapter, diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index de0651e5da5..cda9367ada2 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,10 +16,7 @@ */ import { 
AIError } from '../errors'; -import { - GenerateContentRequest, - InferenceMode -} from '../types'; +import { GenerateContentRequest, InferenceMode } from '../types'; import { ChromeAdapter } from '../types/chrome-adapter'; /** diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 68f1565b26a..4af67a9cfa3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -16,13 +16,20 @@ */ import { use, expect } from 'chai'; import { GenerativeModel } from './generative-model'; -import { FunctionCallingMode, AI, InferenceMode } from '../public-types'; +import { + FunctionCallingMode, + AI, + InferenceMode, + AIErrorCode +} from '../public-types'; import * as request from '../requests/request'; -import { match, restore, stub } from 'sinon'; +import { SinonStub, match, restore, stub } from 'sinon'; import { getMockResponse } from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; +import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; +import { AIError } from '../errors'; use(sinonChai); @@ -406,3 +413,242 @@ describe('GenerativeModel', () => { restore(); }); }); + +describe('GenerativeModel dispatch logic', () => { + let makeRequestStub: SinonStub; + let mockChromeAdapter: ChromeAdapter; + + beforeEach(() => { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponse( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + mockChromeAdapter = { + isAvailable: stub(), + generateContent: stub().resolves({} as Response), + generateContentStream: stub().resolves({} as Response), + countTokens: stub().resolves({} as Response), + mode: InferenceMode.PREFER_ON_DEVICE + }; + }); + + afterEach(() => { + restore(); + }); + + describe('PREFER_ON_DEVICE', () => { + beforeEach(() => { + 
mockChromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; + }); + it('should use on-device for generateContent when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContent when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use on-device for generateContentStream when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContentStream when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + 
}); + }); + + describe('ONLY_ON_DEVICE', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; + }); + it('should use on-device for generateContent when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContent when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should throw for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await expect(model.countTokens('hello')).to.be.rejectedWith( + /countTokens\(\) is not supported for on-device models/ + ); + expect(makeRequestStub).to.not.have.been.called; + }); + }); + + describe('ONLY_IN_CLOUD', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; + }); + it('should use cloud for generateContent even when on-device is available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for generateContentStream even when on-device is available', async () => { + (mockChromeAdapter.isAvailable as 
SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + }); + }); + + describe('PREFER_IN_CLOUD', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; + }); + it('should use cloud for generateContent when available', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + }); + it('should fall back to on-device for generateContent if cloud fails', async () => { + makeRequestStub.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + }); + it('should use cloud for generateContentStream when available', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + }); + it('should fall back to on-device for generateContentStream if cloud fails', async () => { + 
makeRequestStub.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + }); + }); +}); From 6619ed421033a6dc220d19d7ba8dbd99b69ef8be Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:16:07 -0700 Subject: [PATCH 03/17] fix tests --- packages/ai/src/methods/helpers.test.ts | 21 ++-- packages/ai/src/methods/helpers.ts | 14 ++- .../ai/src/models/generative-model.test.ts | 99 +++++++++++++++---- 3 files changed, 103 insertions(+), 31 deletions(-) diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index e45f9ba82c4..8b9541fa58e 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -import { expect } from 'chai'; +import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; import { @@ -25,6 +25,11 @@ import { AIErrorCode } from '../types'; import { AIError } from '../errors'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; + +use(sinonChai); +use(chaiAsPromised); describe('callCloudOrDevice', () => { let chromeAdapter: SinonStubbedInstance; @@ -111,16 +116,12 @@ describe('callCloudOrDevice', () => { expect(inCloudCall).to.not.have.been.called; }); - it('should call inCloudCall if not available', async () => { + it('should throw if not available', async () => { chromeAdapter.isAvailable.resolves(false); - const result = await callCloudOrDevice( - request, - chromeAdapter, - onDeviceCall, - inCloudCall - ); - expect(result).to.equal('in-cloud-response'); - expect(inCloudCall).to.have.been.calledOnce; + await expect( + callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) + ).to.be.rejectedWith(/On-device model is not available/); + expect(inCloudCall).to.not.have.been.called; expect(onDeviceCall).to.not.have.been.called; }); }); diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index cda9367ada2..a6a9e309134 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,7 +16,7 @@ */ import { AIError } from '../errors'; -import { GenerateContentRequest, InferenceMode } from '../types'; +import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { ChromeAdapter } from '../types/chrome-adapter'; /** @@ -39,6 +39,16 @@ export async function callCloudOrDevice( return inCloudCall(); } switch (chromeAdapter.mode) { + case InferenceMode.ONLY_ON_DEVICE: + if (await chromeAdapter.isAvailable(request)) { + return onDeviceCall(); + } + throw new AIError( + AIErrorCode.UNSUPPORTED, + 'On-device model is not 
available.' + ); + case InferenceMode.ONLY_IN_CLOUD: + return inCloudCall(); case InferenceMode.PREFER_IN_CLOUD: try { return await inCloudCall(); @@ -48,7 +58,7 @@ export async function callCloudOrDevice( } throw e; } - default: + default: // PREFER_ON_DEVICE if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); } diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 4af67a9cfa3..7646a15d1d3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -24,14 +24,19 @@ import { } from '../public-types'; import * as request from '../requests/request'; import { SinonStub, match, restore, stub } from 'sinon'; -import { getMockResponse } from '../../test-utils/mock-response'; +import { + getMockResponse, + getMockResponseStreaming +} from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; import { AIError } from '../errors'; +import chaiAsPromised from 'chai-as-promised'; use(sinonChai); +use(chaiAsPromised); const fakeAI: AI = { app: { @@ -418,18 +423,32 @@ describe('GenerativeModel dispatch logic', () => { let makeRequestStub: SinonStub; let mockChromeAdapter: ChromeAdapter; + function stubMakeRequest(stream?: boolean): void { + if (stream) { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponseStreaming( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + } else { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponse( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + } + } + beforeEach(() => { - makeRequestStub = stub(request, 'makeRequest').resolves( - getMockResponse( - 'vertexAI', - 'unary-success-basic-reply-short.json' - ) as Response - ); 
mockChromeAdapter = { isAvailable: stub(), - generateContent: stub().resolves({} as Response), - generateContentStream: stub().resolves({} as Response), - countTokens: stub().resolves({} as Response), + generateContent: stub().resolves(new Response(JSON.stringify({}))), + generateContentStream: stub().resolves( + new Response(new ReadableStream()) + ), + countTokens: stub().resolves(new Response(JSON.stringify({}))), mode: InferenceMode.PREFER_ON_DEVICE }; }); @@ -443,6 +462,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; }); it('should use on-device for generateContent when available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -455,6 +475,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.not.have.been.called; }); it('should use cloud for generateContent when on-device is not available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -467,6 +488,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.have.been.calledOnce; }); it('should use on-device for generateContentStream when available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -479,6 +501,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.not.have.been.called; }); it('should use cloud for generateContentStream when on-device is not available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -491,6 +514,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.have.been.calledOnce; }); it('should use cloud for countTokens', async () => { + 
stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -507,6 +531,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; }); it('should use on-device for generateContent when available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -518,7 +543,8 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; expect(makeRequestStub).to.not.have.been.called; }); - it('should use cloud for generateContent when on-device is not available', async () => { + it('generateContent should throw when on-device is not available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -526,20 +552,49 @@ describe('GenerativeModel dispatch logic', () => { {}, mockChromeAdapter ); - await model.generateContent('hello'); + await expect(model.generateContent('hello')).to.be.rejectedWith( + /On-device model is not available/ + ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; - expect(makeRequestStub).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use on-device for generateContentStream when available', async () => { + stubMakeRequest(true); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; }); - it('should throw for countTokens', async () => { + it('generateContentStream should throw when on-device is not available', async () => { + stubMakeRequest(true); + (mockChromeAdapter.isAvailable as 
SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, { model: 'model' }, {}, mockChromeAdapter ); - await expect(model.countTokens('hello')).to.be.rejectedWith( - /countTokens\(\) is not supported for on-device models/ + await expect(model.generateContentStream('hello')).to.be.rejectedWith( + /On-device model is not available/ ); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should always throw for countTokens', async () => { + stubMakeRequest(); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await expect(model.countTokens('hello')).to.be.rejectedWith(AIError); expect(makeRequestStub).to.not.have.been.called; }); }); @@ -549,6 +604,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; }); it('should use cloud for generateContent even when on-device is available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -557,10 +613,11 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await model.generateContent('hello'); - expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.not.have.been.called; }); it('should use cloud for generateContentStream even when on-device is available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -569,10 +626,11 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await model.generateContentStream('hello'); - expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; expect(makeRequestStub).to.have.been.calledOnce; + 
expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; }); - it('should use cloud for countTokens', async () => { + it('should always use cloud for countTokens', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -589,6 +647,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; }); it('should use cloud for generateContent when available', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -615,6 +674,7 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; }); it('should use cloud for generateContentStream when available', async () => { + stubMakeRequest(true); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -641,6 +701,7 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; }); it('should use cloud for countTokens', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, From cadfd095c226c96ee7097c9bc9f7502c9711e184 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:19:41 -0700 Subject: [PATCH 04/17] stop publicly exposing mode --- common/api-review/ai.api.md | 1 - docs-devsite/ai.chromeadapter.md | 16 ---------------- packages/ai/src/methods/helpers.ts | 4 ++-- packages/ai/src/types/chrome-adapter.ts | 6 ------ 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index 4918bf17305..43a10169cfa 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -152,7 +152,6 @@ export interface ChromeAdapter { generateContent(request: GenerateContentRequest): Promise; generateContentStream(request: GenerateContentRequest): Promise; isAvailable(request: GenerateContentRequest): Promise; - mode: 
InferenceMode; } // @public diff --git a/docs-devsite/ai.chromeadapter.md b/docs-devsite/ai.chromeadapter.md index 7c01a0c2b9f..e9207614992 100644 --- a/docs-devsite/ai.chromeadapter.md +++ b/docs-devsite/ai.chromeadapter.md @@ -20,12 +20,6 @@ These methods should not be called directly by the user. export interface ChromeAdapter ``` -## Properties - -| Property | Type | Description | -| --- | --- | --- | -| [mode](./ai.chromeadapter.md#chromeadaptermode) | [InferenceMode](./ai.md#inferencemode) | The inference mode. | - ## Methods | Method | Description | @@ -34,16 +28,6 @@ export interface ChromeAdapter | [generateContentStream(request)](./ai.chromeadapter.md#chromeadaptergeneratecontentstream) | Generates a content stream using on-device inference. | | [isAvailable(request)](./ai.chromeadapter.md#chromeadapterisavailable) | Checks if the on-device model is capable of handling a given request. | -## ChromeAdapter.mode - -The inference mode. - -Signature: - -```typescript -mode: InferenceMode; -``` - ## ChromeAdapter.generateContent() Generates content using on-device inference. 
diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index a6a9e309134..6325ab97133 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -17,7 +17,7 @@ import { AIError } from '../errors'; import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; -import { ChromeAdapter } from '../types/chrome-adapter'; +import { ChromeAdapterImpl } from './chrome-adapter'; /** * Dispatches a request to the appropriate backend (on-device or in-cloud) @@ -31,7 +31,7 @@ import { ChromeAdapter } from '../types/chrome-adapter'; */ export async function callCloudOrDevice( request: GenerateContentRequest, - chromeAdapter: ChromeAdapter | undefined, + chromeAdapter: ChromeAdapterImpl | undefined, onDeviceCall: () => Promise, inCloudCall: () => Promise ): Promise { diff --git a/packages/ai/src/types/chrome-adapter.ts b/packages/ai/src/types/chrome-adapter.ts index 6702a227fd8..9ec0dc2a0ab 100644 --- a/packages/ai/src/types/chrome-adapter.ts +++ b/packages/ai/src/types/chrome-adapter.ts @@ -16,7 +16,6 @@ */ import { CountTokensRequest, GenerateContentRequest } from './requests'; -import { InferenceMode } from './enums'; /** * (EXPERIMENTAL) Defines an inference "backend" that uses Chrome's on-device model, @@ -28,11 +27,6 @@ import { InferenceMode } from './enums'; * @public */ export interface ChromeAdapter { - /** - * The inference mode. - */ - mode: InferenceMode; - /** * Checks if the on-device model is capable of handling a given * request. 
From 36deac4271b4853f71a63fb83327222befd70e14 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:32:39 -0700 Subject: [PATCH 05/17] fix ts errors --- packages/ai/src/methods/count-tokens.ts | 3 ++- packages/ai/src/methods/helpers.test.ts | 5 +++-- packages/ai/src/methods/helpers.ts | 11 ++++++++--- packages/ai/src/models/generative-model.test.ts | 4 ++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index c6041a0bb99..d3974641881 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -28,6 +28,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; +import { ChromeAdapterImpl } from './chrome-adapter'; export async function countTokensOnCloud( apiSettings: ApiSettings, @@ -60,7 +61,7 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if (chromeAdapter?.mode === InferenceMode.ONLY_ON_DEVICE) { + if ((chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE) { throw new AIError( AIErrorCode.UNSUPPORTED, 'countTokens() is not supported for on-device models.' 
diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index 8b9541fa58e..13d789d5ec1 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -19,7 +19,6 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; import { - ChromeAdapter, GenerateContentRequest, InferenceMode, AIErrorCode @@ -27,17 +26,19 @@ import { import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; +import { ChromeAdapterImpl } from './chrome-adapter'; use(sinonChai); use(chaiAsPromised); describe('callCloudOrDevice', () => { - let chromeAdapter: SinonStubbedInstance; + let chromeAdapter: SinonStubbedInstance; let onDeviceCall: SinonStub; let inCloudCall: SinonStub; let request: GenerateContentRequest; beforeEach(() => { + // @ts-ignore chromeAdapter = { mode: InferenceMode.PREFER_ON_DEVICE, isAvailable: stub(), diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index 6325ab97133..4adde787935 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,7 +16,12 @@ */ import { AIError } from '../errors'; -import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; +import { + GenerateContentRequest, + InferenceMode, + AIErrorCode, + ChromeAdapter +} from '../types'; import { ChromeAdapterImpl } from './chrome-adapter'; /** @@ -31,14 +36,14 @@ import { ChromeAdapterImpl } from './chrome-adapter'; */ export async function callCloudOrDevice( request: GenerateContentRequest, - chromeAdapter: ChromeAdapterImpl | undefined, + chromeAdapter: ChromeAdapter | undefined, onDeviceCall: () => Promise, inCloudCall: () => Promise ): Promise { if (!chromeAdapter) { return inCloudCall(); } - switch (chromeAdapter.mode) { + switch ((chromeAdapter as ChromeAdapterImpl).mode) { 
case InferenceMode.ONLY_ON_DEVICE: if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 7646a15d1d3..eb53eeb2eb7 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -30,7 +30,6 @@ import { } from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; -import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; import { AIError } from '../errors'; import chaiAsPromised from 'chai-as-promised'; @@ -421,7 +420,7 @@ describe('GenerativeModel', () => { describe('GenerativeModel dispatch logic', () => { let makeRequestStub: SinonStub; - let mockChromeAdapter: ChromeAdapter; + let mockChromeAdapter: ChromeAdapterImpl; function stubMakeRequest(stream?: boolean): void { if (stream) { @@ -442,6 +441,7 @@ describe('GenerativeModel dispatch logic', () => { } beforeEach(() => { + // @ts-ignore mockChromeAdapter = { isAvailable: stub(), generateContent: stub().resolves(new Response(JSON.stringify({}))), From 0aadd2e6c6995fec4566f556a29da52a9ee054a3 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:33:58 -0700 Subject: [PATCH 06/17] format --- packages/ai/src/methods/count-tokens.ts | 4 +++- packages/ai/src/methods/helpers.test.ts | 6 +----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index d3974641881..ecd86a82912 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -61,7 +61,9 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if ((chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE) { + if ( + 
(chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE + ) { throw new AIError( AIErrorCode.UNSUPPORTED, 'countTokens() is not supported for on-device models.' diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index 13d789d5ec1..0c4d14e8291 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -18,11 +18,7 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; -import { - GenerateContentRequest, - InferenceMode, - AIErrorCode -} from '../types'; +import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; From f929d6cf0b04b0d15eb009b1ab1ddf62003e1eb4 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 09:37:55 -0700 Subject: [PATCH 07/17] undo grammar fix --- packages/ai/src/methods/generate-content.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index b07ca704d53..cb54d3bd34d 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -9,7 +9,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS-IS" BASIS, + * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
From 8c9c1c2d305c14f96fe492f8ec550dfa7b7920a3 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 09:56:51 -0700 Subject: [PATCH 08/17] fix prefer_on_cloud fallback criteria --- packages/ai/src/methods/generate-content.ts | 2 +- .../hybrid-helpers.test.ts} | 8 ++++---- .../helpers.ts => requests/hybrid-helpers.ts} | 13 +++++++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) rename packages/ai/src/{methods/helpers.test.ts => requests/hybrid-helpers.test.ts} (95%) rename packages/ai/src/{methods/helpers.ts => requests/hybrid-helpers.ts} (84%) diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index cb54d3bd34d..0e65b479343 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -29,7 +29,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; -import { callCloudOrDevice } from './helpers'; +import { callCloudOrDevice } from '../requests/hybrid-helpers'; async function generateContentStreamOnCloud( apiSettings: ApiSettings, diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/requests/hybrid-helpers.test.ts similarity index 95% rename from packages/ai/src/methods/helpers.test.ts rename to packages/ai/src/requests/hybrid-helpers.test.ts index 0c4d14e8291..923f65f249b 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/requests/hybrid-helpers.test.ts @@ -17,12 +17,12 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; -import { callCloudOrDevice } from './helpers'; +import { callCloudOrDevice } from './hybrid-helpers'; import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import 
chaiAsPromised from 'chai-as-promised'; -import { ChromeAdapterImpl } from './chrome-adapter'; +import { ChromeAdapterImpl } from '../methods/chrome-adapter'; use(sinonChai); use(chaiAsPromised); @@ -159,7 +159,7 @@ describe('callCloudOrDevice', () => { expect(onDeviceCall).to.not.have.been.called; }); - it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { + it('should fall back to onDeviceCall if inCloudCall fails with AIErrorCode.FETCH_ERROR', async () => { inCloudCall.rejects( new AIError(AIErrorCode.FETCH_ERROR, 'Network error') ); @@ -175,7 +175,7 @@ describe('callCloudOrDevice', () => { }); it('should re-throw other errors from inCloudCall', async () => { - const error = new Error('Some other error'); + const error = new AIError(AIErrorCode.RESPONSE_ERROR, 'safety problem'); inCloudCall.rejects(error); await expect( callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts similarity index 84% rename from packages/ai/src/methods/helpers.ts rename to packages/ai/src/requests/hybrid-helpers.ts index 4adde787935..343f3e21834 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -22,7 +22,16 @@ import { AIErrorCode, ChromeAdapter } from '../types'; -import { ChromeAdapterImpl } from './chrome-adapter'; +import { ChromeAdapterImpl } from '../methods/chrome-adapter'; + +const errorsCausingFallback: AIErrorCode[] = [ + // most network errors + AIErrorCode.FETCH_ERROR, + // fallback code for all other errors in makeRequest + AIErrorCode.ERROR, + // error due to API not being enabled in project + AIErrorCode.API_NOT_ENABLED +]; /** * Dispatches a request to the appropriate backend (on-device or in-cloud) @@ -58,7 +67,7 @@ export async function callCloudOrDevice( try { return await inCloudCall(); } catch (e) { - if (e instanceof AIError) { + if (e instanceof AIError && 
errorsCausingFallback.includes(e.code)) { return onDeviceCall(); } throw e; From 34094c8748d2a61292a678b50f1f50078885372b Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 12:09:10 -0700 Subject: [PATCH 09/17] Apply suggestions from code review Co-authored-by: Daniel La Rocque --- packages/ai/src/requests/hybrid-helpers.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/ai/src/requests/hybrid-helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts index 343f3e21834..c2f13a6bd90 100644 --- a/packages/ai/src/requests/hybrid-helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -59,7 +59,7 @@ export async function callCloudOrDevice( } throw new AIError( AIErrorCode.UNSUPPORTED, - 'On-device model is not available.' + 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.' ); case InferenceMode.ONLY_IN_CLOUD: return inCloudCall(); @@ -72,10 +72,12 @@ export async function callCloudOrDevice( } throw e; } - default: // PREFER_ON_DEVICE + case InferenceMode.PREFER_ON_DEVICE: if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); } return inCloudCall(); + default: + throw new AIError(AIErrorCode.Error, `Unexpected infererence mode: ${(chromeAdapter as ChromeAdapterImpl).mode}`); } } From 7c4f837ac1e1704ca53455dd938495c9eebb7c69 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 12:29:13 -0700 Subject: [PATCH 10/17] Add doc comments on InferenceModes --- packages/ai/src/types/enums.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index a161185b470..d765a4bbbfc 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -350,9 +350,26 @@ export type ResponseModality = * @public */ export const InferenceMode = { + /** + * Attempt to make inference calls on-device. If on-device + * inference is not available, it will fall back to cloud. 
+ */ 'PREFER_ON_DEVICE': 'prefer_on_device', + /** + * Only attempt to make inference calls on-device. It will not + * fall back to cloud. If on-device inference is not available, + * inference methods will throw. + */ 'ONLY_ON_DEVICE': 'only_on_device', + /** + * Only attempt to make inference calls to the cloud. It will not + * fall back to on-device. + */ 'ONLY_IN_CLOUD': 'only_in_cloud', + /** + * Attempt to make inference calls to the cloud. If not available, + * it will fall back to on-device. + */ 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; From db53108f94066541d636c2c2ac34332849bd9cf4 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 14:53:37 -0700 Subject: [PATCH 11/17] Update .changeset/feat-prefer-in-cloud.md Co-authored-by: Daniel La Rocque --- .changeset/feat-prefer-in-cloud.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md index 2ccfd0e3b67..10c1366cebc 100644 --- a/.changeset/feat-prefer-in-cloud.md +++ b/.changeset/feat-prefer-in-cloud.md @@ -3,8 +3,4 @@ "firebase": minor --- -feat: Add `prefer_in_cloud` option for inference mode - -This change introduces a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. - -This also includes a refactoring of the logic for dispatching requests to either the on-device or cloud backends to improve clarity and remove duplication. +Added a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. 
From a1af166cfc757e65571dd698d9ad36ef3f1db154 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 16:50:12 -0700 Subject: [PATCH 12/17] fix ERROR casing and formatting --- packages/ai/src/requests/hybrid-helpers.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/ai/src/requests/hybrid-helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts index c2f13a6bd90..3140594c00e 100644 --- a/packages/ai/src/requests/hybrid-helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -78,6 +78,11 @@ export async function callCloudOrDevice( } return inCloudCall(); default: - throw new AIError(AIErrorCode.Error, `Unexpected infererence mode: ${(chromeAdapter as ChromeAdapterImpl).mode}`); + throw new AIError( + AIErrorCode.ERROR, + `Unexpected infererence mode: ${ + (chromeAdapter as ChromeAdapterImpl).mode + }` + ); } } From 7fd62c431f3b5a20c2b1cb2c2e2415656cb5c35f Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Thu, 4 Sep 2025 09:32:54 -0700 Subject: [PATCH 13/17] fix expected error case in tests --- packages/ai/src/models/generative-model.test.ts | 4 ++-- packages/ai/src/requests/hybrid-helpers.test.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index eb53eeb2eb7..9c5074b8eb3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -553,7 +553,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await expect(model.generateContent('hello')).to.be.rejectedWith( - /On-device model is not available/ + /on-device model is not available/ ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.not.have.been.called; @@ -581,7 +581,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await expect(model.generateContentStream('hello')).to.be.rejectedWith( - 
/On-device model is not available/ + /on-device model is not available/ ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.not.have.been.called; diff --git a/packages/ai/src/requests/hybrid-helpers.test.ts b/packages/ai/src/requests/hybrid-helpers.test.ts index 923f65f249b..a758f34ad21 100644 --- a/packages/ai/src/requests/hybrid-helpers.test.ts +++ b/packages/ai/src/requests/hybrid-helpers.test.ts @@ -117,7 +117,7 @@ describe('callCloudOrDevice', () => { chromeAdapter.isAvailable.resolves(false); await expect( callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) - ).to.be.rejectedWith(/On-device model is not available/); + ).to.be.rejectedWith(/on-device model is not available/); expect(inCloudCall).to.not.have.been.called; expect(onDeviceCall).to.not.have.been.called; }); From d4e843e9b8007a5404bada746c0ed500a200e3ac Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Thu, 4 Sep 2025 12:20:43 -0700 Subject: [PATCH 14/17] Move InferenceMode doc comment to be on top of type --- docs-devsite/ai.md | 2 ++ packages/ai/src/types/enums.ts | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index 93c231a7324..b0f839bcfba 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -863,6 +863,8 @@ export type ImagenSafetyFilterLevel = (typeof ImagenSafetyFilterLevel)[keyof typ (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. +PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. + Signature: ```typescript diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index d765a4bbbfc..6bb84949e25 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -350,32 +350,29 @@ export type ResponseModality = * @public */ export const InferenceMode = { - /** - * Attempt to make inference calls on-device. If on-device - * inference is not available, it will fall back to cloud. - */ 'PREFER_ON_DEVICE': 'prefer_on_device', - /** - * Only attempt to make inference calls on-device. It will not - * fall back to cloud. If on-device inference is not available, - * inference methods will throw. - */ 'ONLY_ON_DEVICE': 'only_on_device', - /** - * Only attempt to make inference calls to the cloud. It will not - * fall back to on-device. - */ 'ONLY_IN_CLOUD': 'only_in_cloud', - /** - * Attempt to make inference calls to the cloud. If not available, - * it will fall back to on-device. - */ 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; /** * (EXPERIMENTAL) * Determines whether inference happens on-device or in-cloud. + * @remarks + * PREFER_ON_DEVICE: Attempt to make inference calls on-device. + * If on-device inference is not available, it will fall back to cloud. + *
+ * ONLY_ON_DEVICE: Only attempt to make inference calls on-device. + * It will not fall back to cloud. If on-device inference is not available, + * inference methods will throw. + *
+ * ONLY_IN_CLOUD: Only attempt to make inference calls to the cloud. + * It will not fall back to on-device. + *
+ * PREFER_IN_CLOUD: Attempt to make inference calls to the cloud. + * If not available, it will fall back to on-device. + * * @public */ export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode]; From 15908c7c74f3c9b9912bc868e3852d8f828ab977 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:12:57 -0700 Subject: [PATCH 15/17] move docs to where they will be seen --- docs-devsite/ai.md | 4 ++-- packages/ai/src/types/enums.ts | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index b0f839bcfba..bb0caec5911 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -624,6 +624,8 @@ ImagenSafetyFilterLevel: { (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. +PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. + Signature: ```typescript @@ -863,8 +865,6 @@ export type ImagenSafetyFilterLevel = (typeof ImagenSafetyFilterLevel)[keyof typ (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. -PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. - Signature: ```typescript diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index 6bb84949e25..b41afd3e111 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -347,18 +347,7 @@ export type ResponseModality = /** * (EXPERIMENTAL) * Determines whether inference happens on-device or in-cloud. - * @public - */ -export const InferenceMode = { - 'PREFER_ON_DEVICE': 'prefer_on_device', - 'ONLY_ON_DEVICE': 'only_on_device', - 'ONLY_IN_CLOUD': 'only_in_cloud', - 'PREFER_IN_CLOUD': 'prefer_in_cloud' -} as const; - -/** - * (EXPERIMENTAL) - * Determines whether inference happens on-device or in-cloud. + * * @remarks * PREFER_ON_DEVICE: Attempt to make inference calls on-device. * If on-device inference is not available, it will fall back to cloud. @@ -375,4 +364,17 @@ export const InferenceMode = { * * @public */ +export const InferenceMode = { + 'PREFER_ON_DEVICE': 'prefer_on_device', + 'ONLY_ON_DEVICE': 'only_on_device', + 'ONLY_IN_CLOUD': 'only_in_cloud', + 'PREFER_IN_CLOUD': 'prefer_in_cloud' +} as const; + +/** + * (EXPERIMENTAL) + * Determines whether inference happens on-device or in-cloud. 
+ * + * @public + */ export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode]; From d165c4289450f45a4fbc82532a2d594e3adba026 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:52:52 -0700 Subject: [PATCH 16/17] Update .changeset/feat-prefer-in-cloud.md Co-authored-by: rachelsaunders <52258509+rachelsaunders@users.noreply.github.com> --- .changeset/feat-prefer-in-cloud.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md index 10c1366cebc..90f859c2da8 100644 --- a/.changeset/feat-prefer-in-cloud.md +++ b/.changeset/feat-prefer-in-cloud.md @@ -3,4 +3,4 @@ "firebase": minor --- -Added a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. +Added a new `InferenceMode` option for the hybrid on-device capability: `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use a cloud-hosted model first. If the call to the cloud-hosted model fails with a network-related error, the SDK will fall back to the on-device model, if it's available. From 4324ddd989e7824eb780b0fe1e0646534973676a Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:58:41 -0700 Subject: [PATCH 17/17] Address comment --- docs-devsite/ai.md | 2 +- packages/ai/src/types/enums.ts | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index bb0caec5911..d94c31155de 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -624,7 +624,7 @@ ImagenSafetyFilterLevel: { (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. -PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. +PREFER\_ON\_DEVICE: Attempt to make inference calls using an on-device model. If on-device inference is not available, the SDK will fall back to using a cloud-hosted model.
ONLY\_ON\_DEVICE: Only attempt to make inference calls using an on-device model. The SDK will not fall back to a cloud-hosted model. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls using a cloud-hosted model. The SDK will not fall back to an on-device model.
PREFER\_IN\_CLOUD: Attempt to make inference calls to a cloud-hosted model. If the call to the cloud-hosted model fails with a network-related error, the SDK will fall back to an on-device model, if it's available.
 
 Signature:
 
diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts
index b41afd3e111..de70d325157 100644
--- a/packages/ai/src/types/enums.ts
+++ b/packages/ai/src/types/enums.ts
@@ -349,18 +349,20 @@ export type ResponseModality =
  * Determines whether inference happens on-device or in-cloud.
  *
  * @remarks
- * PREFER_ON_DEVICE: Attempt to make inference calls on-device.
- * If on-device inference is not available, it will fall back to cloud.
+ * PREFER_ON_DEVICE: Attempt to make inference calls using an
+ * on-device model. If on-device inference is not available, the SDK
+ * will fall back to using a cloud-hosted model.
  *
- * ONLY_ON_DEVICE: Only attempt to make inference calls on-device. - * It will not fall back to cloud. If on-device inference is not available, - * inference methods will throw. + * ONLY_ON_DEVICE: Only attempt to make inference calls using an + * on-device model. The SDK will not fall back to a cloud-hosted model. + * If on-device inference is not available, inference methods will throw. *
- * ONLY_IN_CLOUD: Only attempt to make inference calls to the cloud. - * It will not fall back to on-device. + * ONLY_IN_CLOUD: Only attempt to make inference calls using a + * cloud-hosted model. The SDK will not fall back to an on-device model. *
- * PREFER_IN_CLOUD: Attempt to make inference calls to a
+ * cloud-hosted model. If the call fails with a network-related error,
+ * the SDK will fall back to an on-device model, if it's available.
  *
  * @public
  */