From 8ebbae43070d3cffd6bba5628712391f544a5f8f Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 11:03:59 -0700 Subject: [PATCH 01/17] feat: Add prefer_in_cloud option for inference mode This change introduces a new InferenceMode option, prefer_in_cloud. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. --- .changeset/feat-prefer-in-cloud.md | 10 + common/api-review/ai.api.md | 2 + docs-devsite/ai.chromeadapter.md | 16 ++ docs-devsite/ai.md | 1 + packages/ai/src/methods/count-tokens.test.ts | 26 +-- packages/ai/src/methods/count-tokens.ts | 13 +- packages/ai/src/methods/generate-content.ts | 38 ++-- packages/ai/src/methods/helpers.test.ts | 187 +++++++++++++++++++ packages/ai/src/methods/helpers.ts | 60 ++++++ packages/ai/src/types/chrome-adapter.ts | 6 + packages/ai/src/types/enums.ts | 3 +- 11 files changed, 317 insertions(+), 45 deletions(-) create mode 100644 .changeset/feat-prefer-in-cloud.md create mode 100644 packages/ai/src/methods/helpers.test.ts create mode 100644 packages/ai/src/methods/helpers.ts diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md new file mode 100644 index 00000000000..2ccfd0e3b67 --- /dev/null +++ b/.changeset/feat-prefer-in-cloud.md @@ -0,0 +1,10 @@ +--- +"@firebase/ai": minor +"firebase": minor +--- + +feat: Add `prefer_in_cloud` option for inference mode + +This change introduces a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. + +This also includes a refactoring of the logic for dispatching requests to either the on-device or cloud backends to improve clarity and remove duplication. 
diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index 5a8e5df6ab9..4918bf17305 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -152,6 +152,7 @@ export interface ChromeAdapter { generateContent(request: GenerateContentRequest): Promise; generateContentStream(request: GenerateContentRequest): Promise; isAvailable(request: GenerateContentRequest): Promise; + mode: InferenceMode; } // @public @@ -727,6 +728,7 @@ export const InferenceMode: { readonly PREFER_ON_DEVICE: "prefer_on_device"; readonly ONLY_ON_DEVICE: "only_on_device"; readonly ONLY_IN_CLOUD: "only_in_cloud"; + readonly PREFER_IN_CLOUD: "prefer_in_cloud"; }; // @public diff --git a/docs-devsite/ai.chromeadapter.md b/docs-devsite/ai.chromeadapter.md index e9207614992..7c01a0c2b9f 100644 --- a/docs-devsite/ai.chromeadapter.md +++ b/docs-devsite/ai.chromeadapter.md @@ -20,6 +20,12 @@ These methods should not be called directly by the user. export interface ChromeAdapter ``` +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [mode](./ai.chromeadapter.md#chromeadaptermode) | [InferenceMode](./ai.md#inferencemode) | The inference mode. | + ## Methods | Method | Description | @@ -28,6 +34,16 @@ export interface ChromeAdapter | [generateContentStream(request)](./ai.chromeadapter.md#chromeadaptergeneratecontentstream) | Generates a content stream using on-device inference. | | [isAvailable(request)](./ai.chromeadapter.md#chromeadapterisavailable) | Checks if the on-device model is capable of handling a given request. | +## ChromeAdapter.mode + +The inference mode. + +Signature: + +```typescript +mode: InferenceMode; +``` + ## ChromeAdapter.generateContent() Generates content using on-device inference. 
diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index d70b381d6fe..93c231a7324 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -631,6 +631,7 @@ InferenceMode: { readonly PREFER_ON_DEVICE: "prefer_on_device"; readonly ONLY_ON_DEVICE: "only_on_device"; readonly ONLY_IN_CLOUD: "only_in_cloud"; + readonly PREFER_IN_CLOUD: "prefer_in_cloud"; } ``` diff --git a/packages/ai/src/methods/count-tokens.test.ts b/packages/ai/src/methods/count-tokens.test.ts index 56985b4d54e..aabf06a841a 100644 --- a/packages/ai/src/methods/count-tokens.test.ts +++ b/packages/ai/src/methods/count-tokens.test.ts @@ -196,24 +196,16 @@ describe('countTokens()', () => { ); }); }); - it('on-device', async () => { - const chromeAdapter = fakeChromeAdapter; - const isAvailableStub = stub(chromeAdapter, 'isAvailable').resolves(true); - const mockResponse = getMockResponse( - 'vertexAI', - 'unary-success-total-tokens.json' - ); - const countTokensStub = stub(chromeAdapter, 'countTokens').resolves( - mockResponse as Response + it('throws if mode is ONLY_ON_DEVICE', async () => { + const chromeAdapter = new ChromeAdapterImpl( + // @ts-expect-error + undefined, + InferenceMode.ONLY_ON_DEVICE ); - const result = await countTokens( - fakeApiSettings, - 'model', - fakeRequestParams, - chromeAdapter + await expect( + countTokens(fakeApiSettings, 'model', fakeRequestParams, chromeAdapter) + ).to.be.rejectedWith( + /countTokens\(\) is not supported for on-device models/ ); - expect(result.totalTokens).eq(6); - expect(isAvailableStub).to.be.called; - expect(countTokensStub).to.be.calledWith(fakeRequestParams); }); }); diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index 00dde84ab48..c6041a0bb99 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -15,10 +15,13 @@ * limitations under the License. 
*/ +import { AIError } from '../errors'; import { CountTokensRequest, CountTokensResponse, - RequestOptions + InferenceMode, + RequestOptions, + AIErrorCode } from '../types'; import { Task, makeRequest } from '../requests/request'; import { ApiSettings } from '../types/internal'; @@ -57,9 +60,11 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - return (await chromeAdapter.countTokens(params)).json(); + if (chromeAdapter?.mode === InferenceMode.ONLY_ON_DEVICE) { + throw new AIError( + AIErrorCode.UNSUPPORTED, + 'countTokens() is not supported for on-device models.' + ); } - return countTokensOnCloud(apiSettings, model, params, requestOptions); } diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index 2c1c383641f..b07ca704d53 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -9,7 +9,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
@@ -29,6 +29,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; +import { callCloudOrDevice } from './helpers'; async function generateContentStreamOnCloud( apiSettings: ApiSettings, @@ -56,17 +57,13 @@ export async function generateContentStream( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - let response; - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - response = await chromeAdapter.generateContentStream(params); - } else { - response = await generateContentStreamOnCloud( - apiSettings, - model, - params, - requestOptions - ); - } + const response = await callCloudOrDevice( + params, + chromeAdapter, + () => chromeAdapter!.generateContentStream(params), + () => + generateContentStreamOnCloud(apiSettings, model, params, requestOptions) + ); return processStream(response, apiSettings); // TODO: Map streaming responses } @@ -96,17 +93,12 @@ export async function generateContent( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - let response; - if (chromeAdapter && (await chromeAdapter.isAvailable(params))) { - response = await chromeAdapter.generateContent(params); - } else { - response = await generateContentOnCloud( - apiSettings, - model, - params, - requestOptions - ); - } + const response = await callCloudOrDevice( + params, + chromeAdapter, + () => chromeAdapter!.generateContent(params), + () => generateContentOnCloud(apiSettings, model, params, requestOptions) + ); const generateContentResponse = await processGenerateContentResponse( response, apiSettings diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts new file mode 100644 index 00000000000..cb89ea93435 --- /dev/null +++ b/packages/ai/src/methods/helpers.test.ts @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2025 Google 
LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { expect } from 'chai'; +import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; +import { callCloudOrDevice } from './helpers'; +import { + ChromeAdapter, + GenerateContentRequest, + InferenceMode, + AIErrorCode +} from '../types'; +import { AIError } from '../errors'; + +describe('callCloudOrDevice', () => { + let chromeAdapter: SinonStubbedInstance; + let onDeviceCall: SinonStub; + let inCloudCall: SinonStub; + let request: GenerateContentRequest; + + beforeEach(() => { + chromeAdapter = { + mode: InferenceMode.PREFER_ON_DEVICE, + isAvailable: stub(), + generateContent: stub(), + generateContentStream: stub(), + countTokens: stub() + }; + onDeviceCall = stub().resolves('on-device-response'); + inCloudCall = stub().resolves('in-cloud-response'); + request = { contents: [] }; + }); + + afterEach(() => { + restore(); + }); + + it('should call inCloudCall if chromeAdapter is undefined', async () => { + const result = await callCloudOrDevice( + request, + undefined, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + + describe('PREFER_ON_DEVICE mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; + }); + + it('should call onDeviceCall if available', async () => { + chromeAdapter.isAvailable.resolves(true); + 
const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(onDeviceCall).to.have.been.calledOnce; + expect(inCloudCall).to.not.have.been.called; + }); + + it('should call inCloudCall if not available', async () => { + chromeAdapter.isAvailable.resolves(false); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + describe('ONLY_ON_DEVICE mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; + }); + + it('should call onDeviceCall if available', async () => { + chromeAdapter.isAvailable.resolves(true); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(onDeviceCall).to.have.been.calledOnce; + expect(inCloudCall).to.not.have.been.called; + }); + + it('should call inCloudCall if not available', async () => { + chromeAdapter.isAvailable.resolves(false); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + describe('ONLY_IN_CLOUD mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; + }); + + it('should call inCloudCall even if on-device is available', async () => { + chromeAdapter.isAvailable.resolves(true); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); + + 
describe('PREFER_IN_CLOUD mode', () => { + beforeEach(() => { + chromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; + }); + + it('should call inCloudCall first', async () => { + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('in-cloud-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + + it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { + inCloudCall.rejects(new AIError(AIErrorCode.FETCH_ERROR, 'Network error')); + const result = await callCloudOrDevice( + request, + chromeAdapter, + onDeviceCall, + inCloudCall + ); + expect(result).to.equal('on-device-response'); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.have.been.calledOnce; + }); + + it('should re-throw other errors from inCloudCall', async () => { + const error = new Error('Some other error'); + inCloudCall.rejects(error); + await expect( + callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) + ).to.be.rejectedWith(error); + expect(inCloudCall).to.have.been.calledOnce; + expect(onDeviceCall).to.not.have.been.called; + }); + }); +}); diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts new file mode 100644 index 00000000000..de0651e5da5 --- /dev/null +++ b/packages/ai/src/methods/helpers.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { AIError } from '../errors'; +import { + GenerateContentRequest, + InferenceMode +} from '../types'; +import { ChromeAdapter } from '../types/chrome-adapter'; + +/** + * Dispatches a request to the appropriate backend (on-device or in-cloud) + * based on the inference mode. + * + * @param request - The request to be sent. + * @param chromeAdapter - The on-device model adapter. + * @param onDeviceCall - The function to call for on-device inference. + * @param inCloudCall - The function to call for in-cloud inference. + * @returns The response from the backend. + */ +export async function callCloudOrDevice( + request: GenerateContentRequest, + chromeAdapter: ChromeAdapter | undefined, + onDeviceCall: () => Promise, + inCloudCall: () => Promise +): Promise { + if (!chromeAdapter) { + return inCloudCall(); + } + switch (chromeAdapter.mode) { + case InferenceMode.PREFER_IN_CLOUD: + try { + return await inCloudCall(); + } catch (e) { + if (e instanceof AIError) { + return onDeviceCall(); + } + throw e; + } + default: + if (await chromeAdapter.isAvailable(request)) { + return onDeviceCall(); + } + return inCloudCall(); + } +} diff --git a/packages/ai/src/types/chrome-adapter.ts b/packages/ai/src/types/chrome-adapter.ts index 9ec0dc2a0ab..6702a227fd8 100644 --- a/packages/ai/src/types/chrome-adapter.ts +++ b/packages/ai/src/types/chrome-adapter.ts @@ -16,6 +16,7 @@ */ import { CountTokensRequest, GenerateContentRequest } from './requests'; +import { InferenceMode } from './enums'; /** * (EXPERIMENTAL) Defines an inference "backend" that uses Chrome's on-device model, @@ -27,6 +28,11 @@ import { CountTokensRequest, GenerateContentRequest } from './requests'; * @public */ export interface ChromeAdapter { + /** + * The inference mode. + */ + mode: InferenceMode; + /** * Checks if the on-device model is capable of handling a given * request. 
diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index 701cd4a695d..a161185b470 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -352,7 +352,8 @@ export type ResponseModality = export const InferenceMode = { 'PREFER_ON_DEVICE': 'prefer_on_device', 'ONLY_ON_DEVICE': 'only_on_device', - 'ONLY_IN_CLOUD': 'only_in_cloud' + 'ONLY_IN_CLOUD': 'only_in_cloud', + 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; /** From 4e393902ec1a6766029aa4debd0c62839f215795 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 13:59:29 -0700 Subject: [PATCH 02/17] test: Add comprehensive dispatch logic tests for GenerativeModel This commit adds a new test suite to verify that the GenerativeModel's methods correctly dispatch requests to either the on-device or cloud backends based on the selected InferenceMode. It covers generateContent, generateContentStream, and countTokens. --- packages/ai/src/methods/helpers.test.ts | 4 +- packages/ai/src/methods/helpers.ts | 5 +- .../ai/src/models/generative-model.test.ts | 250 +++++++++++++++++- 3 files changed, 252 insertions(+), 7 deletions(-) diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index cb89ea93435..e45f9ba82c4 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -162,7 +162,9 @@ describe('callCloudOrDevice', () => { }); it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { - inCloudCall.rejects(new AIError(AIErrorCode.FETCH_ERROR, 'Network error')); + inCloudCall.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); const result = await callCloudOrDevice( request, chromeAdapter, diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index de0651e5da5..cda9367ada2 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,10 +16,7 @@ */ import { 
AIError } from '../errors'; -import { - GenerateContentRequest, - InferenceMode -} from '../types'; +import { GenerateContentRequest, InferenceMode } from '../types'; import { ChromeAdapter } from '../types/chrome-adapter'; /** diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 68f1565b26a..4af67a9cfa3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -16,13 +16,20 @@ */ import { use, expect } from 'chai'; import { GenerativeModel } from './generative-model'; -import { FunctionCallingMode, AI, InferenceMode } from '../public-types'; +import { + FunctionCallingMode, + AI, + InferenceMode, + AIErrorCode +} from '../public-types'; import * as request from '../requests/request'; -import { match, restore, stub } from 'sinon'; +import { SinonStub, match, restore, stub } from 'sinon'; import { getMockResponse } from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; +import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; +import { AIError } from '../errors'; use(sinonChai); @@ -406,3 +413,242 @@ describe('GenerativeModel', () => { restore(); }); }); + +describe('GenerativeModel dispatch logic', () => { + let makeRequestStub: SinonStub; + let mockChromeAdapter: ChromeAdapter; + + beforeEach(() => { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponse( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + mockChromeAdapter = { + isAvailable: stub(), + generateContent: stub().resolves({} as Response), + generateContentStream: stub().resolves({} as Response), + countTokens: stub().resolves({} as Response), + mode: InferenceMode.PREFER_ON_DEVICE + }; + }); + + afterEach(() => { + restore(); + }); + + describe('PREFER_ON_DEVICE', () => { + beforeEach(() => { + 
mockChromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; + }); + it('should use on-device for generateContent when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContent when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use on-device for generateContentStream when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContentStream when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + 
}); + }); + + describe('ONLY_ON_DEVICE', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; + }); + it('should use on-device for generateContent when available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use cloud for generateContent when on-device is not available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(false); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should throw for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await expect(model.countTokens('hello')).to.be.rejectedWith( + /countTokens\(\) is not supported for on-device models/ + ); + expect(makeRequestStub).to.not.have.been.called; + }); + }); + + describe('ONLY_IN_CLOUD', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; + }); + it('should use cloud for generateContent even when on-device is available', async () => { + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for generateContentStream even when on-device is available', async () => { + (mockChromeAdapter.isAvailable as 
SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + expect(makeRequestStub).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + }); + }); + + describe('PREFER_IN_CLOUD', () => { + beforeEach(() => { + mockChromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; + }); + it('should use cloud for generateContent when available', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + }); + it('should fall back to on-device for generateContent if cloud fails', async () => { + makeRequestStub.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContent('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; + }); + it('should use cloud for generateContentStream when available', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; + }); + it('should fall back to on-device for generateContentStream if cloud fails', async () => { + 
makeRequestStub.rejects( + new AIError(AIErrorCode.FETCH_ERROR, 'Network error') + ); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + }); + it('should use cloud for countTokens', async () => { + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.countTokens('hello'); + expect(makeRequestStub).to.have.been.calledOnce; + }); + }); +}); From 6619ed421033a6dc220d19d7ba8dbd99b69ef8be Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:16:07 -0700 Subject: [PATCH 03/17] fix tests --- packages/ai/src/methods/helpers.test.ts | 21 ++-- packages/ai/src/methods/helpers.ts | 14 ++- .../ai/src/models/generative-model.test.ts | 99 +++++++++++++++---- 3 files changed, 103 insertions(+), 31 deletions(-) diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index e45f9ba82c4..8b9541fa58e 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -import { expect } from 'chai'; +import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; import { @@ -25,6 +25,11 @@ import { AIErrorCode } from '../types'; import { AIError } from '../errors'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; + +use(sinonChai); +use(chaiAsPromised); describe('callCloudOrDevice', () => { let chromeAdapter: SinonStubbedInstance; @@ -111,16 +116,12 @@ describe('callCloudOrDevice', () => { expect(inCloudCall).to.not.have.been.called; }); - it('should call inCloudCall if not available', async () => { + it('should throw if not available', async () => { chromeAdapter.isAvailable.resolves(false); - const result = await callCloudOrDevice( - request, - chromeAdapter, - onDeviceCall, - inCloudCall - ); - expect(result).to.equal('in-cloud-response'); - expect(inCloudCall).to.have.been.calledOnce; + await expect( + callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) + ).to.be.rejectedWith(/On-device model is not available/); + expect(inCloudCall).to.not.have.been.called; expect(onDeviceCall).to.not.have.been.called; }); }); diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index cda9367ada2..a6a9e309134 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,7 +16,7 @@ */ import { AIError } from '../errors'; -import { GenerateContentRequest, InferenceMode } from '../types'; +import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { ChromeAdapter } from '../types/chrome-adapter'; /** @@ -39,6 +39,16 @@ export async function callCloudOrDevice( return inCloudCall(); } switch (chromeAdapter.mode) { + case InferenceMode.ONLY_ON_DEVICE: + if (await chromeAdapter.isAvailable(request)) { + return onDeviceCall(); + } + throw new AIError( + AIErrorCode.UNSUPPORTED, + 'On-device model is not 
available.' + ); + case InferenceMode.ONLY_IN_CLOUD: + return inCloudCall(); case InferenceMode.PREFER_IN_CLOUD: try { return await inCloudCall(); @@ -48,7 +58,7 @@ export async function callCloudOrDevice( } throw e; } - default: + default: // PREFER_ON_DEVICE if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); } diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 4af67a9cfa3..7646a15d1d3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -24,14 +24,19 @@ import { } from '../public-types'; import * as request from '../requests/request'; import { SinonStub, match, restore, stub } from 'sinon'; -import { getMockResponse } from '../../test-utils/mock-response'; +import { + getMockResponse, + getMockResponseStreaming +} from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; import { AIError } from '../errors'; +import chaiAsPromised from 'chai-as-promised'; use(sinonChai); +use(chaiAsPromised); const fakeAI: AI = { app: { @@ -418,18 +423,32 @@ describe('GenerativeModel dispatch logic', () => { let makeRequestStub: SinonStub; let mockChromeAdapter: ChromeAdapter; + function stubMakeRequest(stream?: boolean): void { + if (stream) { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponseStreaming( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + } else { + makeRequestStub = stub(request, 'makeRequest').resolves( + getMockResponse( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ) as Response + ); + } + } + beforeEach(() => { - makeRequestStub = stub(request, 'makeRequest').resolves( - getMockResponse( - 'vertexAI', - 'unary-success-basic-reply-short.json' - ) as Response - ); 
mockChromeAdapter = { isAvailable: stub(), - generateContent: stub().resolves({} as Response), - generateContentStream: stub().resolves({} as Response), - countTokens: stub().resolves({} as Response), + generateContent: stub().resolves(new Response(JSON.stringify({}))), + generateContentStream: stub().resolves( + new Response(new ReadableStream()) + ), + countTokens: stub().resolves(new Response(JSON.stringify({}))), mode: InferenceMode.PREFER_ON_DEVICE }; }); @@ -443,6 +462,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.PREFER_ON_DEVICE; }); it('should use on-device for generateContent when available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -455,6 +475,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.not.have.been.called; }); it('should use cloud for generateContent when on-device is not available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -467,6 +488,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.have.been.calledOnce; }); it('should use on-device for generateContentStream when available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -479,6 +501,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.not.have.been.called; }); it('should use cloud for generateContentStream when on-device is not available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -491,6 +514,7 @@ describe('GenerativeModel dispatch logic', () => { expect(makeRequestStub).to.have.been.calledOnce; }); it('should use cloud for countTokens', async () => { + 
stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -507,6 +531,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.ONLY_ON_DEVICE; }); it('should use on-device for generateContent when available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -518,7 +543,8 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; expect(makeRequestStub).to.not.have.been.called; }); - it('should use cloud for generateContent when on-device is not available', async () => { + it('generateContent should throw when on-device is not available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, @@ -526,20 +552,49 @@ describe('GenerativeModel dispatch logic', () => { {}, mockChromeAdapter ); - await model.generateContent('hello'); + await expect(model.generateContent('hello')).to.be.rejectedWith( + /On-device model is not available/ + ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; - expect(makeRequestStub).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should use on-device for generateContentStream when available', async () => { + stubMakeRequest(true); + (mockChromeAdapter.isAvailable as SinonStub).resolves(true); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await model.generateContentStream('hello'); + expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; + expect(makeRequestStub).to.not.have.been.called; }); - it('should throw for countTokens', async () => { + it('generateContentStream should throw when on-device is not available', async () => { + stubMakeRequest(true); + (mockChromeAdapter.isAvailable as 
SinonStub).resolves(false); const model = new GenerativeModel( fakeAI, { model: 'model' }, {}, mockChromeAdapter ); - await expect(model.countTokens('hello')).to.be.rejectedWith( - /countTokens\(\) is not supported for on-device models/ + await expect(model.generateContentStream('hello')).to.be.rejectedWith( + /On-device model is not available/ ); + expect(mockChromeAdapter.generateContent).to.not.have.been.called; + expect(makeRequestStub).to.not.have.been.called; + }); + it('should always throw for countTokens', async () => { + stubMakeRequest(); + const model = new GenerativeModel( + fakeAI, + { model: 'model' }, + {}, + mockChromeAdapter + ); + await expect(model.countTokens('hello')).to.be.rejectedWith(AIError); expect(makeRequestStub).to.not.have.been.called; }); }); @@ -549,6 +604,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.ONLY_IN_CLOUD; }); it('should use cloud for generateContent even when on-device is available', async () => { + stubMakeRequest(); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -557,10 +613,11 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await model.generateContent('hello'); - expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.have.been.calledOnce; + expect(mockChromeAdapter.generateContent).to.not.have.been.called; }); it('should use cloud for generateContentStream even when on-device is available', async () => { + stubMakeRequest(true); (mockChromeAdapter.isAvailable as SinonStub).resolves(true); const model = new GenerativeModel( fakeAI, @@ -569,10 +626,11 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await model.generateContentStream('hello'); - expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; expect(makeRequestStub).to.have.been.calledOnce; + 
expect(mockChromeAdapter.generateContentStream).to.not.have.been.called; }); - it('should use cloud for countTokens', async () => { + it('should always use cloud for countTokens', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -589,6 +647,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter.mode = InferenceMode.PREFER_IN_CLOUD; }); it('should use cloud for generateContent when available', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -615,6 +674,7 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContent).to.have.been.calledOnce; }); it('should use cloud for generateContentStream when available', async () => { + stubMakeRequest(true); const model = new GenerativeModel( fakeAI, { model: 'model' }, @@ -641,6 +701,7 @@ describe('GenerativeModel dispatch logic', () => { expect(mockChromeAdapter.generateContentStream).to.have.been.calledOnce; }); it('should use cloud for countTokens', async () => { + stubMakeRequest(); const model = new GenerativeModel( fakeAI, { model: 'model' }, From cadfd095c226c96ee7097c9bc9f7502c9711e184 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:19:41 -0700 Subject: [PATCH 04/17] stop publicly exposing mode --- common/api-review/ai.api.md | 1 - docs-devsite/ai.chromeadapter.md | 16 ---------------- packages/ai/src/methods/helpers.ts | 4 ++-- packages/ai/src/types/chrome-adapter.ts | 6 ------ 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index 4918bf17305..43a10169cfa 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -152,7 +152,6 @@ export interface ChromeAdapter { generateContent(request: GenerateContentRequest): Promise; generateContentStream(request: GenerateContentRequest): Promise; isAvailable(request: GenerateContentRequest): Promise; - mode: 
InferenceMode; } // @public diff --git a/docs-devsite/ai.chromeadapter.md b/docs-devsite/ai.chromeadapter.md index 7c01a0c2b9f..e9207614992 100644 --- a/docs-devsite/ai.chromeadapter.md +++ b/docs-devsite/ai.chromeadapter.md @@ -20,12 +20,6 @@ These methods should not be called directly by the user. export interface ChromeAdapter ``` -## Properties - -| Property | Type | Description | -| --- | --- | --- | -| [mode](./ai.chromeadapter.md#chromeadaptermode) | [InferenceMode](./ai.md#inferencemode) | The inference mode. | - ## Methods | Method | Description | @@ -34,16 +28,6 @@ export interface ChromeAdapter | [generateContentStream(request)](./ai.chromeadapter.md#chromeadaptergeneratecontentstream) | Generates a content stream using on-device inference. | | [isAvailable(request)](./ai.chromeadapter.md#chromeadapterisavailable) | Checks if the on-device model is capable of handling a given request. | -## ChromeAdapter.mode - -The inference mode. - -Signature: - -```typescript -mode: InferenceMode; -``` - ## ChromeAdapter.generateContent() Generates content using on-device inference. 
diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index a6a9e309134..6325ab97133 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -17,7 +17,7 @@ import { AIError } from '../errors'; import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; -import { ChromeAdapter } from '../types/chrome-adapter'; +import { ChromeAdapterImpl } from './chrome-adapter'; /** * Dispatches a request to the appropriate backend (on-device or in-cloud) @@ -31,7 +31,7 @@ import { ChromeAdapter } from '../types/chrome-adapter'; */ export async function callCloudOrDevice( request: GenerateContentRequest, - chromeAdapter: ChromeAdapter | undefined, + chromeAdapter: ChromeAdapterImpl | undefined, onDeviceCall: () => Promise, inCloudCall: () => Promise ): Promise { diff --git a/packages/ai/src/types/chrome-adapter.ts b/packages/ai/src/types/chrome-adapter.ts index 6702a227fd8..9ec0dc2a0ab 100644 --- a/packages/ai/src/types/chrome-adapter.ts +++ b/packages/ai/src/types/chrome-adapter.ts @@ -16,7 +16,6 @@ */ import { CountTokensRequest, GenerateContentRequest } from './requests'; -import { InferenceMode } from './enums'; /** * (EXPERIMENTAL) Defines an inference "backend" that uses Chrome's on-device model, @@ -28,11 +27,6 @@ import { InferenceMode } from './enums'; * @public */ export interface ChromeAdapter { - /** - * The inference mode. - */ - mode: InferenceMode; - /** * Checks if the on-device model is capable of handling a given * request. 
From 36deac4271b4853f71a63fb83327222befd70e14 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:32:39 -0700 Subject: [PATCH 05/17] fix ts errors --- packages/ai/src/methods/count-tokens.ts | 3 ++- packages/ai/src/methods/helpers.test.ts | 5 +++-- packages/ai/src/methods/helpers.ts | 11 ++++++++--- packages/ai/src/models/generative-model.test.ts | 4 ++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index c6041a0bb99..d3974641881 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -28,6 +28,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; +import { ChromeAdapterImpl } from './chrome-adapter'; export async function countTokensOnCloud( apiSettings: ApiSettings, @@ -60,7 +61,7 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if (chromeAdapter?.mode === InferenceMode.ONLY_ON_DEVICE) { + if ((chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE) { throw new AIError( AIErrorCode.UNSUPPORTED, 'countTokens() is not supported for on-device models.' 
diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index 8b9541fa58e..13d789d5ec1 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -19,7 +19,6 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; import { - ChromeAdapter, GenerateContentRequest, InferenceMode, AIErrorCode @@ -27,17 +26,19 @@ import { import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; +import { ChromeAdapterImpl } from './chrome-adapter'; use(sinonChai); use(chaiAsPromised); describe('callCloudOrDevice', () => { - let chromeAdapter: SinonStubbedInstance; + let chromeAdapter: SinonStubbedInstance; let onDeviceCall: SinonStub; let inCloudCall: SinonStub; let request: GenerateContentRequest; beforeEach(() => { + // @ts-ignore chromeAdapter = { mode: InferenceMode.PREFER_ON_DEVICE, isAvailable: stub(), diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/methods/helpers.ts index 6325ab97133..4adde787935 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/methods/helpers.ts @@ -16,7 +16,12 @@ */ import { AIError } from '../errors'; -import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; +import { + GenerateContentRequest, + InferenceMode, + AIErrorCode, + ChromeAdapter +} from '../types'; import { ChromeAdapterImpl } from './chrome-adapter'; /** @@ -31,14 +36,14 @@ import { ChromeAdapterImpl } from './chrome-adapter'; */ export async function callCloudOrDevice( request: GenerateContentRequest, - chromeAdapter: ChromeAdapterImpl | undefined, + chromeAdapter: ChromeAdapter | undefined, onDeviceCall: () => Promise, inCloudCall: () => Promise ): Promise { if (!chromeAdapter) { return inCloudCall(); } - switch (chromeAdapter.mode) { + switch ((chromeAdapter as ChromeAdapterImpl).mode) { 
case InferenceMode.ONLY_ON_DEVICE: if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index 7646a15d1d3..eb53eeb2eb7 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -30,7 +30,6 @@ import { } from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; import { VertexAIBackend } from '../backend'; -import { ChromeAdapter } from '../types/chrome-adapter'; import { ChromeAdapterImpl } from '../methods/chrome-adapter'; import { AIError } from '../errors'; import chaiAsPromised from 'chai-as-promised'; @@ -421,7 +420,7 @@ describe('GenerativeModel', () => { describe('GenerativeModel dispatch logic', () => { let makeRequestStub: SinonStub; - let mockChromeAdapter: ChromeAdapter; + let mockChromeAdapter: ChromeAdapterImpl; function stubMakeRequest(stream?: boolean): void { if (stream) { @@ -442,6 +441,7 @@ describe('GenerativeModel dispatch logic', () => { } beforeEach(() => { + // @ts-ignore mockChromeAdapter = { isAvailable: stub(), generateContent: stub().resolves(new Response(JSON.stringify({}))), From 0aadd2e6c6995fec4566f556a29da52a9ee054a3 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Tue, 2 Sep 2025 15:33:58 -0700 Subject: [PATCH 06/17] format --- packages/ai/src/methods/count-tokens.ts | 4 +++- packages/ai/src/methods/helpers.test.ts | 6 +----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/ai/src/methods/count-tokens.ts b/packages/ai/src/methods/count-tokens.ts index d3974641881..ecd86a82912 100644 --- a/packages/ai/src/methods/count-tokens.ts +++ b/packages/ai/src/methods/count-tokens.ts @@ -61,7 +61,9 @@ export async function countTokens( chromeAdapter?: ChromeAdapter, requestOptions?: RequestOptions ): Promise { - if ((chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE) { + if ( + 
(chromeAdapter as ChromeAdapterImpl)?.mode === InferenceMode.ONLY_ON_DEVICE + ) { throw new AIError( AIErrorCode.UNSUPPORTED, 'countTokens() is not supported for on-device models.' diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/methods/helpers.test.ts index 13d789d5ec1..0c4d14e8291 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/methods/helpers.test.ts @@ -18,11 +18,7 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; import { callCloudOrDevice } from './helpers'; -import { - GenerateContentRequest, - InferenceMode, - AIErrorCode -} from '../types'; +import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; From f929d6cf0b04b0d15eb009b1ab1ddf62003e1eb4 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 09:37:55 -0700 Subject: [PATCH 07/17] undo grammar fix --- packages/ai/src/methods/generate-content.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index b07ca704d53..cb54d3bd34d 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -9,7 +9,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS-IS" BASIS, + * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
From 8c9c1c2d305c14f96fe492f8ec550dfa7b7920a3 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 09:56:51 -0700 Subject: [PATCH 08/17] fix prefer_on_cloud fallback criteria --- packages/ai/src/methods/generate-content.ts | 2 +- .../hybrid-helpers.test.ts} | 8 ++++---- .../helpers.ts => requests/hybrid-helpers.ts} | 13 +++++++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) rename packages/ai/src/{methods/helpers.test.ts => requests/hybrid-helpers.test.ts} (95%) rename packages/ai/src/{methods/helpers.ts => requests/hybrid-helpers.ts} (84%) diff --git a/packages/ai/src/methods/generate-content.ts b/packages/ai/src/methods/generate-content.ts index cb54d3bd34d..0e65b479343 100644 --- a/packages/ai/src/methods/generate-content.ts +++ b/packages/ai/src/methods/generate-content.ts @@ -29,7 +29,7 @@ import { ApiSettings } from '../types/internal'; import * as GoogleAIMapper from '../googleai-mappers'; import { BackendType } from '../public-types'; import { ChromeAdapter } from '../types/chrome-adapter'; -import { callCloudOrDevice } from './helpers'; +import { callCloudOrDevice } from '../requests/hybrid-helpers'; async function generateContentStreamOnCloud( apiSettings: ApiSettings, diff --git a/packages/ai/src/methods/helpers.test.ts b/packages/ai/src/requests/hybrid-helpers.test.ts similarity index 95% rename from packages/ai/src/methods/helpers.test.ts rename to packages/ai/src/requests/hybrid-helpers.test.ts index 0c4d14e8291..923f65f249b 100644 --- a/packages/ai/src/methods/helpers.test.ts +++ b/packages/ai/src/requests/hybrid-helpers.test.ts @@ -17,12 +17,12 @@ import { use, expect } from 'chai'; import { SinonStub, SinonStubbedInstance, restore, stub } from 'sinon'; -import { callCloudOrDevice } from './helpers'; +import { callCloudOrDevice } from './hybrid-helpers'; import { GenerateContentRequest, InferenceMode, AIErrorCode } from '../types'; import { AIError } from '../errors'; import sinonChai from 'sinon-chai'; import 
chaiAsPromised from 'chai-as-promised'; -import { ChromeAdapterImpl } from './chrome-adapter'; +import { ChromeAdapterImpl } from '../methods/chrome-adapter'; use(sinonChai); use(chaiAsPromised); @@ -159,7 +159,7 @@ describe('callCloudOrDevice', () => { expect(onDeviceCall).to.not.have.been.called; }); - it('should fall back to onDeviceCall if inCloudCall fails with AIError', async () => { + it('should fall back to onDeviceCall if inCloudCall fails with AIErrorCode.FETCH_ERROR', async () => { inCloudCall.rejects( new AIError(AIErrorCode.FETCH_ERROR, 'Network error') ); @@ -175,7 +175,7 @@ describe('callCloudOrDevice', () => { }); it('should re-throw other errors from inCloudCall', async () => { - const error = new Error('Some other error'); + const error = new AIError(AIErrorCode.RESPONSE_ERROR, 'safety problem'); inCloudCall.rejects(error); await expect( callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) diff --git a/packages/ai/src/methods/helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts similarity index 84% rename from packages/ai/src/methods/helpers.ts rename to packages/ai/src/requests/hybrid-helpers.ts index 4adde787935..343f3e21834 100644 --- a/packages/ai/src/methods/helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -22,7 +22,16 @@ import { AIErrorCode, ChromeAdapter } from '../types'; -import { ChromeAdapterImpl } from './chrome-adapter'; +import { ChromeAdapterImpl } from '../methods/chrome-adapter'; + +const errorsCausingFallback: AIErrorCode[] = [ + // most network errors + AIErrorCode.FETCH_ERROR, + // fallback code for all other errors in makeRequest + AIErrorCode.ERROR, + // error due to API not being enabled in project + AIErrorCode.API_NOT_ENABLED +]; /** * Dispatches a request to the appropriate backend (on-device or in-cloud) @@ -58,7 +67,7 @@ export async function callCloudOrDevice( try { return await inCloudCall(); } catch (e) { - if (e instanceof AIError) { + if (e instanceof AIError && 
errorsCausingFallback.includes(e.code)) { return onDeviceCall(); } throw e; From 34094c8748d2a61292a678b50f1f50078885372b Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 12:09:10 -0700 Subject: [PATCH 09/17] Apply suggestions from code review Co-authored-by: Daniel La Rocque --- packages/ai/src/requests/hybrid-helpers.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/ai/src/requests/hybrid-helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts index 343f3e21834..c2f13a6bd90 100644 --- a/packages/ai/src/requests/hybrid-helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -59,7 +59,7 @@ export async function callCloudOrDevice( } throw new AIError( AIErrorCode.UNSUPPORTED, - 'On-device model is not available.' + 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.' ); case InferenceMode.ONLY_IN_CLOUD: return inCloudCall(); @@ -72,10 +72,12 @@ export async function callCloudOrDevice( } throw e; } - default: // PREFER_ON_DEVICE + case InferenceMode.PREFER_ON_DEVICE: if (await chromeAdapter.isAvailable(request)) { return onDeviceCall(); } return inCloudCall(); + default: + throw new AIError(AIErrorCode.Error, `Unexpected infererence mode: ${(chromeAdapter as ChromeAdapterImpl).mode}`); } } From 7c4f837ac1e1704ca53455dd938495c9eebb7c69 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 12:29:13 -0700 Subject: [PATCH 10/17] Add doc comments on InferenceModes --- packages/ai/src/types/enums.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index a161185b470..d765a4bbbfc 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -350,9 +350,26 @@ export type ResponseModality = * @public */ export const InferenceMode = { + /** + * Attempt to make inference calls on-device. If on-device + * inference is not available, it will fall back to cloud. 
+ */ 'PREFER_ON_DEVICE': 'prefer_on_device', + /** + * Only attempt to make inference calls on-device. It will not + * fall back to cloud. If on-device inference is not available, + * inference methods will throw. + */ 'ONLY_ON_DEVICE': 'only_on_device', + /** + * Only attempt to make inference calls to the cloud. It will not + * fall back to on-device. + */ 'ONLY_IN_CLOUD': 'only_in_cloud', + /** + * Attempt to make inference calls to the cloud. If not available, + * it will fall back to on-device. + */ 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; From db53108f94066541d636c2c2ac34332849bd9cf4 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 14:53:37 -0700 Subject: [PATCH 11/17] Update .changeset/feat-prefer-in-cloud.md Co-authored-by: Daniel La Rocque --- .changeset/feat-prefer-in-cloud.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md index 2ccfd0e3b67..10c1366cebc 100644 --- a/.changeset/feat-prefer-in-cloud.md +++ b/.changeset/feat-prefer-in-cloud.md @@ -3,8 +3,4 @@ "firebase": minor --- -feat: Add `prefer_in_cloud` option for inference mode - -This change introduces a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. - -This also includes a refactoring of the logic for dispatching requests to either the on-device or cloud backends to improve clarity and remove duplication. +Added a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. 
From a1af166cfc757e65571dd698d9ad36ef3f1db154 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Wed, 3 Sep 2025 16:50:12 -0700 Subject: [PATCH 12/17] fix ERROR casing and formatting --- packages/ai/src/requests/hybrid-helpers.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/ai/src/requests/hybrid-helpers.ts b/packages/ai/src/requests/hybrid-helpers.ts index c2f13a6bd90..3140594c00e 100644 --- a/packages/ai/src/requests/hybrid-helpers.ts +++ b/packages/ai/src/requests/hybrid-helpers.ts @@ -78,6 +78,11 @@ export async function callCloudOrDevice( } return inCloudCall(); default: - throw new AIError(AIErrorCode.Error, `Unexpected infererence mode: ${(chromeAdapter as ChromeAdapterImpl).mode}`); + throw new AIError( + AIErrorCode.ERROR, + `Unexpected infererence mode: ${ + (chromeAdapter as ChromeAdapterImpl).mode + }` + ); } } From 7fd62c431f3b5a20c2b1cb2c2e2415656cb5c35f Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Thu, 4 Sep 2025 09:32:54 -0700 Subject: [PATCH 13/17] fix expected error case in tests --- packages/ai/src/models/generative-model.test.ts | 4 ++-- packages/ai/src/requests/hybrid-helpers.test.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/ai/src/models/generative-model.test.ts b/packages/ai/src/models/generative-model.test.ts index eb53eeb2eb7..9c5074b8eb3 100644 --- a/packages/ai/src/models/generative-model.test.ts +++ b/packages/ai/src/models/generative-model.test.ts @@ -553,7 +553,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await expect(model.generateContent('hello')).to.be.rejectedWith( - /On-device model is not available/ + /on-device model is not available/ ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.not.have.been.called; @@ -581,7 +581,7 @@ describe('GenerativeModel dispatch logic', () => { mockChromeAdapter ); await expect(model.generateContentStream('hello')).to.be.rejectedWith( - 
/On-device model is not available/ + /on-device model is not available/ ); expect(mockChromeAdapter.generateContent).to.not.have.been.called; expect(makeRequestStub).to.not.have.been.called; diff --git a/packages/ai/src/requests/hybrid-helpers.test.ts b/packages/ai/src/requests/hybrid-helpers.test.ts index 923f65f249b..a758f34ad21 100644 --- a/packages/ai/src/requests/hybrid-helpers.test.ts +++ b/packages/ai/src/requests/hybrid-helpers.test.ts @@ -117,7 +117,7 @@ describe('callCloudOrDevice', () => { chromeAdapter.isAvailable.resolves(false); await expect( callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) - ).to.be.rejectedWith(/On-device model is not available/); + ).to.be.rejectedWith(/on-device model is not available/); expect(inCloudCall).to.not.have.been.called; expect(onDeviceCall).to.not.have.been.called; }); From d4e843e9b8007a5404bada746c0ed500a200e3ac Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Thu, 4 Sep 2025 12:20:43 -0700 Subject: [PATCH 14/17] Move InferenceMode doc comment to be on top of type --- docs-devsite/ai.md | 2 ++ packages/ai/src/types/enums.ts | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index 93c231a7324..b0f839bcfba 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -863,6 +863,8 @@ export type ImagenSafetyFilterLevel = (typeof ImagenSafetyFilterLevel)[keyof typ (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. +PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. + Signature: ```typescript diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index d765a4bbbfc..6bb84949e25 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -350,32 +350,29 @@ export type ResponseModality = * @public */ export const InferenceMode = { - /** - * Attempt to make inference calls on-device. If on-device - * inference is not available, it will fall back to cloud. - */ 'PREFER_ON_DEVICE': 'prefer_on_device', - /** - * Only attempt to make inference calls on-device. It will not - * fall back to cloud. If on-device inference is not available, - * inference methods will throw. - */ 'ONLY_ON_DEVICE': 'only_on_device', - /** - * Only attempt to make inference calls to the cloud. It will not - * fall back to on-device. - */ 'ONLY_IN_CLOUD': 'only_in_cloud', - /** - * Attempt to make inference calls to the cloud. If not available, - * it will fall back to on-device. - */ 'PREFER_IN_CLOUD': 'prefer_in_cloud' } as const; /** * (EXPERIMENTAL) * Determines whether inference happens on-device or in-cloud. + * @remarks + * PREFER_ON_DEVICE: Attempt to make inference calls on-device. + * If on-device inference is not available, it will fall back to cloud. + *
+ * ONLY_ON_DEVICE: Only attempt to make inference calls on-device. + * It will not fall back to cloud. If on-device inference is not available, + * inference methods will throw. + *
+ * ONLY_IN_CLOUD: Only attempt to make inference calls to the cloud. + * It will not fall back to on-device. + *
+ * PREFER_IN_CLOUD: Attempt to make inference calls to the cloud. + * If not available, it will fall back to on-device. + * * @public */ export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode]; From 15908c7c74f3c9b9912bc868e3852d8f828ab977 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:12:57 -0700 Subject: [PATCH 15/17] move docs to where they will be seen --- docs-devsite/ai.md | 4 ++-- packages/ai/src/types/enums.ts | 26 ++++++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index b0f839bcfba..bb0caec5911 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -624,6 +624,8 @@ ImagenSafetyFilterLevel: { (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. +PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. + Signature: ```typescript @@ -863,8 +865,6 @@ export type ImagenSafetyFilterLevel = (typeof ImagenSafetyFilterLevel)[keyof typ (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. -PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. - Signature: ```typescript diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts index 6bb84949e25..b41afd3e111 100644 --- a/packages/ai/src/types/enums.ts +++ b/packages/ai/src/types/enums.ts @@ -347,18 +347,7 @@ export type ResponseModality = /** * (EXPERIMENTAL) * Determines whether inference happens on-device or in-cloud. - * @public - */ -export const InferenceMode = { - 'PREFER_ON_DEVICE': 'prefer_on_device', - 'ONLY_ON_DEVICE': 'only_on_device', - 'ONLY_IN_CLOUD': 'only_in_cloud', - 'PREFER_IN_CLOUD': 'prefer_in_cloud' -} as const; - -/** - * (EXPERIMENTAL) - * Determines whether inference happens on-device or in-cloud. + * * @remarks * PREFER_ON_DEVICE: Attempt to make inference calls on-device. * If on-device inference is not available, it will fall back to cloud. @@ -375,4 +364,17 @@ export const InferenceMode = { * * @public */ +export const InferenceMode = { + 'PREFER_ON_DEVICE': 'prefer_on_device', + 'ONLY_ON_DEVICE': 'only_on_device', + 'ONLY_IN_CLOUD': 'only_in_cloud', + 'PREFER_IN_CLOUD': 'prefer_in_cloud' +} as const; + +/** + * (EXPERIMENTAL) + * Determines whether inference happens on-device or in-cloud. 
+ * + * @public + */ export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode]; From d165c4289450f45a4fbc82532a2d594e3adba026 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:52:52 -0700 Subject: [PATCH 16/17] Update .changeset/feat-prefer-in-cloud.md Co-authored-by: rachelsaunders <52258509+rachelsaunders@users.noreply.github.com> --- .changeset/feat-prefer-in-cloud.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/feat-prefer-in-cloud.md b/.changeset/feat-prefer-in-cloud.md index 10c1366cebc..90f859c2da8 100644 --- a/.changeset/feat-prefer-in-cloud.md +++ b/.changeset/feat-prefer-in-cloud.md @@ -3,4 +3,4 @@ "firebase": minor --- -Added a new `InferenceMode` option, `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use the cloud backend first. If the cloud call fails with a network-related error, it will fall back to the on-device model if available. +Added a new `InferenceMode` option for the hybrid on-device capability: `prefer_in_cloud`. When this mode is selected, the SDK will attempt to use a cloud-hosted model first. If the call to the cloud-hosted model fails with a network-related error, the SDK will fall back to the on-device model, if it's available. From 4324ddd989e7824eb780b0fe1e0646534973676a Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Mon, 8 Sep 2025 12:58:41 -0700 Subject: [PATCH 17/17] Address comment --- docs-devsite/ai.md | 2 +- packages/ai/src/types/enums.ts | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md index bb0caec5911..d94c31155de 100644 --- a/docs-devsite/ai.md +++ b/docs-devsite/ai.md @@ -624,7 +624,7 @@ ImagenSafetyFilterLevel: { (EXPERIMENTAL) Determines whether inference happens on-device or in-cloud. -PREFER\_ON\_DEVICE: Attempt to make inference calls on-device. If on-device inference is not available, it will fall back to cloud.
ONLY\_ON\_DEVICE: Only attempt to make inference calls on-device. It will not fall back to cloud. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls to the cloud. It will not fall back to on-device.
PREFER\_IN\_CLOUD: Attempt to make inference calls to the cloud. If not available, it will fall back to on-device. +PREFER\_ON\_DEVICE: Attempt to make inference calls using an on-device model. If on-device inference is not available, the SDK will fall back to using a cloud-hosted model.
ONLY\_ON\_DEVICE: Only attempt to make inference calls using an on-device model. The SDK will not fall back to a cloud-hosted model. If on-device inference is not available, inference methods will throw.
ONLY\_IN\_CLOUD: Only attempt to make inference calls using a cloud-hosted model. The SDK will not fall back to an on-device model.
PREFER\_IN\_CLOUD: Attempt to make inference calls to a cloud-hosted model. If the call to the cloud-hosted model fails with a network-related error, the SDK will fall back to an on-device model, if it's available.
 
 Signature:
 
diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts
index b41afd3e111..de70d325157 100644
--- a/packages/ai/src/types/enums.ts
+++ b/packages/ai/src/types/enums.ts
@@ -349,18 +349,20 @@ export type ResponseModality =
  * Determines whether inference happens on-device or in-cloud.
  *
  * @remarks
- * PREFER_ON_DEVICE: Attempt to make inference calls on-device.
- * If on-device inference is not available, it will fall back to cloud.
+ * PREFER_ON_DEVICE: Attempt to make inference calls using an
+ * on-device model. If on-device inference is not available, the SDK
+ * will fall back to using a cloud-hosted model.
  *
- * ONLY_ON_DEVICE: Only attempt to make inference calls on-device. - * It will not fall back to cloud. If on-device inference is not available, - * inference methods will throw. + * ONLY_ON_DEVICE: Only attempt to make inference calls using an + * on-device model. The SDK will not fall back to a cloud-hosted model. + * If on-device inference is not available, inference methods will throw. *
- * ONLY_IN_CLOUD: Only attempt to make inference calls to the cloud. - * It will not fall back to on-device. + * ONLY_IN_CLOUD: Only attempt to make inference calls using a + * cloud-hosted model. The SDK will not fall back to an on-device model. *
- * PREFER_IN_CLOUD: Attempt to make inference calls to a
+ * cloud-hosted model. If the call fails with a network-related error,
+ * the SDK will fall back to an on-device model, if it's available.
  *
  * @public
  */