firebase · rlazo · Feb 24, 2025 · Feb 4, 2025 · Feb 4, 2025 · Feb 4, 2025
@@ -0,0 +1,6 @@
+---
+'@firebase/vertexai': minor
+'firebase': minor
+---
+
+Added support for modality-based token count.
@@ -92,6 +92,7 @@ export interface CountTokensRequest {
 
 // @public
 export interface CountTokensResponse {
+    promptTokensDetails?: ModalityTokenCount[];
     totalBillableCharacters?: number;
     totalTokens: number;
 }
@@ -532,6 +533,22 @@ export class IntegerSchema extends Schema {
     constructor(schemaParams?: SchemaParams);
 }
 
+// @public
+export enum Modality {
+    AUDIO = "AUDIO",
+    DOCUMENT = "DOCUMENT",
+    IMAGE = "IMAGE",
+    MODALITY_UNSPECIFIED = "MODALITY_UNSPECIFIED",
+    TEXT = "TEXT",
+    VIDEO = "VIDEO"
+}
+
+// @public
+export interface ModalityTokenCount {
+    modality: Modality;
+    tokenCount: number;
+}
+
 // @public
 export interface ModelParams extends BaseParams {
     // (undocumented)
@@ -767,8 +784,12 @@ export interface UsageMetadata {
     // (undocumented)
     candidatesTokenCount: number;
     // (undocumented)
+    candidatesTokensDetails?: ModalityTokenCount[];
+    // (undocumented)
     promptTokenCount: number;
     // (undocumented)
+    promptTokensDetails?: ModalityTokenCount[];
+    // (undocumented)
     totalTokenCount: number;
 }
 

@@ -556,6 +556,8 @@ toc:
     path: /docs/reference/js/vertexai.inlinedatapart.md
   - title: IntegerSchema
     path: /docs/reference/js/vertexai.integerschema.md
+  - title: ModalityTokenCount
+    path: /docs/reference/js/vertexai.modalitytokencount.md
   - title: ModelParams
     path: /docs/reference/js/vertexai.modelparams.md
   - title: NumberSchema

@@ -22,9 +22,20 @@ export interface CountTokensResponse
 
 |  Property | Type | Description |
 |  --- | --- | --- |
+|  [promptTokensDetails](./vertexai.counttokensresponse.md#counttokensresponseprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)<!-- -->\[\] | The breakdown, by modality, of how many tokens are consumed by the prompt. |
 |  [totalBillableCharacters](./vertexai.counttokensresponse.md#counttokensresponsetotalbillablecharacters) | number | The total number of billable characters counted across all instances from the request. |
 |  [totalTokens](./vertexai.counttokensresponse.md#counttokensresponsetotaltokens) | number | The total number of tokens counted across all instances from the request. |
 
+## CountTokensResponse.promptTokensDetails
+
+The breakdown, by modality, of how many tokens are consumed by the prompt.
+
+<b>Signature:</b>
+
+```typescript
+promptTokensDetails?: ModalityTokenCount[];
+```
+
 ## CountTokensResponse.totalBillableCharacters
 
 The total number of billable characters counted across all instances from the request.

@@ -55,6 +55,7 @@ The Vertex AI in Firebase Web SDK.
 |  [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | <b><i>(Public Preview)</i></b> Aspect ratios for Imagen images.<!-- -->To specify an aspect ratio for generated images, set the <code>aspectRatio</code> property in your <code>[ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface)</code>.<!-- -->See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. |
 |  [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | <b><i>(Public Preview)</i></b> A filter level controlling whether generation of images containing people or faces is allowed.<!-- -->See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images"><code>personGeneration</code></a> documentation for more details. |
 |  [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | <b><i>(Public Preview)</i></b> A filter level controlling how aggressively to filter sensitive content.<!-- -->Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, <code>violence</code>, <code>sexual</code>, <code>derogatory</code>, and <code>toxic</code>). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. |
+|  [Modality](./vertexai.md#modality) | Content part modality. |
 |  [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) |
 |  [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that <code>[VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class)</code> can have. |
 
@@ -97,6 +98,7 @@ The Vertex AI in Firebase Web SDK.
 |  [ImagenModelParams](./vertexai.imagenmodelparams.md#imagenmodelparams_interface) | <b><i>(Public Preview)</i></b> Parameters for configuring an <code>[ImagenModel](./vertexai.imagenmodel.md#imagenmodel_class)</code>. |
 |  [ImagenSafetySettings](./vertexai.imagensafetysettings.md#imagensafetysettings_interface) | <b><i>(Public Preview)</i></b> Settings for controlling the aggressiveness of filtering out sensitive content.<!-- -->See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details. |
 |  [InlineDataPart](./vertexai.inlinedatapart.md#inlinedatapart_interface) | Content part interface if the part represents an image. |
+|  [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface) | Represents token counting info for a single modality. |
 |  [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to <code>[getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9)</code>. |
 |  [ObjectSchemaInterface](./vertexai.objectschemainterface.md#objectschemainterface_interface) | Interface for <code>[ObjectSchema](./vertexai.objectschema.md#objectschema_class)</code> class. |
 |  [PromptFeedback](./vertexai.promptfeedback.md#promptfeedback_interface) | If the prompt was blocked, this will be populated with <code>blockReason</code> and the relevant <code>safetyRatings</code>. |
@@ -481,6 +483,27 @@ export declare enum ImagenSafetyFilterLevel
 |  BLOCK\_NONE | <code>&quot;block_none&quot;</code> | <b><i>(Public Preview)</i></b> The least aggressive filtering level; blocks very few sensitive prompts and responses.<!-- -->Access to this feature is restricted and may require your case to be reviewed and approved by Cloud support. |
 |  BLOCK\_ONLY\_HIGH | <code>&quot;block_only_high&quot;</code> | <b><i>(Public Preview)</i></b> Blocks few sensitive prompts and responses. |
 
+## Modality
+
+Content part modality.
+
+<b>Signature:</b>
+
+```typescript
+export declare enum Modality 
+```
+
+## Enumeration Members
+
+|  Member | Value | Description |
+|  --- | --- | --- |
+|  AUDIO | <code>&quot;AUDIO&quot;</code> | Audio. |
+|  DOCUMENT | <code>&quot;DOCUMENT&quot;</code> | Document (for example, PDF). |
+|  IMAGE | <code>&quot;IMAGE&quot;</code> | Image. |
+|  MODALITY\_UNSPECIFIED | <code>&quot;MODALITY_UNSPECIFIED&quot;</code> | Unspecified modality. |
+|  TEXT | <code>&quot;TEXT&quot;</code> | Plain text. |
+|  VIDEO | <code>&quot;VIDEO&quot;</code> | Video. |
+
 ## SchemaType
 
 Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/)

@@ -0,0 +1,46 @@
+Project: /docs/reference/js/_project.yaml
+Book: /docs/reference/_book.yaml
+page_type: reference
+
+{% comment %}
+DO NOT EDIT THIS FILE!
+This is generated by the JS SDK team, and any local changes will be
+overwritten. Changes should be made in the source code at
+https://github.com/firebase/firebase-js-sdk
+{% endcomment %}
+
+# ModalityTokenCount interface
+Represents token counting info for a single modality.
+
+<b>Signature:</b>
+
+```typescript
+export interface ModalityTokenCount 
+```
+
+## Properties
+
+|  Property | Type | Description |
+|  --- | --- | --- |
+|  [modality](./vertexai.modalitytokencount.md#modalitytokencountmodality) | [Modality](./vertexai.md#modality) | The modality associated with this token count. |
+|  [tokenCount](./vertexai.modalitytokencount.md#modalitytokencounttokencount) | number | The number of tokens counted. |
+
+## ModalityTokenCount.modality
+
+The modality associated with this token count.
+
+<b>Signature:</b>
+
+```typescript
+modality: Modality;
+```
+
+## ModalityTokenCount.tokenCount
+
+The number of tokens counted.
+
+<b>Signature:</b>
+
+```typescript
+tokenCount: number;
+```
@@ -23,7 +23,9 @@ export interface UsageMetadata
 |  Property | Type | Description |
 |  --- | --- | --- |
 |  [candidatesTokenCount](./vertexai.usagemetadata.md#usagemetadatacandidatestokencount) | number |  |
+|  [candidatesTokensDetails](./vertexai.usagemetadata.md#usagemetadatacandidatestokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)<!-- -->\[\] |  |
 |  [promptTokenCount](./vertexai.usagemetadata.md#usagemetadataprompttokencount) | number |  |
+|  [promptTokensDetails](./vertexai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)<!-- -->\[\] |  |
 |  [totalTokenCount](./vertexai.usagemetadata.md#usagemetadatatotaltokencount) | number |  |
 
 ## UsageMetadata.candidatesTokenCount
@@ -34,6 +36,14 @@ export interface UsageMetadata
 candidatesTokenCount: number;
 ```
 
+## UsageMetadata.candidatesTokensDetails
+
+<b>Signature:</b>
+
+```typescript
+candidatesTokensDetails?: ModalityTokenCount[];
+```
+
 ## UsageMetadata.promptTokenCount
 
 <b>Signature:</b>
@@ -42,6 +52,14 @@ candidatesTokenCount: number;
 promptTokenCount: number;
 ```
 
+## UsageMetadata.promptTokensDetails
+
+<b>Signature:</b>
+
+```typescript
+promptTokensDetails?: ModalityTokenCount[];
+```
+
 ## UsageMetadata.totalTokenCount
 
 <b>Signature:</b>

@@ -66,6 +66,33 @@ describe('countTokens()', () => {
       undefined
     );
   });
+  it('total tokens with modality details', async () => {
+    // Stub makeRequest so it resolves with the detailed-token mock response.
+    const detailedTokenResponse = getMockResponse(
+      'unary-success-detailed-token-response.json'
+    );
+    const requestStub = stub(request, 'makeRequest').resolves(
+      detailedTokenResponse as Response
+    );
+    const { totalTokens, totalBillableCharacters, promptTokensDetails } =
+      await countTokens(fakeApiSettings, 'model', fakeRequestParams);
+    // Top-level totals.
+    expect(totalTokens).to.equal(1837);
+    expect(totalBillableCharacters).to.equal(117);
+    // First entry of the per-modality prompt breakdown.
+    expect(promptTokensDetails?.[0].modality).to.equal('IMAGE');
+    expect(promptTokensDetails?.[0].tokenCount).to.equal(1806);
+    // The fifth argument must be a string that contains 'contents'.
+    const bodyHasContents = match((body: string) => body.includes('contents'));
+    expect(requestStub).to.be.calledWith(
+      'model',
+      Task.COUNT_TOKENS,
+      fakeApiSettings,
+      false,
+      bodyHasContents,
+      undefined
+    );
+  });
   it('total tokens no billable characters', async () => {
     const mockResponse = getMockResponse(
       'unary-success-no-billable-characters.json'

@@ -102,6 +102,40 @@ describe('generateContent()', () => {
       match.any
     );
   });
+  it('long response with token details', async () => {
+    // Stub makeRequest so it resolves with the long-usage-metadata mock.
+    const longUsageResponse = getMockResponse(
+      'unary-success-basic-response-long-usage-metadata.json'
+    );
+    const requestStub = stub(request, 'makeRequest').resolves(
+      longUsageResponse as Response
+    );
+    const { response } = await generateContent(
+      fakeApiSettings,
+      'model',
+      fakeRequestParams
+    );
+    const usage = response.usageMetadata;
+    // Top-level token totals.
+    expect(usage?.totalTokenCount).to.equal(1913);
+    expect(usage?.candidatesTokenCount).to.equal(76);
+    // First entry of the per-modality prompt breakdown.
+    expect(usage?.promptTokensDetails?.[0].modality).to.equal('IMAGE');
+    expect(usage?.promptTokensDetails?.[0].tokenCount).to.equal(1806);
+    // First entry of the per-modality candidates breakdown.
+    expect(usage?.candidatesTokensDetails?.[0].modality).to.equal('TEXT');
+    expect(usage?.candidatesTokensDetails?.[0].tokenCount).to.equal(76);
+    expect(requestStub).to.be.calledWith(
+      'model',
+      Task.GENERATE_CONTENT,
+      fakeApiSettings,
+      false,
+      match.any
+    );
+  });
   it('citations', async () => {
     const mockResponse = getMockResponse('unary-success-citations.json');
     const makeRequestStub = stub(request, 'makeRequest').resolves(

@@ -137,3 +137,34 @@ export enum FunctionCallingMode {
   // not passing any function declarations.
   NONE = 'NONE'
 }
+
+/**
+ * Content part modality.
+ *
+ * Used as the `modality` field of {@link ModalityTokenCount} to label which
+ * kind of content a token count applies to. Every member's string value is
+ * identical to its name.
+ *
+ * @public
+ */
+export enum Modality {
+  /**
+   * Unspecified modality.
+   */
+  MODALITY_UNSPECIFIED = 'MODALITY_UNSPECIFIED',
+  /**
+   * Plain text.
+   */
+  TEXT = 'TEXT',
+  /**
+   * Image.
+   */
+  IMAGE = 'IMAGE',
+  /**
+   * Video.
+   */
+  VIDEO = 'VIDEO',
+  /**
+   * Audio.
+   */
+  AUDIO = 'AUDIO',
+  /**
+   * Document (for example, PDF).
+   */
+  DOCUMENT = 'DOCUMENT'
+}
@@ -21,7 +21,8 @@ import {
   FinishReason,
   HarmCategory,
   HarmProbability,
-  HarmSeverity
+  HarmSeverity,
+  Modality
 } from './enums';
 
 /**
@@ -83,6 +84,20 @@ export interface UsageMetadata {
   promptTokenCount: number;
   candidatesTokenCount: number;
   totalTokenCount: number;
+  promptTokensDetails?: ModalityTokenCount[];
+  candidatesTokensDetails?: ModalityTokenCount[];
+}
+
+/**
+ * Represents token counting info for a single modality.
+ *
+ * Entries of this type make up the per-modality breakdowns exposed as
+ * `promptTokensDetails` and `candidatesTokensDetails` on
+ * {@link UsageMetadata}, and as `promptTokensDetails` on
+ * {@link CountTokensResponse}.
+ *
+ * @public
+ */
+export interface ModalityTokenCount {
+  /** The modality associated with this token count. */
+  modality: Modality;
+  /** The number of tokens counted for this modality. */
+  tokenCount: number;
 }
 
 /**
@@ -213,4 +228,8 @@ export interface CountTokensResponse {
    * from the request.
    */
   totalBillableCharacters?: number;
+  /**
+   * The breakdown, by modality, of how many tokens are consumed by the prompt.
+   */
+  promptTokensDetails?: ModalityTokenCount[];
 }