Skip to content

Commit 44fe5d0

Browse files
authored
[VertexAI] Add support for token-based usage metrics (#14406)
1 parent 25e724d commit 44fe5d0

File tree

6 files changed

+123
-0
lines changed

6 files changed

+123
-0
lines changed

FirebaseVertexAI/CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Note: This feature is in Public Preview, which means that it is not
66
subject to any SLA or deprecation policy and could change in
77
backwards-incompatible ways.
8+
- [feature] Added support for modality-based token count. (#14406)
89

910
# 11.6.0
1011
- [changed] The token counts from `GenerativeModel.countTokens(...)` now include

FirebaseVertexAI/Sources/CountTokensRequest.swift

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ public struct CountTokensResponse {
4646
/// > Important: This does not include billable image, video or other non-text input. See
4747
/// [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details.
4848
public let totalBillableCharacters: Int?
49+
50+
/// The breakdown, by modality, of how many tokens are consumed by the prompt.
51+
public let promptTokensDetails: [ModalityTokenCount]
4952
}
5053

5154
// MARK: - Codable Conformances

FirebaseVertexAI/Sources/GenerateContentResponse.swift

+14
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ public struct GenerateContentResponse: Sendable {
2828

2929
/// The total number of tokens in both the request and response.
3030
public let totalTokenCount: Int
31+
32+
/// The breakdown, by modality, of how many tokens are consumed by the prompt.
33+
public let promptTokensDetails: [ModalityTokenCount]
34+
35+
/// The breakdown, by modality, of how many tokens are consumed by the candidates.
36+
public let candidatesTokensDetails: [ModalityTokenCount]
3137
}
3238

3339
/// A list of candidate response content, ordered from best to worst.
@@ -299,6 +305,8 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
299305
case promptTokenCount
300306
case candidatesTokenCount
301307
case totalTokenCount
308+
case promptTokensDetails
309+
case candidatesTokensDetails
302310
}
303311

304312
public init(from decoder: any Decoder) throws {
@@ -307,6 +315,12 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
307315
candidatesTokenCount = try container
308316
.decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
309317
totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
318+
promptTokensDetails = try container
319+
.decodeIfPresent([ModalityTokenCount].self, forKey: .promptTokensDetails) ??
320+
[ModalityTokenCount]()
321+
candidatesTokensDetails = try container
322+
.decodeIfPresent([ModalityTokenCount].self, forKey: .candidatesTokensDetails) ??
323+
[ModalityTokenCount]()
310324
}
311325
}
312326

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import Foundation
16+
17+
/// Represents token counting info for a single modality.
18+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
19+
public struct ModalityTokenCount: Sendable {
20+
/// The modality associated with this token count.
21+
public let modality: ContentModality
22+
23+
/// The number of tokens counted.
24+
public let tokenCount: Int
25+
}
26+
27+
/// Content part modality.
28+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
29+
public struct ContentModality: DecodableProtoEnum, Hashable, Sendable {
30+
enum Kind: String {
31+
case text = "TEXT"
32+
case image = "IMAGE"
33+
case video = "VIDEO"
34+
case audio = "AUDIO"
35+
case document = "DOCUMENT"
36+
}
37+
38+
/// Plain text.
39+
public static let text = ContentModality(kind: .text)
40+
41+
/// Image.
42+
public static let image = ContentModality(kind: .image)
43+
44+
/// Video.
45+
public static let video = ContentModality(kind: .video)
46+
47+
/// Audio.
48+
public static let audio = ContentModality(kind: .audio)
49+
50+
/// Document, e.g. PDF.
51+
public static let document = ContentModality(kind: .document)
52+
53+
/// Returns the raw string representation of the `ContentModality` value.
54+
public let rawValue: String
55+
56+
static let unrecognizedValueMessageCode =
57+
VertexLog.MessageCode.generateContentResponseUnrecognizedContentModality
58+
}
59+
60+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
61+
extension ModalityTokenCount: Decodable {}

FirebaseVertexAI/Sources/VertexLog.swift

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ enum VertexLog {
5757
case decodedInvalidProtoDateMonth = 3009
5858
case decodedInvalidProtoDateDay = 3010
5959
case decodedInvalidCitationPublicationDate = 3011
60+
case generateContentResponseUnrecognizedContentModality = 3012
6061

6162
// SDK State Errors
6263
case generateContentResponseNoCandidates = 4000

FirebaseVertexAI/Tests/Unit/GenerativeModelTests.swift

+43
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,30 @@ final class GenerativeModelTests: XCTestCase {
128128
XCTAssertEqual(response.functionCalls, [])
129129
}
130130

131+
func testGenerateContent_success_basicReplyFullUsageMetadata() async throws {
132+
MockURLProtocol
133+
.requestHandler = try httpRequestHandler(
134+
forResource: "unary-success-basic-response-long-usage-metadata",
135+
withExtension: "json"
136+
)
137+
138+
let response = try await model.generateContent(testPrompt)
139+
140+
XCTAssertEqual(response.candidates.count, 1)
141+
let candidate = try XCTUnwrap(response.candidates.first)
142+
let finishReason = try XCTUnwrap(candidate.finishReason)
143+
XCTAssertEqual(finishReason, .stop)
144+
let usageMetadata = try XCTUnwrap(response.usageMetadata)
145+
XCTAssertEqual(usageMetadata.promptTokensDetails.count, 2)
146+
XCTAssertEqual(usageMetadata.promptTokensDetails[0].modality, .image)
147+
XCTAssertEqual(usageMetadata.promptTokensDetails[0].tokenCount, 1806)
148+
XCTAssertEqual(usageMetadata.promptTokensDetails[1].modality, .text)
149+
XCTAssertEqual(usageMetadata.promptTokensDetails[1].tokenCount, 76)
150+
XCTAssertEqual(usageMetadata.candidatesTokensDetails.count, 1)
151+
XCTAssertEqual(usageMetadata.candidatesTokensDetails[0].modality, .text)
152+
XCTAssertEqual(usageMetadata.candidatesTokensDetails[0].tokenCount, 76)
153+
}
154+
131155
func testGenerateContent_success_citations() async throws {
132156
MockURLProtocol
133157
.requestHandler = try httpRequestHandler(
@@ -488,6 +512,8 @@ final class GenerativeModelTests: XCTestCase {
488512
XCTAssertEqual(usageMetadata.promptTokenCount, 6)
489513
XCTAssertEqual(usageMetadata.candidatesTokenCount, 7)
490514
XCTAssertEqual(usageMetadata.totalTokenCount, 13)
515+
XCTAssertEqual(usageMetadata.promptTokensDetails.isEmpty, true)
516+
XCTAssertEqual(usageMetadata.candidatesTokensDetails.isEmpty, true)
491517
}
492518

493519
func testGenerateContent_failure_invalidAPIKey() async throws {
@@ -1326,6 +1352,23 @@ final class GenerativeModelTests: XCTestCase {
13261352
XCTAssertEqual(response.totalBillableCharacters, 16)
13271353
}
13281354

1355+
func testCountTokens_succeeds_detailed() async throws {
1356+
MockURLProtocol.requestHandler = try httpRequestHandler(
1357+
forResource: "unary-success-detailed-token-response",
1358+
withExtension: "json"
1359+
)
1360+
1361+
let response = try await model.countTokens("Why is the sky blue?")
1362+
1363+
XCTAssertEqual(response.totalTokens, 1837)
1364+
XCTAssertEqual(response.totalBillableCharacters, 117)
1365+
XCTAssertEqual(response.promptTokensDetails.count, 2)
1366+
XCTAssertEqual(response.promptTokensDetails[0].modality, .image)
1367+
XCTAssertEqual(response.promptTokensDetails[0].tokenCount, 1806)
1368+
XCTAssertEqual(response.promptTokensDetails[1].modality, .text)
1369+
XCTAssertEqual(response.promptTokensDetails[1].tokenCount, 31)
1370+
}
1371+
13291372
func testCountTokens_succeeds_allOptions() async throws {
13301373
MockURLProtocol.requestHandler = try httpRequestHandler(
13311374
forResource: "unary-success-total-tokens",

0 commit comments

Comments
 (0)