diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index c703cd959c3..1dd93860d56 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -16,6 +16,7 @@ package com.google.firebase.ai.type +import android.Manifest import android.Manifest.permission.RECORD_AUDIO import android.content.pm.PackageManager import android.media.AudioFormat @@ -62,6 +63,7 @@ internal constructor( private val session: DefaultClientWebSocketSession, @Blocking private val blockingDispatcher: CoroutineContext, private var audioHelper: AudioHelper? = null, + private var videoHelper: VideoHelper? = null, private val firebaseApp: FirebaseApp, ) { /** @@ -162,6 +164,66 @@ internal constructor( audioHelper?.release() audioHelper = null + videoHelper?.release() + videoHelper = null + } + } + + /** + * Stops the video conversation with the model. + * + * This only needs to be called after a previous call to [startVideoConversation]. + * + * If there is no video conversation currently active, this function does nothing. + */ + public fun stopVideoConversation() { + FirebaseAIException.catch { + if (!startedReceiving.getAndSet(false)) return@catch + + scope.cancel() + + videoHelper?.release() + videoHelper = null + } + } + + /** + * Starts a video conversation with the model, which can only be stopped using + * [stopVideoConversation] or [close]. + * + * @param cameraId The ID of the camera to use for the video stream. + */ + @RequiresPermission(Manifest.permission.CAMERA) + public suspend fun startVideoConversation(cameraId: String) { + val context = firebaseApp.applicationContext + if ( + ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) != + PackageManager.PERMISSION_GRANTED + ) { + throw PermissionMissingException("Camera access not provided by the user") + } + + FirebaseAIException.catchAsync { + if (scope.isActive) { + Log.w( + TAG, + "startVideoConversation called after a conversation has already started. " + + "Call stopVideoConversation to close the previous connection." + ) + return@catchAsync + } + + scope = CoroutineScope(blockingDispatcher + childJob()) + val cameraManager = + context.getSystemService(android.content.Context.CAMERA_SERVICE) + as android.hardware.camera2.CameraManager + videoHelper = VideoHelper.build(cameraManager) + videoHelper + ?.start(cameraId) + ?.buffer(UNLIMITED) + ?.onEach { sendMediaStream(listOf(MediaData(it, "image/jpeg"))) } + ?.catch { throw FirebaseAIException.from(it) } + ?.launchIn(scope) } } @@ -171,6 +233,9 @@ internal constructor( /** Indicates whether an audio conversation is being used for this session object. */ public fun isAudioConversationActive(): Boolean = (audioHelper != null) + /** Indicates whether a video conversation is being used for this session object. */ + public fun isVideoConversationActive(): Boolean = (videoHelper != null) + /** * Receives responses from the model for both streaming and standard requests. * @@ -314,6 +379,7 @@ internal constructor( FirebaseAIException.catchAsync { session.close() stopAudioConversation() + stopVideoConversation() } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/VideoHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/VideoHelper.kt new file mode 100644 index 00000000000..9cd6cdd396b --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/VideoHelper.kt @@ -0,0 +1,225 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.ai.type + +import android.Manifest +import android.graphics.Bitmap +import android.graphics.BitmapFactory +import android.graphics.ImageFormat +import android.hardware.camera2.CameraCaptureSession +import android.hardware.camera2.CameraDevice +import android.hardware.camera2.CameraManager +import android.media.ImageReader +import android.os.Handler +import android.os.Looper +import androidx.annotation.RequiresPermission +import java.io.ByteArrayOutputStream +import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException +import kotlin.math.max +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.cancel +import kotlinx.coroutines.channels.awaitClose +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.callbackFlow +import kotlinx.coroutines.flow.emptyFlow +import kotlinx.coroutines.launch +import kotlinx.coroutines.suspendCancellableCoroutine + +/** + * Helper class for streaming video from the camera. + * + * @see VideoHelper.build + * @see LiveSession.startVideoConversation + */ +@PublicPreviewAPI +internal class VideoHelper( + private val cameraManager: CameraManager, +) { + private var cameraDevice: CameraDevice? = null + private var imageReader: ImageReader? = null + private var session: CameraCaptureSession? = null + private val scope = CoroutineScope(Dispatchers.Default) + + private var released: Boolean = false + + /** + * Release the system resources on the camera. + * + * Once a [VideoHelper] has been "released", it can _not_ be used again. + * + * This method can safely be called multiple times, as it won't do anything if this instance has + * already been released. + */ + fun release() { + if (released) return + released = true + + session?.close() + imageReader?.close() + cameraDevice?.close() + scope.cancel() + } + + /** + * Start perpetually streaming the camera, and return the bytes read in a flow. + * + * Returns an empty flow if this [VideoHelper] has been [released][release]. + */ + @RequiresPermission(Manifest.permission.CAMERA) + suspend fun start(cameraId: String): Flow { + if (released) return emptyFlow() + + cameraDevice = openCamera(cameraId) + val cameraDevice = cameraDevice ?: return emptyFlow() + + val characteristics = cameraManager.getCameraCharacteristics(cameraId) + val streamConfigurationMap = + characteristics.get( + android.hardware.camera2.CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP + ) + val outputSizes = streamConfigurationMap?.getOutputSizes(ImageFormat.JPEG) + val size = outputSizes?.maxByOrNull { it.width * it.height } ?: return emptyFlow() + + imageReader = ImageReader.newInstance(size.width, size.height, ImageFormat.JPEG, 1) + val imageReader = imageReader ?: return emptyFlow() + + session = createCaptureSession(cameraDevice, imageReader) + val session = session ?: return emptyFlow() + + val captureRequest = + session.device.createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW).apply { + addTarget(imageReader.surface) + } + session.setRepeatingRequest(captureRequest.build(), null, null) + + return callbackFlow { + val listener = + ImageReader.OnImageAvailableListener { reader -> + val image = reader.acquireLatestImage() + if (image != null) { + scope.launch { + val buffer = image.planes[0].buffer + val bytes = ByteArray(buffer.remaining()) + buffer.get(bytes) + image.close() + + val scaledBytes = scaleAndCompressImage(bytes) + trySend(scaledBytes) + } + } + } + imageReader.setOnImageAvailableListener(listener, null) + + awaitClose { imageReader.setOnImageAvailableListener(null, null) } + } + } + + private fun scaleAndCompressImage(bytes: ByteArray): ByteArray { + val options = BitmapFactory.Options().apply { inJustDecodeBounds = true } + BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options) + + val width = options.outWidth + val height = options.outHeight + val largestDimension = max(width, height) + + var inSampleSize = 1 + if (largestDimension > 2048) { + val halfLargestDimension = largestDimension / 2 + while ((halfLargestDimension / inSampleSize) >= 2048) { + inSampleSize *= 2 + } + } + + options.inSampleSize = inSampleSize + options.inJustDecodeBounds = false + var bitmap = BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options) + + val scaledWidth = bitmap.width + val scaledHeight = bitmap.height + val scaledLargestDimension = max(scaledWidth, scaledHeight) + if (scaledLargestDimension > 2048) { + val scaleFactor = 2048.0f / scaledLargestDimension + val newWidth = (scaledWidth * scaleFactor).toInt() + val newHeight = (scaledHeight * scaleFactor).toInt() + bitmap = Bitmap.createScaledBitmap(bitmap, newWidth, newHeight, true) + } + + val outputStream = ByteArrayOutputStream() + bitmap.compress(Bitmap.CompressFormat.JPEG, 80, outputStream) + return outputStream.toByteArray() + } + + @RequiresPermission(Manifest.permission.CAMERA) + private suspend fun openCamera(cameraId: String): CameraDevice = + suspendCancellableCoroutine { cont -> + val handler = Handler(Looper.getMainLooper()) + cameraManager.openCamera( + cameraId, + object : CameraDevice.StateCallback() { + override fun onOpened(camera: CameraDevice) { + cont.resume(camera) + } + + override fun onDisconnected(camera: CameraDevice) { + camera.close() + } + + override fun onError(camera: CameraDevice, error: Int) { + camera.close() + cont.resumeWithException(RuntimeException("Failed to open camera. Error: $error")) + } + }, + handler + ) + } + + private suspend fun createCaptureSession( + cameraDevice: CameraDevice, + imageReader: ImageReader + ): CameraCaptureSession = suspendCancellableCoroutine { cont -> + cameraDevice.createCaptureSession( + listOf(imageReader.surface), + object : CameraCaptureSession.StateCallback() { + override fun onConfigured(session: CameraCaptureSession) { + cont.resume(session) + } + + override fun onConfigureFailed(session: CameraCaptureSession) { + cont.resumeWithException(RuntimeException("Failed to create capture session.")) + } + }, + null + ) + } + + companion object { + private val TAG = VideoHelper::class.java.simpleName + + /** + * Creates an instance of [VideoHelper] with the camera manager initialized. + * + * A separate build method is necessary so that we can properly propagate the required manifest + * permission, and throw exceptions when needed. + */ + @RequiresPermission(Manifest.permission.CAMERA) + fun build(cameraManager: CameraManager): VideoHelper { + return VideoHelper(cameraManager) + } + } +}