spring-projects · ThomasVitale · Oct 9, 2024
diff --git a/...s/spring-ai-openai/src/main/java/org/springframework/ai/openai/ImageResponseMetadata.java b/...s/spring-ai-openai/src/main/java/org/springframework/ai/openai/ImageResponseMetadata.java
diff --git a/.../spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java b/.../spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java
@@ -19,34 +19,37 @@
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.springframework.ai.audio.speech.Speech;
+import org.springframework.ai.audio.speech.SpeechModel;
+import org.springframework.ai.audio.speech.SpeechOptions;
+import org.springframework.ai.audio.speech.SpeechPrompt;
+import org.springframework.ai.audio.speech.SpeechResponse;
+import org.springframework.ai.audio.speech.StreamingSpeechModel;
 import org.springframework.ai.chat.metadata.RateLimit;
+import org.springframework.ai.model.ModelOptionsUtils;
 import org.springframework.ai.openai.api.OpenAiAudioApi;
 import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;
-import org.springframework.ai.openai.audio.speech.Speech;
-import org.springframework.ai.openai.audio.speech.SpeechModel;
-import org.springframework.ai.openai.audio.speech.SpeechPrompt;
-import org.springframework.ai.openai.audio.speech.SpeechResponse;
-import org.springframework.ai.openai.audio.speech.StreamingSpeechModel;
 import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMetadata;
 import org.springframework.ai.openai.metadata.support.OpenAiResponseHeaderExtractor;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.http.ResponseEntity;
+import org.springframework.lang.Nullable;
 import org.springframework.retry.support.RetryTemplate;
 import org.springframework.util.Assert;
 import reactor.core.publisher.Flux;
 
 /**
- * OpenAI audio speech client implementation for backed by {@link OpenAiAudioApi}.
+ * OpenAI audio speech client implementation backed by {@link OpenAiAudioApi}.
  *
  * @author Ahmed Yousri
  * @author Hyunjoon Choi
  * @author Thomas Vitale
  * @see OpenAiAudioApi
- * @since 1.0.0-M1
+ * @since 1.0.0
  */
 public class OpenAiAudioSpeechModel implements SpeechModel, StreamingSpeechModel {
 
-	private final Logger logger = LoggerFactory.getLogger(getClass());
+	private static final Logger logger = LoggerFactory.getLogger(OpenAiAudioSpeechModel.class);
 
 	/**
 	 * The default options used for the audio completion requests.
@@ -114,16 +117,10 @@ public OpenAiAudioSpeechModel(OpenAiAudioApi audioApi, OpenAiAudioSpeechOptions
 		this.retryTemplate = retryTemplate;
 	}
 
-	@Override
-	public byte[] call(String text) {
-		SpeechPrompt speechRequest = new SpeechPrompt(text);
-		return call(speechRequest).getResult().getOutput();
-	}
-
 	@Override
 	public SpeechResponse call(SpeechPrompt speechPrompt) {
-
-		OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt);
+		OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions(speechPrompt.getOptions(), this.defaultOptions);
+		OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt, requestSpeechOptions);
 
 		ResponseEntity<byte[]> speechEntity = this.retryTemplate
 			.execute(ctx -> this.audioApi.createSpeech(speechRequest));
@@ -149,53 +146,54 @@ public SpeechResponse call(SpeechPrompt speechPrompt) {
 	 */
 	@Override
 	public Flux<SpeechResponse> stream(SpeechPrompt speechPrompt) {
+		OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions(speechPrompt.getOptions(), this.defaultOptions);
+		OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt, requestSpeechOptions);
 
-		OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt);
-
-		Flux<ResponseEntity<byte[]>> speechEntity = this.retryTemplate
-			.execute(ctx -> this.audioApi.stream(speechRequest));
+		Flux<ResponseEntity<byte[]>> speechEntity = this.audioApi.stream(speechRequest);
 
-		return speechEntity.map(entity -> new SpeechResponse(new Speech(entity.getBody()),
+		return speechEntity.map(entity -> new SpeechResponse(
+				new Speech(entity.getBody() != null ? entity.getBody() : new byte[0]),
 				new OpenAiAudioSpeechResponseMetadata(OpenAiResponseHeaderExtractor.extractAiResponseHeaders(entity))));
 	}
 
-	private OpenAiAudioApi.SpeechRequest createRequest(SpeechPrompt request) {
-		OpenAiAudioSpeechOptions options = this.defaultOptions;
-
-		if (request.getOptions() != null) {
-			if (request.getOptions() instanceof OpenAiAudioSpeechOptions runtimeOptions) {
-				options = this.merge(runtimeOptions, options);
-			}
-			else {
-				throw new IllegalArgumentException("Prompt options are not of type SpeechOptions: "
-						+ request.getOptions().getClass().getSimpleName());
-			}
-		}
-
-		String input = StringUtils.isNotBlank(options.getInput()) ? options.getInput()
+	private OpenAiAudioApi.SpeechRequest createRequest(SpeechPrompt request,
+			OpenAiAudioSpeechOptions requestSpeechOptions) {
+		String input = StringUtils.isNotBlank(requestSpeechOptions.getInput()) ? requestSpeechOptions.getInput()
 				: request.getInstructions().getText();
 
 		OpenAiAudioApi.SpeechRequest.Builder requestBuilder = OpenAiAudioApi.SpeechRequest.builder()
-			.withModel(options.getModel())
+			.withModel(requestSpeechOptions.getModel())
 			.withInput(input)
-			.withVoice(options.getVoice())
-			.withResponseFormat(options.getResponseFormat())
-			.withSpeed(options.getSpeed());
+			.withResponseFormat(requestSpeechOptions.getResponseFormat())
+			.withSpeed(requestSpeechOptions.getSpeed())
+			.withVoice(requestSpeechOptions.getVoice());
 
 		return requestBuilder.build();
 	}
 
-	private OpenAiAudioSpeechOptions merge(OpenAiAudioSpeechOptions source, OpenAiAudioSpeechOptions target) {
-		OpenAiAudioSpeechOptions.Builder mergedBuilder = OpenAiAudioSpeechOptions.builder();
+	/**
+	 * Merge runtime and default {@link SpeechOptions} into the options for the request,
+	 * returning {@code defaultOptions} as-is if the converted runtime options are null.
+	 */
+	private OpenAiAudioSpeechOptions mergeOptions(@Nullable SpeechOptions runtimeOptions,
+			OpenAiAudioSpeechOptions defaultOptions) {
+		var runtimeOptionsForProvider = ModelOptionsUtils.copyToTarget(runtimeOptions, SpeechOptions.class,
+				OpenAiAudioSpeechOptions.class);
 
-		mergedBuilder.withModel(source.getModel() != null ? source.getModel() : target.getModel());
-		mergedBuilder.withInput(source.getInput() != null ? source.getInput() : target.getInput());
-		mergedBuilder.withVoice(source.getVoice() != null ? source.getVoice() : target.getVoice());
-		mergedBuilder.withResponseFormat(
-				source.getResponseFormat() != null ? source.getResponseFormat() : target.getResponseFormat());
-		mergedBuilder.withSpeed(source.getSpeed() != null ? source.getSpeed() : target.getSpeed());
+		if (runtimeOptionsForProvider == null) {
+			return defaultOptions;
+		}
 
-		return mergedBuilder.build();
+		return OpenAiAudioSpeechOptions.builder()
+			// Handle portable options
+			.withModel(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getModel(), defaultOptions.getModel()))
+			// Handle OpenAI-specific options
+			.withInput(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getInput(), defaultOptions.getInput()))
+			.withResponseFormat(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getResponseFormat(),
+					defaultOptions.getResponseFormat()))
+			.withSpeed(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getSpeed(), defaultOptions.getSpeed()))
+			.withVoice(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getVoice(), defaultOptions.getVoice()))
+			.build();
 	}
 
 }
diff --git a/...pring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java b/...pring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java
@@ -18,7 +18,7 @@
 
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
-import org.springframework.ai.model.ModelOptions;
+import org.springframework.ai.audio.speech.SpeechOptions;
 import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;
 import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;
 
@@ -27,10 +27,11 @@
  *
  * @author Ahmed Yousri
  * @author Hyunjoon Choi
- * @since 1.0.0-M1
+ * @author Thomas Vitale
+ * @since 1.0.0
  */
 @JsonInclude(JsonInclude.Include.NON_NULL)
-public class OpenAiAudioSpeechOptions implements ModelOptions {
+public class OpenAiAudioSpeechOptions implements SpeechOptions {
 
 	/**
 	 * ID of the model to use for generating the audio. One of the available TTS models:
@@ -105,6 +106,7 @@ public OpenAiAudioSpeechOptions build() {
 
 	}
 
+	@Override
 	public String getModel() {
 		return model;
 	}

diff --git a/...ring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java b/...ring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java
diff --git a/.../java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java b/.../java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java
@@ -16,41 +16,22 @@
 
 package org.springframework.ai.openai.metadata.audio;
 
+import org.springframework.ai.audio.speech.SpeechResponseMetadata;
 import org.springframework.ai.chat.metadata.EmptyRateLimit;
 import org.springframework.ai.chat.metadata.RateLimit;
-import org.springframework.ai.model.MutableResponseMetadata;
-import org.springframework.ai.model.ResponseMetadata;
-import org.springframework.ai.openai.api.OpenAiAudioApi;
 import org.springframework.lang.Nullable;
-import org.springframework.util.Assert;
-
-import java.util.HashMap;
 
 /**
  * Audio speech metadata implementation for {@literal OpenAI}.
  *
  * @author Ahmed Yousri
+ * @author Thomas Vitale
  * @see RateLimit
  */
-public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata {
+public class OpenAiAudioSpeechResponseMetadata extends SpeechResponseMetadata {
 
 	protected static final String AI_METADATA_STRING = "{ @type: %1$s, requestsLimit: %2$s }";
 
-	public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() {
-	};
-
-	public static OpenAiAudioSpeechResponseMetadata from(OpenAiAudioApi.StructuredResponse result) {
-		Assert.notNull(result, "OpenAI speech must not be null");
-		OpenAiAudioSpeechResponseMetadata speechResponseMetadata = new OpenAiAudioSpeechResponseMetadata();
-		return speechResponseMetadata;
-	}
-
-	public static OpenAiAudioSpeechResponseMetadata from(String result) {
-		Assert.notNull(result, "OpenAI speech must not be null");
-		OpenAiAudioSpeechResponseMetadata speechResponseMetadata = new OpenAiAudioSpeechResponseMetadata();
-		return speechResponseMetadata;
-	}
-
 	@Nullable
 	private RateLimit rateLimit;
 
@@ -62,17 +43,11 @@ public OpenAiAudioSpeechResponseMetadata(@Nullable RateLimit rateLimit) {
 		this.rateLimit = rateLimit;
 	}
 
-	@Nullable
 	public RateLimit getRateLimit() {
 		RateLimit rateLimit = this.rateLimit;
 		return rateLimit != null ? rateLimit : new EmptyRateLimit();
 	}
 
-	public OpenAiAudioSpeechResponseMetadata withRateLimit(RateLimit rateLimit) {
-		this.rateLimit = rateLimit;
-		return this;
-	}
-
 	@Override
 	public String toString() {
 		return AI_METADATA_STRING.formatted(getClass().getName(), getRateLimit());

diff --git a/...ng-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/package-info.java b/...ng-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@NonNullApi
+@NonNullFields
+package org.springframework.ai.openai.metadata.audio;
+
+import org.springframework.lang.NonNullApi;
+import org.springframework.lang.NonNullFields;
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/package-info.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@NonNullApi
+@NonNullFields
+package org.springframework.ai.openai;
+
+import org.springframework.lang.NonNullApi;
+import org.springframework.lang.NonNullFields;