diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/ImageResponseMetadata.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/ImageResponseMetadata.java deleted file mode 100644 index 3ec1ad510c0..00000000000 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/ImageResponseMetadata.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.springframework.ai.openai; - -public interface ImageResponseMetadata { - -} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java index 13057cb1a32..99efec0d9af 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java @@ -19,34 +19,37 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.ai.audio.speech.Speech; +import org.springframework.ai.audio.speech.SpeechModel; +import org.springframework.ai.audio.speech.SpeechOptions; +import org.springframework.ai.audio.speech.SpeechPrompt; +import org.springframework.ai.audio.speech.SpeechResponse; +import org.springframework.ai.audio.speech.StreamingSpeechModel; import org.springframework.ai.chat.metadata.RateLimit; +import org.springframework.ai.model.ModelOptionsUtils; import org.springframework.ai.openai.api.OpenAiAudioApi; import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat; -import org.springframework.ai.openai.audio.speech.Speech; -import org.springframework.ai.openai.audio.speech.SpeechModel; -import org.springframework.ai.openai.audio.speech.SpeechPrompt; -import org.springframework.ai.openai.audio.speech.SpeechResponse; -import org.springframework.ai.openai.audio.speech.StreamingSpeechModel; import 
org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMetadata; import org.springframework.ai.openai.metadata.support.OpenAiResponseHeaderExtractor; import org.springframework.ai.retry.RetryUtils; import org.springframework.http.ResponseEntity; +import org.springframework.lang.Nullable; import org.springframework.retry.support.RetryTemplate; import org.springframework.util.Assert; import reactor.core.publisher.Flux; /** - * OpenAI audio speech client implementation for backed by {@link OpenAiAudioApi}. + * OpenAI audio speech client implementation backed by {@link OpenAiAudioApi}. * * @author Ahmed Yousri * @author Hyunjoon Choi * @author Thomas Vitale * @see OpenAiAudioApi - * @since 1.0.0-M1 + * @since 1.0.0 */ public class OpenAiAudioSpeechModel implements SpeechModel, StreamingSpeechModel { - private final Logger logger = LoggerFactory.getLogger(getClass()); + private final static Logger logger = LoggerFactory.getLogger(OpenAiAudioSpeechModel.class); /** * The default options used for the audio completion requests. 
@@ -114,16 +117,10 @@ public OpenAiAudioSpeechModel(OpenAiAudioApi audioApi, OpenAiAudioSpeechOptions this.retryTemplate = retryTemplate; } - @Override - public byte[] call(String text) { - SpeechPrompt speechRequest = new SpeechPrompt(text); - return call(speechRequest).getResult().getOutput(); - } - @Override public SpeechResponse call(SpeechPrompt speechPrompt) { - - OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt); + OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions(speechPrompt.getOptions(), this.defaultOptions); + OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt, requestSpeechOptions); ResponseEntity speechEntity = this.retryTemplate .execute(ctx -> this.audioApi.createSpeech(speechRequest)); @@ -149,53 +146,54 @@ public SpeechResponse call(SpeechPrompt speechPrompt) { */ @Override public Flux stream(SpeechPrompt speechPrompt) { + OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions(speechPrompt.getOptions(), this.defaultOptions); + OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt, requestSpeechOptions); - OpenAiAudioApi.SpeechRequest speechRequest = createRequest(speechPrompt); - - Flux> speechEntity = this.retryTemplate - .execute(ctx -> this.audioApi.stream(speechRequest)); + Flux> speechEntity = this.audioApi.stream(speechRequest); - return speechEntity.map(entity -> new SpeechResponse(new Speech(entity.getBody()), + return speechEntity.map(entity -> new SpeechResponse( + new Speech(entity.getBody() != null ? 
entity.getBody() : new byte[0]), new OpenAiAudioSpeechResponseMetadata(OpenAiResponseHeaderExtractor.extractAiResponseHeaders(entity)))); } - private OpenAiAudioApi.SpeechRequest createRequest(SpeechPrompt request) { - OpenAiAudioSpeechOptions options = this.defaultOptions; - - if (request.getOptions() != null) { - if (request.getOptions() instanceof OpenAiAudioSpeechOptions runtimeOptions) { - options = this.merge(runtimeOptions, options); - } - else { - throw new IllegalArgumentException("Prompt options are not of type SpeechOptions: " - + request.getOptions().getClass().getSimpleName()); - } - } - - String input = StringUtils.isNotBlank(options.getInput()) ? options.getInput() + private OpenAiAudioApi.SpeechRequest createRequest(SpeechPrompt request, + OpenAiAudioSpeechOptions requestSpeechOptions) { + String input = StringUtils.isNotBlank(requestSpeechOptions.getInput()) ? requestSpeechOptions.getInput() : request.getInstructions().getText(); OpenAiAudioApi.SpeechRequest.Builder requestBuilder = OpenAiAudioApi.SpeechRequest.builder() - .withModel(options.getModel()) + .withModel(requestSpeechOptions.getModel()) .withInput(input) - .withVoice(options.getVoice()) - .withResponseFormat(options.getResponseFormat()) - .withSpeed(options.getSpeed()); + .withResponseFormat(requestSpeechOptions.getResponseFormat()) + .withSpeed(requestSpeechOptions.getSpeed()) + .withVoice(requestSpeechOptions.getVoice()); return requestBuilder.build(); } - private OpenAiAudioSpeechOptions merge(OpenAiAudioSpeechOptions source, OpenAiAudioSpeechOptions target) { - OpenAiAudioSpeechOptions.Builder mergedBuilder = OpenAiAudioSpeechOptions.builder(); + /** + * Merge runtime and default {@link SpeechOptions} to compute the final options to use + * in the request. 
+ */ + private OpenAiAudioSpeechOptions mergeOptions(@Nullable SpeechOptions runtimeOptions, + OpenAiAudioSpeechOptions defaultOptions) { + var runtimeOptionsForProvider = ModelOptionsUtils.copyToTarget(runtimeOptions, SpeechOptions.class, + OpenAiAudioSpeechOptions.class); - mergedBuilder.withModel(source.getModel() != null ? source.getModel() : target.getModel()); - mergedBuilder.withInput(source.getInput() != null ? source.getInput() : target.getInput()); - mergedBuilder.withVoice(source.getVoice() != null ? source.getVoice() : target.getVoice()); - mergedBuilder.withResponseFormat( - source.getResponseFormat() != null ? source.getResponseFormat() : target.getResponseFormat()); - mergedBuilder.withSpeed(source.getSpeed() != null ? source.getSpeed() : target.getSpeed()); + if (runtimeOptionsForProvider == null) { + return defaultOptions; + } - return mergedBuilder.build(); + return OpenAiAudioSpeechOptions.builder() + // Handle portable options + .withModel(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getModel(), defaultOptions.getModel())) + // Handle OpenAI specific options + .withInput(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getInput(), defaultOptions.getInput())) + .withResponseFormat(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getResponseFormat(), + defaultOptions.getResponseFormat())) + .withSpeed(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getSpeed(), defaultOptions.getSpeed())) + .withVoice(ModelOptionsUtils.mergeOption(runtimeOptionsForProvider.getVoice(), defaultOptions.getVoice())) + .build(); } } diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java index 8d6ca7c9de6..4f64613b1f7 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java +++ 
b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java @@ -18,7 +18,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import org.springframework.ai.model.ModelOptions; +import org.springframework.ai.audio.speech.SpeechOptions; import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat; import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice; @@ -27,10 +27,11 @@ * * @author Ahmed Yousri * @author Hyunjoon Choi - * @since 1.0.0-M1 + * @author Thomas Vitale + * @since 1.0.0 */ @JsonInclude(JsonInclude.Include.NON_NULL) -public class OpenAiAudioSpeechOptions implements ModelOptions { +public class OpenAiAudioSpeechOptions implements SpeechOptions { /** * ID of the model to use for generating the audio. One of the available TTS models: @@ -105,6 +106,7 @@ public OpenAiAudioSpeechOptions build() { } + @Override public String getModel() { return model; } diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java deleted file mode 100644 index 8cb21684d65..00000000000 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechPrompt.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2023 - 2024 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.springframework.ai.openai.audio.speech; - -import org.springframework.ai.model.ModelOptions; -import org.springframework.ai.model.ModelRequest; -import org.springframework.ai.openai.OpenAiAudioSpeechOptions; - -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -/** - * The {@link SpeechPrompt} class represents a request to the OpenAI Text-to-Speech (TTS) - * API. It contains a list of {@link SpeechMessage} objects, each representing a piece of - * text to be converted to speech. - * - * @author Ahmed Yousri - * @since 1.0.0-M1 - */ -public class SpeechPrompt implements ModelRequest { - - private OpenAiAudioSpeechOptions speechOptions; - - private final SpeechMessage message; - - public SpeechPrompt(String instructions) { - this(new SpeechMessage(instructions), OpenAiAudioSpeechOptions.builder().build()); - } - - public SpeechPrompt(String instructions, OpenAiAudioSpeechOptions speechOptions) { - this(new SpeechMessage(instructions), speechOptions); - } - - public SpeechPrompt(SpeechMessage speechMessage) { - this(speechMessage, OpenAiAudioSpeechOptions.builder().build()); - } - - public SpeechPrompt(SpeechMessage speechMessage, OpenAiAudioSpeechOptions speechOptions) { - this.message = speechMessage; - this.speechOptions = speechOptions; - } - - @Override - public SpeechMessage getInstructions() { - return this.message; - } - - @Override - public ModelOptions getOptions() { - return speechOptions; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof SpeechPrompt that)) - return false; - return Objects.equals(speechOptions, that.speechOptions) && Objects.equals(message, that.message); - } - - @Override - public int hashCode() { - return Objects.hash(speechOptions, message); - } - -} diff --git 
a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java index efcb6ebca74..590a0f4da2b 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechResponseMetadata.java @@ -16,41 +16,22 @@ package org.springframework.ai.openai.metadata.audio; +import org.springframework.ai.audio.speech.SpeechResponseMetadata; import org.springframework.ai.chat.metadata.EmptyRateLimit; import org.springframework.ai.chat.metadata.RateLimit; -import org.springframework.ai.model.MutableResponseMetadata; -import org.springframework.ai.model.ResponseMetadata; -import org.springframework.ai.openai.api.OpenAiAudioApi; import org.springframework.lang.Nullable; -import org.springframework.util.Assert; - -import java.util.HashMap; /** * Audio speech metadata implementation for {@literal OpenAI}. 
* * @author Ahmed Yousri + * @author Thomas Vitale * @see RateLimit */ -public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata { +public class OpenAiAudioSpeechResponseMetadata extends SpeechResponseMetadata { protected static final String AI_METADATA_STRING = "{ @type: %1$s, requestsLimit: %2$s }"; - public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() { - }; - - public static OpenAiAudioSpeechResponseMetadata from(OpenAiAudioApi.StructuredResponse result) { - Assert.notNull(result, "OpenAI speech must not be null"); - OpenAiAudioSpeechResponseMetadata speechResponseMetadata = new OpenAiAudioSpeechResponseMetadata(); - return speechResponseMetadata; - } - - public static OpenAiAudioSpeechResponseMetadata from(String result) { - Assert.notNull(result, "OpenAI speech must not be null"); - OpenAiAudioSpeechResponseMetadata speechResponseMetadata = new OpenAiAudioSpeechResponseMetadata(); - return speechResponseMetadata; - } - @Nullable private RateLimit rateLimit; @@ -62,17 +43,11 @@ public OpenAiAudioSpeechResponseMetadata(@Nullable RateLimit rateLimit) { this.rateLimit = rateLimit; } - @Nullable public RateLimit getRateLimit() { RateLimit rateLimit = this.rateLimit; return rateLimit != null ? 
rateLimit : new EmptyRateLimit(); } - public OpenAiAudioSpeechResponseMetadata withRateLimit(RateLimit rateLimit) { - this.rateLimit = rateLimit; - return this; - } - @Override public String toString() { return AI_METADATA_STRING.formatted(getClass().getName(), getRateLimit()); diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/package-info.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/package-info.java new file mode 100644 index 00000000000..1869b8af251 --- /dev/null +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@NonNullApi +@NonNullFields +package org.springframework.ai.openai.metadata.audio; + +import org.springframework.lang.NonNullApi; +import org.springframework.lang.NonNullFields; diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/package-info.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/package-info.java new file mode 100644 index 00000000000..796a36bd119 --- /dev/null +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@NonNullApi +@NonNullFields +package org.springframework.ai.openai; + +import org.springframework.lang.NonNullApi; +import org.springframework.lang.NonNullFields; diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java index 0ff96b259f1..8799350deff 100644 --- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java @@ -18,6 +18,9 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.springframework.ai.audio.speech.SpeechPrompt; +import org.springframework.ai.audio.speech.SpeechResponse; +import org.springframework.ai.audio.speech.SpeechResponseMetadata; import org.springframework.ai.openai.OpenAiAudioSpeechOptions; import org.springframework.ai.openai.OpenAiTestConfiguration; import org.springframework.ai.openai.api.OpenAiAudioApi; @@ -48,7 +51,6 @@ void shouldSuccessfullyStreamAudioBytesForEmptyMessage() { void shouldProduceAudioBytesDirectlyFromMessage() { byte[] audioBytes = speechModel.call("Today is a wonderful day to build something people love!"); assertThat(audioBytes).hasSizeGreaterThan(0); - } @Test 
@@ -59,14 +61,18 @@ void shouldGenerateNonEmptyMp3AudioFromSpeechPrompt() { .withResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3) .withModel(OpenAiAudioApi.TtsModel.TTS_1.value) .build(); - SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", - speechOptions); + + SpeechPrompt speechPrompt = SpeechPrompt.builder() + .withMessage("Today is a wonderful day to build something people love!") + .withSpeechOptions(speechOptions) + .build(); + SpeechResponse response = speechModel.call(speechPrompt); + byte[] audioBytes = response.getResult().getOutput(); assertThat(response.getResults()).hasSize(1); assertThat(response.getResults().get(0).getOutput()).isNotEmpty(); assertThat(audioBytes).hasSizeGreaterThan(0); - } @Test @@ -77,20 +83,24 @@ void speechRateLimitTest() { .withResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3) .withModel(OpenAiAudioApi.TtsModel.TTS_1.value) .build(); - SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", - speechOptions); + + SpeechPrompt speechPrompt = SpeechPrompt.builder() + .withMessage("Today is a wonderful day to build something people love!") + .withSpeechOptions(speechOptions) + .build(); + SpeechResponse response = speechModel.call(speechPrompt); - OpenAiAudioSpeechResponseMetadata metadata = response.getMetadata(); - assertThat(metadata).isNotNull(); - assertThat(metadata.getRateLimit()).isNotNull(); - assertThat(metadata.getRateLimit().getRequestsLimit()).isPositive(); - assertThat(metadata.getRateLimit().getRequestsLimit()).isPositive(); + SpeechResponseMetadata metadata = response.getMetadata(); + assertThat(metadata).isNotNull(); + assertThat(metadata).isInstanceOf(OpenAiAudioSpeechResponseMetadata.class); + assertThat(((OpenAiAudioSpeechResponseMetadata) metadata).getRateLimit()).isNotNull(); + assertThat(((OpenAiAudioSpeechResponseMetadata) 
metadata).getRateLimit().getRequestsLimit()).isPositive(); + assertThat(((OpenAiAudioSpeechResponseMetadata) metadata).getRateLimit().getRequestsLimit()).isPositive(); } @Test void shouldStreamNonEmptyResponsesForValidSpeechPrompts() { - OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder() .withVoice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY) .withSpeed(SPEED) @@ -98,10 +108,14 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() { .withModel(OpenAiAudioApi.TtsModel.TTS_1.value) .build(); - SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", - speechOptions); + SpeechPrompt speechPrompt = SpeechPrompt.builder() + .withMessage("Today is a wonderful day to build something people love!") + .withSpeechOptions(speechOptions) + .build(); + Flux responseFlux = speechModel.stream(speechPrompt); assertThat(responseFlux).isNotNull(); + List responses = responseFlux.collectList().block(); assertThat(responses).isNotNull(); responses.forEach(response -> { @@ -110,4 +124,4 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() { }); } -} \ No newline at end of file +} diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java index 089c9c8240d..f2a294b7dc2 100644 --- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java @@ -18,6 +18,9 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; +import org.springframework.ai.audio.speech.SpeechPrompt; +import org.springframework.ai.audio.speech.SpeechResponse; +import 
org.springframework.ai.audio.speech.SpeechResponseMetadata; import org.springframework.ai.openai.OpenAiAudioSpeechModel; import org.springframework.ai.openai.OpenAiAudioSpeechOptions; import org.springframework.ai.openai.api.OpenAiAudioApi; @@ -44,11 +47,12 @@ /** * @author Ahmed Yousri + * @author Thomas Vitale */ @RestClientTest(OpenAiSpeechModelWithSpeechResponseMetadataTests.Config.class) public class OpenAiSpeechModelWithSpeechResponseMetadataTests { - private static String TEST_API_KEY = "sk-1234567890"; + private static final String TEST_API_KEY = "sk-1234567890"; private static final Float SPEED = 1.0f; @@ -65,7 +69,6 @@ void resetMockServer() { @Test void aiResponseContainsImageResponseMetadata() { - prepareMock(); OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder() @@ -75,16 +78,20 @@ void aiResponseContainsImageResponseMetadata() { .withModel(OpenAiAudioApi.TtsModel.TTS_1.value) .build(); - SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", - speechOptions); + SpeechPrompt speechPrompt = SpeechPrompt.builder() + .withMessage("Today is a wonderful day to build something people love!") + .withSpeechOptions(speechOptions) + .build(); SpeechResponse response = openAiSpeechClient.call(speechPrompt); byte[] audioBytes = response.getResult().getOutput(); assertThat(audioBytes).hasSizeGreaterThan(0); - OpenAiAudioSpeechResponseMetadata speechResponseMetadata = response.getMetadata(); + SpeechResponseMetadata speechResponseMetadata = response.getMetadata(); assertThat(speechResponseMetadata).isNotNull(); - var requestLimit = speechResponseMetadata.getRateLimit(); + assertThat(speechResponseMetadata).isInstanceOf(OpenAiAudioSpeechResponseMetadata.class); + var requestLimit = ((OpenAiAudioSpeechResponseMetadata) speechResponseMetadata).getRateLimit(); + assertThat(requestLimit).isNotNull(); Long requestsLimit = requestLimit.getRequestsLimit(); Long tokensLimit = 
requestLimit.getTokensLimit(); Long tokensRemaining = requestLimit.getTokensRemaining(); @@ -96,11 +103,9 @@ void aiResponseContainsImageResponseMetadata() { assertThat(tokensRemaining).isEqualTo(112358L); assertThat(requestsRemaining).isEqualTo(999L); assertThat(requestsReset).isEqualTo(Duration.parse("PT64H15M29S")); - } private void prepareMock() { - HttpHeaders httpHeaders = new HttpHeaders(); httpHeaders.set(OpenAiApiResponseHeaders.REQUESTS_LIMIT_HEADER.getName(), "4000"); httpHeaders.set(OpenAiApiResponseHeaders.REQUESTS_REMAINING_HEADER.getName(), "999"); @@ -114,7 +119,6 @@ private void prepareMock() { .andExpect(method(HttpMethod.POST)) .andExpect(header(HttpHeaders.AUTHORIZATION, "Bearer " + TEST_API_KEY)) .andRespond(withSuccess("Audio bytes as string", MediaType.APPLICATION_OCTET_STREAM).headers(httpHeaders)); - } @SpringBootConfiguration @@ -132,4 +136,4 @@ public OpenAiAudioApi openAiAudioApi(RestClient.Builder builder) { } -} \ No newline at end of file +} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/Speech.java similarity index 66% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/Speech.java index 5921940212c..c6c35490467 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/Speech.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/Speech.java @@ -13,30 +13,36 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.springframework.ai.openai.audio.speech; +package org.springframework.ai.audio.speech; import org.springframework.ai.model.ModelResult; -import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechMetadata; -import org.springframework.lang.Nullable; +import org.springframework.util.Assert; import java.util.Arrays; import java.util.Objects; /** - * The Speech class represents the result of speech synthesis from an AI model. It - * implements the ModelResult interface with the output type of byte array. + * The result of speech synthesis from an AI model. * * @author Ahmed Yousri - * @since 1.0.0-M1 + * @author Thomas Vitale + * @since 1.0.0 */ public class Speech implements ModelResult { private final byte[] audio; - private OpenAiAudioSpeechMetadata speechMetadata; + private final SpeechMetadata speechMetadata; public Speech(byte[] audio) { + this(audio, SpeechMetadata.EMPTY); + } + + public Speech(byte[] audio, SpeechMetadata speechMetadata) { + Assert.notNull(audio, "audio cannot be null"); + Assert.notNull(speechMetadata, "speechMetadata cannot be null"); this.audio = audio; + this.speechMetadata = speechMetadata; } @Override @@ -45,13 +51,8 @@ public byte[] getOutput() { } @Override - public OpenAiAudioSpeechMetadata getMetadata() { - return speechMetadata != null ? 
speechMetadata : OpenAiAudioSpeechMetadata.NULL; - } - - public Speech withSpeechMetadata(@Nullable OpenAiAudioSpeechMetadata speechMetadata) { - this.speechMetadata = speechMetadata; - return this; + public SpeechMetadata getMetadata() { + return speechMetadata; } @Override @@ -70,7 +71,7 @@ public int hashCode() { @Override public String toString() { - return "Speech{" + "text=" + audio + ", speechMetadata=" + speechMetadata + '}'; + return "Speech{" + "text=, speechMetadata=" + speechMetadata + "}"; } -} \ No newline at end of file +} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMessage.java similarity index 70% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMessage.java index dcc96251b63..e806beabdf4 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechMessage.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMessage.java @@ -13,43 +13,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.springframework.ai.openai.audio.speech; +package org.springframework.ai.audio.speech; + +import org.springframework.util.Assert; import java.util.Objects; /** * The {@link SpeechMessage} class represents a single text message to be converted to - * speech by the OpenAI TTS API. + * speech. * * @author Ahmed Yousri + * @author Thomas Vitale * @since 1.0.0-M1 */ public class SpeechMessage { - private String text; + private final String text; - /** - * Constructs a new {@link SpeechMessage} object with the given text. 
- * @param text the text to be converted to speech - */ public SpeechMessage(String text) { + Assert.hasText(text, "text cannot be null or empty"); this.text = text; } - /** - * Returns the text of this speech message. - * @return the text of this speech message - */ public String getText() { return text; } - /** - * Sets the text of this speech message. - * @param text the new text for this speech message - */ - public void setText(String text) { - this.text = text; + @Override + public String toString() { + return "SpeechMessage{" + "text='" + text + "'}"; } @Override @@ -66,4 +59,4 @@ public int hashCode() { return Objects.hash(text); } -} \ No newline at end of file +} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechMetadata.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMetadata.java similarity index 56% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechMetadata.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMetadata.java index 85289d85408..d5ad0f79572 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/metadata/audio/OpenAiAudioSpeechMetadata.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechMetadata.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 - 2024 the original author or authors. + * Copyright 2024 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,21 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -package org.springframework.ai.openai.metadata.audio; +package org.springframework.ai.audio.speech; import org.springframework.ai.model.ResultMetadata; -public interface OpenAiAudioSpeechMetadata extends ResultMetadata { +/** + * Metadata associated with the results of an AI model for speech synthesis. + * + * @author Thomas Vitale + * @since 1.0.0 + */ +public interface SpeechMetadata extends ResultMetadata { - OpenAiAudioSpeechMetadata NULL = OpenAiAudioSpeechMetadata.create(); + SpeechMetadata EMPTY = SpeechMetadata.create(); - /** - * Factory method used to construct a new {@link OpenAiAudioSpeechMetadata} - * @return a new {@link OpenAiAudioSpeechMetadata} - */ - static OpenAiAudioSpeechMetadata create() { - return new OpenAiAudioSpeechMetadata() { + static SpeechMetadata create() { + return new SpeechMetadata() { }; } diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechModel.java similarity index 68% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechModel.java index 9d976fd7510..a253810a4c9 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechModel.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechModel.java @@ -14,17 +14,17 @@ * limitations under the License. */ -package org.springframework.ai.openai.audio.speech; +package org.springframework.ai.audio.speech; import org.springframework.ai.model.Model; /** - * The {@link SpeechModel} interface provides a way to interact with the OpenAI - * Text-to-Speech (TTS) API. It allows you to convert text input into lifelike spoken - * audio. + * API for interacting with AI models specialized in speech synthesis, converting textual + * input into lifelike spoken audio. 
* * @author Ahmed Yousri - * @since 1.0.0-M1 + * @author Thomas Vitale + * @since 1.0.0 */ @FunctionalInterface public interface SpeechModel extends Model { @@ -35,16 +35,16 @@ public interface SpeechModel extends Model { * @return the resulting audio bytes */ default byte[] call(String message) { - SpeechPrompt prompt = new SpeechPrompt(message); + SpeechPrompt prompt = SpeechPrompt.builder().withMessage(message).build(); return call(prompt).getResult().getOutput(); } /** - * Sends a speech request to the OpenAI TTS API and returns the resulting speech - * response. - * @param request the speech prompt containing the input text and other parameters + * Sends a speech request to the AI model and returns the resulting speech response. + * @param speechPrompt the speech prompt containing the input text and other + * parameters * @return the speech response containing the generated audio */ - SpeechResponse call(SpeechPrompt request); + SpeechResponse call(SpeechPrompt speechPrompt); } diff --git a/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptions.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptions.java new file mode 100644 index 00000000000..f9ba94188d8 --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptions.java @@ -0,0 +1,32 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.audio.speech; + +import org.springframework.ai.model.ModelOptions; +import org.springframework.lang.Nullable; + +/** + * SpeechOptions represent the common options, portable across different speech generation + * models. + * + * @author Thomas Vitale + */ +public interface SpeechOptions extends ModelOptions { + + @Nullable + String getModel(); + +} diff --git a/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptionsBuilder.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptionsBuilder.java new file mode 100644 index 00000000000..6c4627adf37 --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechOptionsBuilder.java @@ -0,0 +1,63 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.audio.speech; + +import org.springframework.lang.Nullable; + +/** + * Build generic and portable {@link SpeechOptions} instances. 
+ * + * @author Thomas Vitale + * @since 1.0.0 + */ +public class SpeechOptionsBuilder { + + private final DefaultSpeechOptions speechOptions = new DefaultSpeechOptions(); + + private SpeechOptionsBuilder() { + } + + public static SpeechOptionsBuilder builder() { + return new SpeechOptionsBuilder(); + } + + public SpeechOptionsBuilder withModel(@Nullable String model) { + speechOptions.setModel(model); + return this; + } + + public SpeechOptions build() { + return speechOptions; + } + + private static class DefaultSpeechOptions implements SpeechOptions { + + @Nullable + private String model; + + @Override + @Nullable + public String getModel() { + return model; + } + + public void setModel(@Nullable String model) { + this.model = model; + } + + } + +} diff --git a/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechPrompt.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechPrompt.java new file mode 100644 index 00000000000..83c765efc65 --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechPrompt.java @@ -0,0 +1,111 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.audio.speech; + +import org.springframework.ai.model.ModelRequest; +import org.springframework.lang.Nullable; +import org.springframework.util.Assert; + +import java.util.Objects; + +/** + * Represents a request to a speech-synthesis AI model. + * + * @author Ahmed Yousri + * @author Thomas Vitale + * @since 1.0.0-M1 + */ +public class SpeechPrompt implements ModelRequest { + + private final SpeechOptions speechOptions; + + private final SpeechMessage message; + + public SpeechPrompt(SpeechMessage message, SpeechOptions speechOptions) { + Assert.notNull(message, "message cannot be null"); + Assert.notNull(speechOptions, "speechOptions cannot be null"); + this.message = message; + this.speechOptions = speechOptions; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public SpeechMessage getInstructions() { + return this.message; + } + + @Override + public SpeechOptions getOptions() { + return speechOptions; + } + + @Override + public String toString() { + return "SpeechPrompt{" + "message=" + message + ", speechOptions=" + speechOptions + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof SpeechPrompt that)) + return false; + return Objects.equals(speechOptions, that.speechOptions) && Objects.equals(message, that.message); + } + + @Override + public int hashCode() { + return Objects.hash(speechOptions, message); + } + + public static class Builder { + + @Nullable + private SpeechMessage message; + + @Nullable + private SpeechOptions speechOptions; + + private Builder() { + } + + public Builder withMessage(String message) { + this.message = new SpeechMessage(message); + return this; + } + + public Builder withMessage(SpeechMessage message) { + this.message = message; + return this; + } + + public Builder withSpeechOptions(@Nullable SpeechOptions speechOptions) { + this.speechOptions = speechOptions; + return this; + } + + public 
SpeechPrompt build() { + Assert.notNull(message, "message cannot be null"); + var options = speechOptions == null ? SpeechOptionsBuilder.builder().build() : speechOptions; + return new SpeechPrompt(message, options); + } + + } + +} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponse.java similarity index 69% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponse.java index 028bbf22834..fe9f3a39645 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/SpeechResponse.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponse.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 - 2024 the original author or authors. + * Copyright 2024 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,26 +14,26 @@ * limitations under the License. */ -package org.springframework.ai.openai.audio.speech; +package org.springframework.ai.audio.speech; import org.springframework.ai.model.ModelResponse; -import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMetadata; +import org.springframework.util.Assert; -import java.util.Collections; import java.util.List; import java.util.Objects; /** - * Creates a new instance of SpeechResponse with the given speech result. + * The speech completion (i.e. speech generation) response returned by an AI provider. 
* * @author Ahmed Yousri - * @since 1.0.0-M1 + * @author Thomas Vitale + * @since 1.0.0 */ public class SpeechResponse implements ModelResponse { private final Speech speech; - private final OpenAiAudioSpeechResponseMetadata speechResponseMetadata; + private final SpeechResponseMetadata speechResponseMetadata; /** * Creates a new instance of SpeechResponse with the given speech result. @@ -41,7 +41,7 @@ public class SpeechResponse implements ModelResponse { * @see Speech */ public SpeechResponse(Speech speech) { - this(speech, OpenAiAudioSpeechResponseMetadata.NULL); + this(speech, new SpeechResponseMetadata()); } /** @@ -51,9 +51,11 @@ public SpeechResponse(Speech speech) { * @param speechResponseMetadata the speech response metadata to be set in the * SpeechResponse * @see Speech - * @see OpenAiAudioSpeechResponseMetadata + * @see SpeechResponseMetadata */ - public SpeechResponse(Speech speech, OpenAiAudioSpeechResponseMetadata speechResponseMetadata) { + public SpeechResponse(Speech speech, SpeechResponseMetadata speechResponseMetadata) { + Assert.notNull(speech, "speech cannot be null"); + Assert.notNull(speechResponseMetadata, "speechResponseMetadata cannot be null"); this.speech = speech; this.speechResponseMetadata = speechResponseMetadata; } @@ -65,14 +67,19 @@ public Speech getResult() { @Override public List getResults() { - return Collections.singletonList(speech); + return List.of(speech); } @Override - public OpenAiAudioSpeechResponseMetadata getMetadata() { + public SpeechResponseMetadata getMetadata() { return speechResponseMetadata; } + @Override + public String toString() { + return "SpeechResponse [" + "speechResponseMetadata=" + speechResponseMetadata + ", speech=" + speech + "]"; + } + @Override public boolean equals(Object o) { if (this == o) @@ -88,4 +95,4 @@ public int hashCode() { return Objects.hash(speech, speechResponseMetadata); } -} \ No newline at end of file +} diff --git 
a/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponseMetadata.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponseMetadata.java new file mode 100644 index 00000000000..d251ec851ba --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/SpeechResponseMetadata.java @@ -0,0 +1,29 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.audio.speech; + +import org.springframework.ai.model.MutableResponseMetadata; + +/** + * Represents metadata associated with a speech response. It provides additional + * information about the generative response from an AI model. 
+ * + * @author Thomas Vitale + * @since 1.0.0 + */ +public class SpeechResponseMetadata extends MutableResponseMetadata { + +} diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/StreamingSpeechModel.java similarity index 83% rename from models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java rename to spring-ai-core/src/main/java/org/springframework/ai/audio/speech/StreamingSpeechModel.java index a8ae06b0739..db790a0c60d 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/speech/StreamingSpeechModel.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/StreamingSpeechModel.java @@ -14,17 +14,17 @@ * limitations under the License. */ -package org.springframework.ai.openai.audio.speech; +package org.springframework.ai.audio.speech; import org.springframework.ai.model.StreamingModel; import reactor.core.publisher.Flux; /** - * The {@link StreamingSpeechModel} interface provides a way to interact with the OpenAI - * Text-to-Speech (TTS) API using a streaming approach, allowing you to receive the - * generated audio in a real-time fashion. + * API for interacting with AI models specialized in speech synthesis, converting textual + * input into lifelike spoken audio which is streamed in a real-time fashion. 
* * @author Ahmed Yousri + * @author Thomas Vitale * @since 1.0.0-M1 */ @FunctionalInterface @@ -36,7 +36,7 @@ public interface StreamingSpeechModel extends StreamingModel stream(String message) { - SpeechPrompt prompt = new SpeechPrompt(message); + SpeechPrompt prompt = SpeechPrompt.builder().withMessage(message).build(); return stream(prompt).map(SpeechResponse::getResult).map(Speech::getOutput); } @@ -50,4 +50,4 @@ default Flux stream(String message) { @Override Flux stream(SpeechPrompt prompt); -} \ No newline at end of file +} diff --git a/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/package-info.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/package-info.java new file mode 100644 index 00000000000..229906b857a --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/speech/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@NonNullApi +@NonNullFields +package org.springframework.ai.audio.speech; + +import org.springframework.lang.NonNullApi; +import org.springframework.lang.NonNullFields; diff --git a/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechMessageTests.java b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechMessageTests.java new file mode 100644 index 00000000000..52bb586cca5 --- /dev/null +++ b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechMessageTests.java @@ -0,0 +1,49 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.audio.speech; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link SpeechMessage}. 
+ * + * @author Thomas Vitale + */ +class SpeechMessageTests { + + @Test + void whenBuildSpeechMessage() { + var text = "They're taking the hobbits to Isengard!"; + var speechMessage = new SpeechMessage(text); + assertThat(speechMessage.getText()).isEqualTo(text); + } + + @Test + void whenTextIsNullThenThrow() { + assertThatThrownBy(() -> new SpeechMessage(null)).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text cannot be null or empty"); + } + + @Test + void whenTextIsEmptyThenThrow() { + assertThatThrownBy(() -> new SpeechMessage("")).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("text cannot be null or empty"); + } + +} \ No newline at end of file diff --git a/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechOptionsBuilderTests.java b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechOptionsBuilderTests.java new file mode 100644 index 00000000000..472db7d1543 --- /dev/null +++ b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechOptionsBuilderTests.java @@ -0,0 +1,36 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.audio.speech; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests for {@link SpeechOptionsBuilder}. 
+ * + * @author Thomas Vitale + */ +class SpeechOptionsBuilderTests { + + @Test + void whenBuildSpeechOptions() { + var model = "speechinator"; + var speechOptions = SpeechOptionsBuilder.builder().withModel(model).build(); + assertThat(speechOptions.getModel()).isEqualTo(model); + } + +} \ No newline at end of file diff --git a/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechPromptTests.java b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechPromptTests.java new file mode 100644 index 00000000000..fe347134237 --- /dev/null +++ b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechPromptTests.java @@ -0,0 +1,34 @@ +package org.springframework.ai.audio.speech; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link SpeechPrompt}. + * + * @author Thomas Vitale + */ +class SpeechPromptTests { + + @Test + void whenMessageIsNullThenThrow() { + assertThatThrownBy(() -> new SpeechPrompt(null, SpeechOptionsBuilder.builder().build())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("message cannot be null"); + } + + @Test + void whenOptionsIsNullThenThrow() { + assertThatThrownBy(() -> new SpeechPrompt(new SpeechMessage("hobbits"), null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("speechOptions cannot be null"); + } + + @Test + void whenMessageIsNullInBuilderThenThrow() { + assertThatThrownBy(() -> SpeechPrompt.builder().build()).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("message cannot be null"); + } + +} \ No newline at end of file diff --git a/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechResponseTests.java b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechResponseTests.java new file mode 100644 index 00000000000..1588c5dac92 --- /dev/null +++ 
b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechResponseTests.java @@ -0,0 +1,39 @@ +package org.springframework.ai.audio.speech; + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link SpeechResponse}. + * + * @author Thomas Vitale + */ +class SpeechResponseTests { + + @Test + void whenBuildSpeechResponse() { + var speech = new Speech(new byte[0]); + var speechResponse = new SpeechResponse(speech); + assertThat(speechResponse.getResult()).isEqualTo(speech); + assertThat(speechResponse.getResults()).isEqualTo(List.of(speech)); + assertThat(speechResponse.getMetadata()).isNotNull(); + } + + @Test + void whenSpeechIsNullThenThrow() { + assertThatThrownBy(() -> new SpeechResponse(null)).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("speech cannot be null"); + } + + @Test + void whenMetadataIsNullThenThrow() { + assertThatThrownBy(() -> new SpeechResponse(new Speech(new byte[0]), null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("speechResponseMetadata cannot be null"); + } + +} diff --git a/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechTests.java b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechTests.java new file mode 100644 index 00000000000..bbac4fdbca7 --- /dev/null +++ b/spring-ai-core/src/test/java/org/springframework/ai/audio/speech/SpeechTests.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.audio.speech; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link Speech}. + * + * @author Thomas Vitale + */ +class SpeechTests { + + @Test + void whenBuildSpeech() { + var audio = new byte[0]; + var speech = new Speech(audio); + assertThat(speech.getOutput()).isEqualTo(audio); + assertThat(speech.getMetadata()).isNotNull(); + } + + @Test + void whenSpeechIsNullThenThrow() { + assertThatThrownBy(() -> new Speech(null)).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("audio cannot be null"); + } + + @Test + void whenMetadataIsNullThenThrow() { + assertThatThrownBy(() -> new Speech(new byte[0], null)).isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("speechMetadata cannot be null"); + } + +} \ No newline at end of file diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/upgrade-notes.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/upgrade-notes.adoc index 3978b06288b..e66e4244d51 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/upgrade-notes.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/upgrade-notes.adoc @@ -1,7 +1,13 @@ [[upgrade-notes]] = Upgrading Notes -== Upgrading to 1.0.0.RC1 +== Upgrading to 1.0.0.M4 + +* The `SpeechModel` API, previously living in the `spring-ai-openai` module, has been generalized +and consolidated in the `spring-ai-core` module, ready for supporting 
more implementations. +As a consequence, the `OpenAiAudioSpeechModel` implementation has been refactored to implement the new APIs. + +== Upgrading to 1.0.0.M3 * The type of the portable chat options (`frequencyPenalty`, `presencePenalty`, `temperature`, `topP`) has been changed from `Float` to `Double`. diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/openai/OpenAiAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/openai/OpenAiAutoConfiguration.java index e0586b50e20..e52983cf6a6 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/openai/OpenAiAutoConfiguration.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/openai/OpenAiAutoConfiguration.java @@ -213,7 +213,7 @@ public OpenAiAudioSpeechModel openAiAudioSpeechClient(OpenAiConnectionProperties var openAiAudioApi = new OpenAiAudioApi(resolved.baseUrl(), resolved.apiKey(), resolved.headers(), restClientBuilder, webClientBuilder, responseErrorHandler); - return new OpenAiAudioSpeechModel(openAiAudioApi, speechProperties.getOptions()); + return new OpenAiAudioSpeechModel(openAiAudioApi, speechProperties.getOptions(), retryTemplate); } @Bean