diff --git a/src/AiClient.php b/src/AiClient.php index ebfeec75..c1941fd1 100644 --- a/src/AiClient.php +++ b/src/AiClient.php @@ -362,6 +362,30 @@ public static function generateSpeechResult( return self::getConfiguredPromptBuilder($prompt, $modelOrConfig, $registry)->generateSpeechResult(); } + /** + * Generates sound using the traditional API approach. + * + * @since 1.4.0 + * + * @param Prompt $prompt The prompt content. + * @param ModelInterface|ModelConfig|null $modelOrConfig Optional specific model to use, + * or model configuration for auto-discovery, + * or null for defaults. + * @param ProviderRegistry|null $registry Optional custom registry. If null, uses default. + * @return GenerativeAiResult The generation result. + * + * @throws \InvalidArgumentException If the prompt format is invalid. + * @throws \RuntimeException If no suitable model is found. + */ + public static function generateSoundResult( + $prompt, + $modelOrConfig = null, + ?ProviderRegistry $registry = null + ): GenerativeAiResult { + self::validateModelOrConfigParameter($modelOrConfig); + return self::getConfiguredPromptBuilder($prompt, $modelOrConfig, $registry)->generateSoundResult(); + } + /** * Generates a video using the traditional API approach. 
* diff --git a/src/Builders/PromptBuilder.php b/src/Builders/PromptBuilder.php index 130fc574..24f120ff 100644 --- a/src/Builders/PromptBuilder.php +++ b/src/Builders/PromptBuilder.php @@ -25,6 +25,7 @@ use WordPress\AiClient\Providers\Models\DTO\ModelRequirements; use WordPress\AiClient\Providers\Models\Enums\CapabilityEnum; use WordPress\AiClient\Providers\Models\ImageGeneration\Contracts\ImageGenerationModelInterface; +use WordPress\AiClient\Providers\Models\SoundGeneration\Contracts\SoundGenerationModelInterface; use WordPress\AiClient\Providers\Models\SpeechGeneration\Contracts\SpeechGenerationModelInterface; use WordPress\AiClient\Providers\Models\TextGeneration\Contracts\TextGenerationModelInterface; use WordPress\AiClient\Providers\Models\TextToSpeechConversion\Contracts\TextToSpeechConversionModelInterface; @@ -758,6 +759,9 @@ private function inferCapabilityFromModelInterfaces(ModelInterface $model): ?Cap if ($model instanceof SpeechGenerationModelInterface) { return CapabilityEnum::speechGeneration(); } + if ($model instanceof SoundGenerationModelInterface) { + return CapabilityEnum::soundGeneration(); + } if ($model instanceof VideoGenerationModelInterface) { return CapabilityEnum::videoGeneration(); } @@ -871,6 +875,18 @@ public function isSupportedForSpeechGeneration(): bool return $this->isSupported(CapabilityEnum::speechGeneration()); } + /** + * Checks if the prompt is supported for sound generation. + * + * @since 1.4.0 + * + * @return bool True if sound generation is supported. + */ + public function isSupportedForSoundGeneration(): bool + { + return $this->isSupported(CapabilityEnum::soundGeneration()); + } + /** * Checks if the prompt is supported for music generation. 
* @@ -1012,6 +1028,18 @@ private function executeModelGeneration( return $model->generateSpeechResult($messages); } + if ($capability->isSoundGeneration()) { + if (!$model instanceof SoundGenerationModelInterface) { + throw new RuntimeException( + sprintf( + 'Model "%s" does not support sound generation.', + $model->metadata()->getId() + ) + ); + } + return $model->generateSoundResult($messages); + } + if ($capability->isVideoGeneration()) { if (!$model instanceof VideoGenerationModelInterface) { throw new RuntimeException( @@ -1102,6 +1130,24 @@ public function convertTextToSpeechResult(): GenerativeAiResult return $this->generateResult(CapabilityEnum::textToSpeechConversion()); } + /** + * Generates a sound result from the prompt. + * + * @since 1.4.0 + * + * @return GenerativeAiResult The generated result containing sound audio candidates. + * @throws InvalidArgumentException If the prompt or model validation fails. + * @throws RuntimeException If the model doesn't support sound generation. + */ + public function generateSoundResult(): GenerativeAiResult + { + // Include audio in output modalities + $this->includeOutputModalities(ModalityEnum::audio()); + + // Generate and return the result with sound generation capability + return $this->generateResult(CapabilityEnum::soundGeneration()); + } + /** * Generates a video result from the prompt. * @@ -1251,6 +1297,39 @@ public function generateSpeeches(?int $candidateCount = null): array return $this->generateSpeechResult()->toFiles(); } + /** + * Generates sound from the prompt and returns the audio as a single file. + * + * @since 1.4.0 + * + * @return File The generated sound audio file. + * @throws InvalidArgumentException If the prompt or model validation fails. + * @throws RuntimeException If no audio is generated. + */ + public function generateSound(): File + { + return $this->generateSoundResult()->toFile(); + } + + /** + * Generates multiple sound outputs from the prompt. 
+ * + * @since 1.4.0 + * + * @param int|null $candidateCount The number of sound outputs to generate. + * @return list<File> The generated sound audio files. + * @throws InvalidArgumentException If the prompt or model validation fails. + * @throws RuntimeException If no audio is generated. + */ + public function generateSounds(?int $candidateCount = null): array + { + if ($candidateCount !== null) { + $this->usingCandidateCount($candidateCount); + } + + return $this->generateSoundResult()->toFiles(); + } + /** * Generates a video from the prompt. * diff --git a/src/Providers/Models/Enums/CapabilityEnum.php b/src/Providers/Models/Enums/CapabilityEnum.php index 43fc8403..f5b50e5c 100644 --- a/src/Providers/Models/Enums/CapabilityEnum.php +++ b/src/Providers/Models/Enums/CapabilityEnum.php @@ -16,6 +16,7 @@ * @method static self textToSpeechConversion() Creates an instance for TEXT_TO_SPEECH_CONVERSION capability. * @method static self speechGeneration() Creates an instance for SPEECH_GENERATION capability. * @method static self musicGeneration() Creates an instance for MUSIC_GENERATION capability. + * @method static self soundGeneration() Creates an instance for SOUND_GENERATION capability. * @method static self videoGeneration() Creates an instance for VIDEO_GENERATION capability. * @method static self embeddingGeneration() Creates an instance for EMBEDDING_GENERATION capability. * @method static self chatHistory() Creates an instance for CHAT_HISTORY capability. @@ -24,6 +25,7 @@ * @method bool isTextToSpeechConversion() Checks if the capability is TEXT_TO_SPEECH_CONVERSION. * @method bool isSpeechGeneration() Checks if the capability is SPEECH_GENERATION. * @method bool isMusicGeneration() Checks if the capability is MUSIC_GENERATION. + * @method bool isSoundGeneration() Checks if the capability is SOUND_GENERATION. * @method bool isVideoGeneration() Checks if the capability is VIDEO_GENERATION. 
* @method bool isEmbeddingGeneration() Checks if the capability is EMBEDDING_GENERATION. * @method bool isChatHistory() Checks if the capability is CHAT_HISTORY. @@ -55,6 +57,11 @@ class CapabilityEnum extends AbstractEnum */ public const MUSIC_GENERATION = 'music_generation'; + /** + * Sound generation capability. + */ + public const SOUND_GENERATION = 'sound_generation'; + /** * Video generation capability. */ diff --git a/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationModelInterface.php b/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationModelInterface.php new file mode 100644 index 00000000..bf82dedd --- /dev/null +++ b/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationModelInterface.php @@ -0,0 +1,28 @@ + $prompt Array of messages containing the sound generation prompt. + * @return GenerativeAiResult Result containing generated sound audio. + */ + public function generateSoundResult(array $prompt): GenerativeAiResult; +} diff --git a/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationOperationModelInterface.php b/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationOperationModelInterface.php new file mode 100644 index 00000000..dcb3b293 --- /dev/null +++ b/src/Providers/Models/SoundGeneration/Contracts/SoundGenerationOperationModelInterface.php @@ -0,0 +1,28 @@ + $prompt Array of messages containing the sound generation prompt. + * @return GenerativeAiOperation The initiated sound generation operation. 
+ */ + public function generateSoundOperation(array $prompt): GenerativeAiOperation; +} diff --git a/tests/unit/AiClientTest.php b/tests/unit/AiClientTest.php index c573c756..af4a4cfa 100644 --- a/tests/unit/AiClientTest.php +++ b/tests/unit/AiClientTest.php @@ -514,6 +514,7 @@ public function testTraditionalMethodsAcceptModelConfig(): void 'generateImageResult', 'convertTextToSpeechResult', 'generateSpeechResult', + 'generateSoundResult', 'generateVideoResult' ]; @@ -573,6 +574,7 @@ public function aiClientMethodsProvider(): array 'generateImageResult' => ['generateImageResult'], 'convertTextToSpeechResult' => ['convertTextToSpeechResult'], 'generateSpeechResult' => ['generateSpeechResult'], + 'generateSoundResult' => ['generateSoundResult'], 'generateVideoResult' => ['generateVideoResult'], ]; } @@ -705,6 +707,7 @@ public function testModelConfigPassedToAllMethods(): void 'generateImageResult', 'convertTextToSpeechResult', 'generateSpeechResult', + 'generateSoundResult', 'generateVideoResult' ]; diff --git a/tests/unit/Builders/PromptBuilderTest.php b/tests/unit/Builders/PromptBuilderTest.php index ce68a223..a1472e4d 100644 --- a/tests/unit/Builders/PromptBuilderTest.php +++ b/tests/unit/Builders/PromptBuilderTest.php @@ -27,6 +27,7 @@ use WordPress\AiClient\Providers\Models\DTO\SupportedOption; use WordPress\AiClient\Providers\Models\Enums\CapabilityEnum; use WordPress\AiClient\Providers\Models\Enums\OptionEnum; +use WordPress\AiClient\Providers\Models\SoundGeneration\Contracts\SoundGenerationModelInterface; use WordPress\AiClient\Providers\Models\SpeechGeneration\Contracts\SpeechGenerationModelInterface; use WordPress\AiClient\Providers\Models\TextGeneration\Contracts\TextGenerationModelInterface; use WordPress\AiClient\Providers\Models\TextToSpeechConversion\Contracts\TextToSpeechConversionModelInterface; @@ -145,6 +146,69 @@ public function generateSpeechResult(array $prompt): GenerativeAiResult }; } + /** + * Creates a mock model that implements both 
ModelInterface and SoundGenerationModelInterface. + * + * @param ModelMetadata $metadata The metadata for the model. + * @param GenerativeAiResult $result The result to return from generation. + * @return ModelInterface&SoundGenerationModelInterface The mock model. + */ + private function createSoundGenerationModel(ModelMetadata $metadata, GenerativeAiResult $result): ModelInterface + { + $providerMetadata = new ProviderMetadata( + 'mock-provider', + 'Mock Provider', + ProviderTypeEnum::cloud() + ); + + return new class ( + $metadata, + $providerMetadata, + $result + ) implements ModelInterface, SoundGenerationModelInterface { + private ModelMetadata $metadata; + private ProviderMetadata $providerMetadata; + private GenerativeAiResult $result; + private ModelConfig $config; + + public function __construct( + ModelMetadata $metadata, + ProviderMetadata $providerMetadata, + GenerativeAiResult $result + ) { + $this->metadata = $metadata; + $this->providerMetadata = $providerMetadata; + $this->result = $result; + $this->config = new ModelConfig(); + } + + public function metadata(): ModelMetadata + { + return $this->metadata; + } + + public function providerMetadata(): ProviderMetadata + { + return $this->providerMetadata; + } + + public function setConfig(ModelConfig $config): void + { + $this->config = $config; + } + + public function getConfig(): ModelConfig + { + return $this->config; + } + + public function generateSoundResult(array $prompt): GenerativeAiResult + { + return $this->result; + } + }; + } + /** * Creates a mock model that implements both ModelInterface and VideoGenerationModelInterface. * @@ -1980,6 +2044,69 @@ public function testGenerateSpeechResult(): void $this->assertTrue($modalities[0]->isAudio()); } + /** + * Tests that generateSoundResult returns the model result and includes the audio output modality. 
+ * + * @return void + */ + public function testGenerateSoundResult(): void + { + $result = new GenerativeAiResult( + 'test-result', + [new Candidate( + new ModelMessage([new MessagePart(new File('data:audio/wav;base64,UklGRigE=', 'audio/wav'))]), + FinishReasonEnum::stop() + )], + new TokenUsage(100, 50, 150), + $this->createTestProviderMetadata(), + $this->createTestTextModelMetadata() + ); + + $metadata = $this->createMock(ModelMetadata::class); + $metadata->method('getId')->willReturn('test-model'); + + $model = $this->createSoundGenerationModel($metadata, $result); + + $builder = new PromptBuilder($this->registry, 'Generate sound'); + $builder->usingModel($model); + + $actualResult = $builder->generateSoundResult(); + $this->assertSame($result, $actualResult); + + // Verify audio modality was included + $reflection = new \ReflectionClass($builder); + $configProperty = $reflection->getProperty('modelConfig'); + $configProperty->setAccessible(true); + /** @var ModelConfig $config */ + $config = $configProperty->getValue($builder); + + $modalities = $config->getOutputModalities(); + $this->assertNotNull($modalities); + $this->assertTrue($modalities[0]->isAudio()); + } + + /** + * Tests generateSoundResult throws exception for unsupported model. + * + * @return void + */ + public function testGenerateSoundResultThrowsExceptionForUnsupportedModel(): void + { + $metadata = $this->createMock(ModelMetadata::class); + $metadata->method('getId')->willReturn('test-model'); + + $model = $this->createMock(ModelInterface::class); + $model->method('metadata')->willReturn($metadata); + + $builder = new PromptBuilder($this->registry, 'Generate sound'); + $builder->usingModel($model); + + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage('Model "test-model" does not support sound generation'); + + $builder->generateSoundResult(); + } + /** * Tests convertTextToSpeechResult method. 
* @@ -2737,6 +2864,84 @@ public function testGenerateSpeeches(): void $this->assertSame($files[2], $speechFiles[2]); } + /** + * Tests generateSound method. + * + * @return void + */ + public function testGenerateSound(): void + { + $file = new File('https://example.com/sound.mp3', 'audio/mp3'); + $messagePart = new MessagePart($file); + $message = new Message(MessageRoleEnum::model(), [$messagePart]); + $candidate = new Candidate($message, FinishReasonEnum::stop()); + + $result = new GenerativeAiResult( + 'test-result', + [$candidate], + new TokenUsage(100, 50, 150), + $this->createTestProviderMetadata(), + $this->createTestTextModelMetadata() + ); + + $metadata = $this->createMock(ModelMetadata::class); + $metadata->method('getId')->willReturn('test-model'); + + $model = $this->createSoundGenerationModel($metadata, $result); + + $builder = new PromptBuilder($this->registry, 'Generate sound'); + $builder->usingModel($model); + + $soundFile = $builder->generateSound(); + $this->assertSame($file, $soundFile); + } + + /** + * Tests generateSounds method. 
+ * + * @return void + */ + public function testGenerateSounds(): void + { + $files = [ + new File('https://example.com/sound1.mp3', 'audio/mp3'), + new File('https://example.com/sound2.mp3', 'audio/mp3'), + new File('https://example.com/sound3.mp3', 'audio/mp3'), + ]; + + $candidates = []; + foreach ($files as $file) { + $candidates[] = new Candidate( + new Message(MessageRoleEnum::model(), [new MessagePart($file)]), + FinishReasonEnum::stop(), + 10 + ); + } + + $result = new GenerativeAiResult( + 'test-result-id', + $candidates, + new TokenUsage(100, 50, 150), + $this->createTestProviderMetadata(), + $this->createTestTextModelMetadata() + ); + + $metadata = $this->createMock(ModelMetadata::class); + $metadata->method('getId')->willReturn('test-model'); + + $model = $this->createSoundGenerationModel($metadata, $result); + + $builder = new PromptBuilder($this->registry, 'Generate sound'); + $builder->usingModel($model); + + $soundFiles = $builder->generateSounds(3); + + $this->assertCount(3, $soundFiles); + $this->assertSame($files[0], $soundFiles[0]); + $this->assertSame($files[1], $soundFiles[1]); + $this->assertSame($files[2], $soundFiles[2]); + } + /** * Tests appendPartToMessages creates new user message when empty. * @@ -3406,6 +3611,40 @@ public function testIsSupportedForSpeechGeneration(): void $this->assertTrue($builder->isSupportedForSpeechGeneration()); } + /** + * Tests isSupportedForSoundGeneration convenience method. 
+ * + * @return void + */ + public function testIsSupportedForSoundGeneration(): void + { + $metadata = $this->createMock(ModelMetadata::class); + $metadata->method('getId')->willReturn('sound-model'); + $metadata->method('getSupportedCapabilities')->willReturn([ + CapabilityEnum::soundGeneration() + ]); + $metadata->method('getSupportedOptions')->willReturn([ + new SupportedOption(OptionEnum::inputModalities(), [ + [ModalityEnum::text()], + [ModalityEnum::text(), ModalityEnum::image()] + ]) + ]); + + $result = new GenerativeAiResult('test-id', [ + new Candidate( + new ModelMessage([new MessagePart(new File('https://example.com/sound.mp3', 'audio/mp3'))]), + FinishReasonEnum::stop() + ) + ], new TokenUsage(10, 5, 15), $this->createTestProviderMetadata(), $this->createTestTextModelMetadata()); + + $model = $this->createSoundGenerationModel($metadata, $result); + + $builder = new PromptBuilder($this->registry, 'Generate sound'); + $builder->usingModel($model); + + $this->assertTrue($builder->isSupportedForSoundGeneration()); + } + /** * Tests generateResult with provider specified. * diff --git a/tests/unit/Providers/Models/Enums/CapabilityEnumTest.php b/tests/unit/Providers/Models/Enums/CapabilityEnumTest.php index c0f006da..fbebb68a 100644 --- a/tests/unit/Providers/Models/Enums/CapabilityEnumTest.php +++ b/tests/unit/Providers/Models/Enums/CapabilityEnumTest.php @@ -38,6 +38,7 @@ protected function getExpectedValues(): array 'TEXT_TO_SPEECH_CONVERSION' => 'text_to_speech_conversion', 'SPEECH_GENERATION' => 'speech_generation', 'MUSIC_GENERATION' => 'music_generation', + 'SOUND_GENERATION' => 'sound_generation', 'VIDEO_GENERATION' => 'video_generation', 'EMBEDDING_GENERATION' => 'embedding_generation', 'CHAT_HISTORY' => 'chat_history',