diff --git a/src/GenericLanguageInflectorFactory.php b/src/GenericLanguageInflectorFactory.php
index 166061d..5f63448 100644
--- a/src/GenericLanguageInflectorFactory.php
+++ b/src/GenericLanguageInflectorFactory.php
@@ -16,6 +16,26 @@ abstract class GenericLanguageInflectorFactory implements LanguageInflectorFacto
     /** @var Ruleset[] */
     private $pluralRulesets = [];

+    /**
+     * Word inflector class instantiated by createRulesetInflector(); null selects RulesetInflector.
+     *
+     * @var class-string<WordInflector>|null
+     */
+    protected $rulesetInflector = null;
+
+    /**
+     * Builds the word inflector that applies the given rulesets.
+     *
+     * The class named by $rulesetInflector is instantiated when it is set,
+     * RulesetInflector otherwise; the rulesets are passed to its constructor.
+     */
+    protected function createRulesetInflector(Ruleset $ruleset, Ruleset ...$rulesets): WordInflector
+    {
+        $class = $this->rulesetInflector ?? RulesetInflector::class;
+
+        return new $class($ruleset, ...$rulesets);
+    }
+
     final public function __construct()
     {
         $this->singularRulesets[] = $this->getSingularRuleset();
@@ -25,10 +45,10 @@ final public function __construct()
     final public function build(): Inflector
     {
         return new Inflector(
-            new CachedWordInflector(new RulesetInflector(
+            new CachedWordInflector($this->createRulesetInflector(
                 ...$this->singularRulesets
             )),
-            new CachedWordInflector(new RulesetInflector(
+            new CachedWordInflector($this->createRulesetInflector(
                 ...$this->pluralRulesets
             ))
         );
diff --git a/src/MultiWordInflector.php b/src/MultiWordInflector.php
new file mode 100644
index 0000000..5b3c29d
--- /dev/null
+++ b/src/MultiWordInflector.php
@@ -0,0 +1,78 @@
+        $this->wordInflector = $wordInflector;
+    }
+
+    /**
+     * Inflects each word of a phrase and keeps the separators where they were.
+     *
+     * Text without whitespace or a hyphen goes straight to the wrapped inflector.
+     * Anything else is cut at every character listed in WORD_SEPARATORS; each run
+     * between separators is inflected on its own (function words included) and
+     * the pieces are joined back in their original order.
+     */
+    public function inflect(string $word): string
+    {
+        // If it's a single word or doesn't contain any word separators, use the original inflector
+        if (preg_match('/[\s-]/', $word) !== 1) {
+            return $this->wordInflector->inflect($word);
+        }
+
+        // Split the phrase into words while preserving separators
+        $words = [];
+        $currentWord = '';
+        $length = mb_strlen($word);
+
+        for ($i = 0; $i < $length; $i++) {
+            $char = mb_substr($word, $i, 1);
+            if (in_array($char, self::WORD_SEPARATORS, true)) {
+                if ($currentWord !== '') {
+                    $words[] = $currentWord;
+                    $currentWord = '';
+                }
+
+                $words[] = $char;
+            } else {
+                $currentWord .= $char;
+            }
+        }
+
+        if ($currentWord !== '') {
+            $words[] = $currentWord;
+        }
+
+        // Process each word
+        $result = [];
+        foreach ($words as $part) {
+            if (in_array($part, self::WORD_SEPARATORS, true)) {
+                $result[] = $part;
+            } else {
+                $result[] = $this->wordInflector->inflect($part);
+            }
+        }
+
+        return implode('', $result);
+    }
+}
diff --git a/src/Rules/Italian/Inflectible.php b/src/Rules/Italian/Inflectible.php
index dc63b52..c525121 100644
--- a/src/Rules/Italian/Inflectible.php
+++ b/src/Rules/Italian/Inflectible.php
@@ -14,83 +14,41 @@ class Inflectible
     /** @return iterable */
     public static function getSingular(): iterable
     {
-        // Reverse of -sce → -scia (fasce → fascia)
-        yield new Transformation(new Pattern('([aeiou])sce$'), '\\1scia');
+        // Advanced ending rules
+        yield new Transformation(new Pattern('sce$'), 'scia'); // fasce → fascia
+        yield new Transformation(new Pattern('sci$'), 'scio'); // fasci → fascio
+        yield new Transformation(new Pattern('chi$'), 'co'); // bachi → baco
+        yield new Transformation(new Pattern('che$'), 'ca'); // fotografiche → fotografica
+        yield new Transformation(new Pattern('ghi$'), 'go'); // laghi → lago
+        yield new Transformation(new Pattern('ghe$'), 'ga'); // targhe → targa
+        yield new Transformation(new Pattern('esi$'), 'ese'); // paesi → paese
+        yield new Transformation(new Pattern('ali$'), 'ale'); // ministeriali → ministeriale
+        yield new Transformation(new Pattern('ari$'), 'ario'); // questionari → questionario
+        yield new Transformation(new Pattern('eri$'), 'ero'); // numeri → numero
+        yield new Transformation(new Pattern('li$'), 'lio'); // cimeli → cimelio

-        // Reverse of -cie → -cia (farmacia → farmacie)
-        yield new Transformation(new Pattern('cie$'), 'cia');
-
-        // Reverse of -gie → -gia (bugia → bugie)
-        yield new Transformation(new Pattern('gie$'), 'gia');
-
-        // Reverse of -ce → -cia (arance → arancia)
-        yield new Transformation(new Pattern('([^aeiou])ce$'), '\1cia');
-
-        // Reverse of -ge → -gia (valige → valigia)
-        yield new Transformation(new Pattern('([^aeiou])ge$'), '\1gia');
-
-        // Reverse of -chi → -co (bachi → baco)
-        yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])chi$'), '\1co');
-
-        // Reverse of -ghi → -go (laghi → lago)
-        yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ghi$'), '\1go');
-
-        // Reverse of -ci → -co (medici → medico)
-        yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])ci$'), '\1co');
-
-        // Reverse of -gi → -go (psicologi → psicologo)
-        yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])gi$'), '\1go');
-
-        // Reverse of -i → -io (zii → zio, negozi → negozio)
-        // This is more complex due to Italian's stress patterns, but we'll handle the basic case
-        yield new Transformation(new Pattern('([^aeiou])i$'), '\1io');
-
-        // Handle words that end with -i but should go to -co/-go (amici → amico, not amice)
-        yield new Transformation(new Pattern('([^aeiou])ci$'), '\1co');
-        yield new Transformation(new Pattern('([^aeiou])gi$'), '\1go');
-
-        // Reverse of -a → -e
-        yield new Transformation(new Pattern('e$'), 'a');
-
-        // Reverse of -e → -i
-        yield new Transformation(new Pattern('i$'), 'e');
-
-        // Reverse of -o → -i
-        yield new Transformation(new Pattern('i$'), 'o');
+        // Standard ending rules
+        yield new Transformation(new Pattern('e$'), 'a'); // case → casa
+        yield new Transformation(new Pattern('i$'), 'o'); // libri → libro
+        yield new Transformation(new Pattern('i$'), 'e'); // NOTE(review): shadowed by the i$ rule above
     }

     /** @return iterable */
     public static function getPlural(): iterable
     {
-        // Words ending in -scia without stress on 'i' become -sce (e.g. fascia → fasce)
-        yield new Transformation(new Pattern('([aeiou])scia$'), '\\1sce');
-
-        // Words ending in -cia/gia with stress on 'i' keep the 'i' in plural
-        yield new Transformation(new Pattern('cia$'), 'cie'); // e.g. farmacia → farmacie
-        yield new Transformation(new Pattern('gia$'), 'gie'); // e.g. bugia → bugie
-
-        // Words ending in -cia/gia without stress on 'i' lose the 'i' in plural
-        yield new Transformation(new Pattern('([^aeiou])cia$'), '\\1ce'); // e.g. arancia → arance
-        yield new Transformation(new Pattern('([^aeiou])gia$'), '\\1ge'); // e.g. valigia → valige
-
-        // Words ending in -co/-go with stress on 'o' become -chi/-ghi
-        yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])co$'), '\\1chi'); // e.g. baco → bachi
-        yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])go$'), '\\1ghi'); // e.g. lago → laghi
-
-        // Words ending in -co/-go with stress on the penultimate syllable become -ci/-gi
-        yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])co$'), '\\1ci'); // e.g. medico → medici
-        yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])go$'), '\\1gi'); // e.g. psicologo → psicologi
-
-        // Words ending in -io with stress on 'i' keep the 'i' in plural
-        yield new Transformation(new Pattern('([^aeiou])io$'), '\\1i'); // e.g. zio → zii
-
-        // Words ending in -io with stress on 'o' lose the 'i' in plural
-        yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. negozio → negozi
+        // Advanced ending rules
+        yield new Transformation(new Pattern('scia$'), 'sce'); // fascia → fasce
+        yield new Transformation(new Pattern('scio$'), 'sci'); // fascio → fasci
+        yield new Transformation(new Pattern('co$'), 'chi'); // baco → bachi
+        yield new Transformation(new Pattern('ca$'), 'che'); // fotografica → fotografiche
+        yield new Transformation(new Pattern('go$'), 'ghi'); // lago → laghi
+        yield new Transformation(new Pattern('ga$'), 'ghe'); // targa → targhe
+        yield new Transformation(new Pattern('io$'), 'i'); // cimelio → cimeli

         // Standard ending rules
-        yield new Transformation(new Pattern('a$'), 'e'); // -a → -e
-        yield new Transformation(new Pattern('e$'), 'i'); // -e → -i
-        yield new Transformation(new Pattern('o$'), 'i'); // -o → -i
+        yield new Transformation(new Pattern('a$'), 'e'); // casa → case
+        yield new Transformation(new Pattern('o$'), 'i'); // libro → libri
+        yield new Transformation(new Pattern('e$'), 'i'); // studente → studenti
     }

     /** @return iterable */
@@ -118,6 +76,7 @@ public static function getIrregular(): iterable
             'capitale' => 'capitali',
             'carcere' => 'carceri',
             'casa' => 'case',
+            'cassaforte' => 'casseforti',
             'cavaliere' => 'cavalieri',
             'centinaio' => 'centinaia',
             'cerchio' => 'cerchia',
@@ -134,9 +93,11 @@ public static function getIrregular(): iterable
             'dito' => 'dita',
             'dottore' => 'dottori',
             'fiore' => 'fiori',
+            'forte' => 'forti',
             'fratello' => 'fratelli',
             'fuoco' => 'fuochi',
             'gamba' => 'gambe',
+            'giallo' => 'gialli',
             'ginocchio' => 'ginocchia',
             'gioco' => 'giochi',
             'giornale' => 'giornali',
@@ -188,6 +149,7 @@ public static function getIrregular(): iterable
             'scuola' => 'scuole',
             'serie' => 'serie',
             'serramento' => 'serramenta',
+            'sistema' => 'sistemi',
             'sorella' => 'sorelle',
             'specie' => 'specie',
             'staio' => 'staia',
@@ -198,6 +160,7 @@ public static function getIrregular(): iterable
             'suo' => 'suoi',
             'superficie' => 'superfici',
             'tavolo' => 'tavoli',
+            'tema' => 'temi',
             'tempio' => 'templi',
             'treno' => 'treni',
             'tuo' => 'tuoi',
diff --git a/src/Rules/Italian/InflectorFactory.php b/src/Rules/Italian/InflectorFactory.php
index 41685c4..df0c271 100644
--- a/src/Rules/Italian/InflectorFactory.php
+++ b/src/Rules/Italian/InflectorFactory.php
@@ -6,9 +6,13 @@

 use Doctrine\Inflector\GenericLanguageInflectorFactory;
 use Doctrine\Inflector\Rules\Ruleset;
+use Doctrine\Inflector\WordInflector;

 final class InflectorFactory extends GenericLanguageInflectorFactory
 {
+    /** @var class-string<WordInflector>|null */
+    protected $rulesetInflector = RulesetInflector::class;
+
     protected function getSingularRuleset(): Ruleset
     {
         return Rules::getSingularRuleset();
diff --git a/src/Rules/Italian/RulesetInflector.php b/src/Rules/Italian/RulesetInflector.php
new file mode 100644
index 0000000..f9d857a
--- /dev/null
+++ b/src/Rules/Italian/RulesetInflector.php
@@ -0,0 +1,46 @@
+        $this->wordInflector = $this->createMock(WordInflector::class);
+        $this->multiWordInflector = new MultiWordInflector($this->wordInflector);
+    }
+
+    public function testInflectSingleWordDelegatesToWordInflector(): void
+    {
+        $this->wordInflector->expects(self::once())
+            ->method('inflect')
+            ->with('casa')
+            ->willReturn('case');
+
+        $result = $this->multiWordInflector->inflect('casa');
+        self::assertSame('case', $result);
+    }
+
+    public function testInflectMultiWordItalianPhrases(): void
+    {
+        $this->wordInflector->expects(self::any())
+            ->method('inflect')
+            ->willReturnMap([
+                ['carta', 'carte'],
+                ['credito', 'crediti'],
+                ['libro', 'libri'],
+                ['giallo', 'gialli'],
+                ['chiave', 'chiavi'],
+                ['inglese', 'inglesi'],
+                ['stazione', 'stazioni'],
+                ['ferroviaria', 'ferroviarie'],
+                ['macchina', 'macchine'],
+                ['fotografica', 'fotografiche'],
+                ['di', 'di'],
+                ['geografica', 'geografiche'],
+            ]);
+
+        $testCases = [
+            'carta di credito' => 'carte di crediti',
+            'libro giallo' => 'libri gialli',
+            'chiave inglese' => 'chiavi inglesi',
+            'stazione ferroviaria' => 'stazioni ferroviarie',
+            'macchina fotografica' => 'macchine fotografiche',
+            'carta geografica' => 'carte geografiche',
+        ];
+
+        foreach ($testCases as $singular => $expectedPlural) {
+            $result = $this->multiWordInflector->inflect($singular);
+            self::assertSame($expectedPlural, $result);
+        }
+    }
+
+    public function testInflectWithHyphenatedItalianWords(): void
+    {
+        $this->wordInflector->expects(self::any())
+            ->method('inflect')
+            ->willReturnMap([
+                ['primo', 'primi'],
+                ['piano', 'piani'],
+                ['capo', 'capi'],
+                ['stazione', 'stazioni'],
+                ['cassaforte', 'casseforti'],
+                ['forte', 'forti'],
+            ]);
+
+        $testCases = [
+            'primo-piano' => 'primi-piani',
+            'cassaforte-forte' => 'casseforti-forti',
+        ];
+
+        foreach ($testCases as $singular => $expectedPlural) {
+            $result = $this->multiWordInflector->inflect($singular);
+            self::assertSame($expectedPlural, $result);
+        }
+    }
+
+    public function testInflectWithPrepositionsAndArticles(): void
+    {
+        $this->wordInflector->expects(self::any())
+            ->method('inflect')
+            ->willReturnMap([
+                ['sistema', 'sistemi'],
+                ['operativo', 'operativi'],
+                ['carta', 'carte'],
+                ['prepagata', 'prepagate'],
+                ['aziendale', 'aziendali'],
+                ['virtuale', 'virtuali'],
+                ['ricaricabile', 'ricaricabili'],
+            ]);
+
+        $testCases = [
+            'sistema operativo' => 'sistemi operativi',
+            'carta prepagata' => 'carte prepagate',
+            'carta aziendale' => 'carte aziendali',
+            'carta virtuale' => 'carte virtuali',
+            'carta ricaricabile' => 'carte ricaricabili',
+        ];
+
+        foreach ($testCases as $singular => $expectedPlural) {
+            $result = $this->multiWordInflector->inflect($singular);
+            self::assertSame($expectedPlural, $result);
+        }
+    }
+
+    public function testInflectWithSpecialCharacters(): void
+    {
+        $this->wordInflector->expects(self::any())
+            ->method('inflect')
+            ->willReturnMap([
+                ['caffè', 'caffè'],
+                ['ristretto', 'ristretti'],
+                ['tè', 'tè'],
+                ['caldo', 'caldi'],
+                ['menù', 'menù'],
+                ['del', 'del'],
+                ['giorno', 'giorni'],
+            ]);
+
+        $testCases = [
+            'caffè ristretto' => 'caffè ristretti',
+            'tè caldo' => 'tè caldi',
+            'menù del giorno' => 'menù del giorni',
+        ];
+
+        foreach ($testCases as $singular => $expectedPlural) {
+            $result = $this->multiWordInflector->inflect($singular);
+            self::assertSame($expectedPlural, $result);
+        }
+    }
+}
diff --git a/tests/Rules/Italian/ItalianFunctionalTest.php b/tests/Rules/Italian/ItalianFunctionalTest.php
index 797b086..19849c9 100644
--- a/tests/Rules/Italian/ItalianFunctionalTest.php
+++ b/tests/Rules/Italian/ItalianFunctionalTest.php
@@ -49,7 +49,34 @@ public function dataSampleWords(): array
             ['moto', 'moto'], // from motocicletta
             ['auto', 'auto'], // from automobile

+            // Words covered by the advanced ending rules
+            ['questionario', 'questionari'],
+            ['cimelio', 'cimeli'],
+            ['fascia', 'fasce'],
+            ['fascio', 'fasci'],
+            ['targa', 'targhe'],
+
+            // Multi-word phrases
+            ['garanzia generale', 'garanzie generali'],
+            ['ramo ministeriale', 'rami ministeriali'],
+            ['libro giallo', 'libri gialli'],
+            ['chiave inglese', 'chiavi inglesi'],
+            ['carta d\'identità', 'carte d\'identità'],
+            ['stazione ferroviaria', 'stazioni ferroviarie'],
+            ['carta geografica', 'carte geografiche'],
+            ['macchina fotografica', 'macchine fotografiche'],
+            ['sistema operativo', 'sistemi operativi'],
+
+            // TODO: re-enable once phrases whose complement stays singular are supported, e.g.
+            // 'carta di credito' → 'carte di credito', 'libro di testo' → 'libri di testo',
+            // 'carta di credito prepagata' → 'carte di credito prepagate'.
+
+            // Hyphenated words
+            ['primo-piano', 'primi-piani'],
+            ['capo-stazione', 'capi-stazioni'],
+            ['cassaforte-forte', 'casseforti-forti'],
+
             // Words with accented vowels
             ['caffè', 'caffè'],
             ['tè', 'tè'],
             ['menù', 'menù'],
@@ -73,6 +100,9 @@ public function dataSampleWords(): array
             ['membro', 'membri'], // members of an organization
             ['membrana', 'membrane'], // membranes

+            // Words with specific patterns
+            ['sistema', 'sistemi'], // system -> systems
+
             // Words with identical forms but different genders/meanings
             ['capitale', 'capitali'], // capital (money)
             ['capitale', 'capitali'], // capital city (context determines meaning)
@@ -86,6 +116,12 @@ public function dataSampleWords(): array
             ['libro', 'libri'],
             ['tavolo', 'tavoli'],
             ['ragazzo', 'ragazzi'],
+            ['animo', 'animi'],
+            ['lamento', 'lamenti'],
+            ['supremo', 'supremi'],
+            ['massimo', 'massimi'],
+            ['minimo', 'minimi'],
+            ['numero', 'numeri'],

             // Nouns ending in -a (feminine)
             ['casa', 'case'],
@@ -122,6 +158,7 @@ public function dataSampleWords(): array
             ['karaoke', 'karaoke'],
             ['brindisi', 'brindisi'],
             ['boia', 'boia'],
+            ['eta', 'eta'],
         ];
     }