Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/GenericLanguageInflectorFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ abstract class GenericLanguageInflectorFactory implements LanguageInflectorFacto
/** @var Ruleset[] */
private $pluralRulesets = [];

/** @var class-string<WordInflector>|null */
protected $rulesetInflector = null;

/**
 * Instantiates the word inflector used for a group of rulesets.
 *
 * The class named by $this->rulesetInflector is used when it is set;
 * otherwise the default RulesetInflector is instantiated.
 *
 * @param Ruleset $ruleset     First ruleset, passed as the first constructor argument.
 * @param Ruleset ...$rulesets Any further rulesets, forwarded in order.
 */
protected function createRulesetInflector(Ruleset $ruleset, Ruleset ...$rulesets): WordInflector
{
    $inflectorClass = $this->rulesetInflector;

    if ($inflectorClass === null) {
        $inflectorClass = RulesetInflector::class;
    }

    return new $inflectorClass($ruleset, ...$rulesets);
}

final public function __construct()
{
$this->singularRulesets[] = $this->getSingularRuleset();
Expand All @@ -25,10 +35,10 @@ final public function __construct()
final public function build(): Inflector
{
return new Inflector(
new CachedWordInflector(new RulesetInflector(
new CachedWordInflector($this->createRulesetInflector(
...$this->singularRulesets
)),
new CachedWordInflector(new RulesetInflector(
new CachedWordInflector($this->createRulesetInflector(
...$this->pluralRulesets
))
);
Expand Down
70 changes: 70 additions & 0 deletions src/MultiWordInflector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php

declare(strict_types=1);

namespace Doctrine\Inflector;

use function implode;
use function in_array;
use function mb_strlen;
use function mb_substr;
use function preg_match;
use function preg_quote;
use function preg_split;

use const PREG_SPLIT_DELIM_CAPTURE;
use const PREG_SPLIT_NO_EMPTY;

/**
 * Decorator that applies inflection to each word in a multi-word phrase.
 *
 * The input is split on the characters listed in WORD_SEPARATORS. Every
 * non-separator part is handed to the wrapped inflector, and the separators
 * themselves are copied through unchanged, so the result keeps the layout
 * of the input.
 */
class MultiWordInflector implements WordInflector
{
    /** Single-character strings that delimit the words of a phrase. */
    public const WORD_SEPARATORS = [' ', '-'];

    /** @var WordInflector */
    private $wordInflector;

    /**
     * @param WordInflector $wordInflector Inflector applied to each individual word.
     */
    public function __construct(WordInflector $wordInflector)
    {
        $this->wordInflector = $wordInflector;
    }

    /**
     * Inflects every word of the given text independently.
     *
     * @param string $word A single word, or a phrase whose words are
     *                     delimited by any of WORD_SEPARATORS.
     *
     * @return string The text with each word inflected and each separator preserved.
     */
    public function inflect(string $word): string
    {
        // Derive the split pattern from WORD_SEPARATORS so that detecting and
        // splitting on separators can never disagree. The "u" modifier is
        // deliberately omitted: the separators are ASCII, so byte-wise
        // matching cannot cut a multibyte character in half.
        $pattern = '/([' . preg_quote(implode('', self::WORD_SEPARATORS), '/') . '])/';

        $parts = preg_split($pattern, $word, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

        // Empty input yields no parts, and a PCRE failure yields false; in
        // both cases treat the input as one word rather than dropping it.
        if ($parts === false || $parts === []) {
            return $this->wordInflector->inflect($word);
        }

        $result = '';
        foreach ($parts as $part) {
            // Captured separators are kept verbatim; everything else is a word.
            $result .= in_array($part, self::WORD_SEPARATORS, true)
                ? $part
                : $this->wordInflector->inflect($part);
        }

        return $result;
    }
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class should come with tests

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have implemented the tests for the MultiWordInflector class.

101 changes: 32 additions & 69 deletions src/Rules/Italian/Inflectible.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,83 +14,41 @@ class Inflectible
/**
 * Yields the transformations that map an Italian plural ending to its singular form.
 *
 * Every pattern is anchored to the end of the word with `$`. The `sce$` rule
 * in the "Advanced ending rules" group must be anchored as well: unanchored,
 * it would also rewrite "sce" in the middle of a word.
 *
 * NOTE(review): several patterns below repeat an earlier one (`e$`, `i$`) or
 * are subsumed by a broader earlier one. If the consumer applies only the
 * first matching rule, the later duplicates never fire - confirm against the
 * code that consumes this list.
 *
 * @return iterable<Transformation>
 */
public static function getSingular(): iterable
{
    // Reverse of -sce → -scia (fasce → fascia)
    yield new Transformation(new Pattern('([aeiou])sce$'), '\\1scia');
    // Advanced ending rules
    yield new Transformation(new Pattern('sce$'), 'scia'); // fasce → fascia
    yield new Transformation(new Pattern('sci$'), 'scio'); // fasci → fascio
    yield new Transformation(new Pattern('chi$'), 'co'); // bachi → baco
    yield new Transformation(new Pattern('che$'), 'ca'); // fotografiche → fotografica
    yield new Transformation(new Pattern('ghi$'), 'go'); // laghi → lago
    yield new Transformation(new Pattern('ghe$'), 'ga'); // targhe → targa
    yield new Transformation(new Pattern('esi$'), 'ese'); // paesi → paese
    yield new Transformation(new Pattern('ali$'), 'ale'); // ministeriali → ministeriale
    yield new Transformation(new Pattern('ari$'), 'ario'); // questionari → questionario
    yield new Transformation(new Pattern('eri$'), 'ero'); // numeri → numero
    yield new Transformation(new Pattern('li$'), 'lio'); // cimeli → cimelio

    // Reverse of -cie → -cia (farmacia → farmacie)
    yield new Transformation(new Pattern('cie$'), 'cia');

    // Reverse of -gie → -gia (bugia → bugie)
    yield new Transformation(new Pattern('gie$'), 'gia');

    // Reverse of -ce → -cia (arance → arancia)
    yield new Transformation(new Pattern('([^aeiou])ce$'), '\1cia');

    // Reverse of -ge → -gia (valige → valigia)
    yield new Transformation(new Pattern('([^aeiou])ge$'), '\1gia');

    // Reverse of -chi → -co (bachi → baco)
    yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])chi$'), '\1co');

    // Reverse of -ghi → -go (laghi → lago)
    yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ghi$'), '\1go');

    // Reverse of -ci → -co (medici → medico)
    yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])ci$'), '\1co');

    // Reverse of -gi → -go (psicologi → psicologo)
    yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])gi$'), '\1go');

    // Reverse of -i → -io (zii → zio, negozi → negozio)
    // This is more complex due to Italian's stress patterns, but we'll handle the basic case
    yield new Transformation(new Pattern('([^aeiou])i$'), '\1io');

    // Handle words that end with -i but should go to -co/-go (amici → amico, not amice)
    yield new Transformation(new Pattern('([^aeiou])ci$'), '\1co');
    yield new Transformation(new Pattern('([^aeiou])gi$'), '\1go');

    // Reverse of -a → -e
    yield new Transformation(new Pattern('e$'), 'a');

    // Reverse of -e → -i
    yield new Transformation(new Pattern('i$'), 'e');

    // Reverse of -o → -i
    yield new Transformation(new Pattern('i$'), 'o');
    // Standard ending rules
    yield new Transformation(new Pattern('e$'), 'a'); // case → casa
    yield new Transformation(new Pattern('i$'), 'o'); // libri → libro
    yield new Transformation(new Pattern('i$'), 'e'); // studenti → studente
}

/**
 * Yields the transformations that map an Italian singular ending to its plural form.
 *
 * Each transformation pairs an end-anchored pattern with its replacement.
 *
 * NOTE(review): some patterns here are repeated or are narrower than an
 * earlier one (e.g. `cia$` precedes `([^aeiou])cia$`, and `a$`/`e$`/`o$`
 * appear twice). If only the first matching rule is applied, the later ones
 * never fire - confirm against the code that consumes this list.
 *
 * @return iterable<Transformation>
 */
public static function getPlural(): iterable
{
// Words ending in -scia without stress on 'i' become -sce (e.g. fascia → fasce)
yield new Transformation(new Pattern('([aeiou])scia$'), '\\1sce');

// Words ending in -cia/gia with stress on 'i' keep the 'i' in plural
yield new Transformation(new Pattern('cia$'), 'cie'); // e.g. farmacia → farmacie
yield new Transformation(new Pattern('gia$'), 'gie'); // e.g. bugia → bugie

// Words ending in -cia/gia without stress on 'i' lose the 'i' in plural
yield new Transformation(new Pattern('([^aeiou])cia$'), '\\1ce'); // e.g. arancia → arance
yield new Transformation(new Pattern('([^aeiou])gia$'), '\\1ge'); // e.g. valigia → valige

// Words ending in -co/-go with stress on 'o' become -chi/-ghi
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])co$'), '\\1chi'); // e.g. baco → bachi
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])go$'), '\\1ghi'); // e.g. lago → laghi

// Words ending in -co/-go with stress on the penultimate syllable become -ci/-gi
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])co$'), '\\1ci'); // e.g. medico → medici
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])go$'), '\\1gi'); // e.g. psicologo → psicologi

// Words ending in -io with stress on 'i' keep the 'i' in plural
yield new Transformation(new Pattern('([^aeiou])io$'), '\\1i'); // e.g. zio → zii

// Words ending in -io with stress on 'o' lose the 'i' in plural
yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. negozio → negozi
// Advanced ending rules
yield new Transformation(new Pattern('scia$'), 'sce'); // fascia → fasce
yield new Transformation(new Pattern('scio$'), 'sci'); // fascio → fasci
yield new Transformation(new Pattern('co$'), 'chi'); // baco → bachi
yield new Transformation(new Pattern('ca$'), 'che'); // fotografica → fotografiche
yield new Transformation(new Pattern('go$'), 'ghi'); // lago → laghi
yield new Transformation(new Pattern('ga$'), 'ghe'); // targa → targhe
yield new Transformation(new Pattern('io$'), 'i'); // cimelio → cimeli

// Standard ending rules
yield new Transformation(new Pattern('a$'), 'e'); // -a → -e
yield new Transformation(new Pattern('e$'), 'i'); // -e → -i
yield new Transformation(new Pattern('o$'), 'i'); // -o → -i
yield new Transformation(new Pattern('a$'), 'e'); // casa → case
yield new Transformation(new Pattern('o$'), 'i'); // libro → libri
yield new Transformation(new Pattern('e$'), 'i'); // studente → studenti
}

/** @return iterable<Substitution> */
Expand Down Expand Up @@ -118,6 +76,7 @@ public static function getIrregular(): iterable
'capitale' => 'capitali',
'carcere' => 'carceri',
'casa' => 'case',
'cassaforte' => 'casseforti',
'cavaliere' => 'cavalieri',
'centinaio' => 'centinaia',
'cerchio' => 'cerchia',
Expand All @@ -134,9 +93,11 @@ public static function getIrregular(): iterable
'dito' => 'dita',
'dottore' => 'dottori',
'fiore' => 'fiori',
'forte' => 'forti',
'fratello' => 'fratelli',
'fuoco' => 'fuochi',
'gamba' => 'gambe',
'giallo' => 'gialli',
'ginocchio' => 'ginocchia',
'gioco' => 'giochi',
'giornale' => 'giornali',
Expand Down Expand Up @@ -188,6 +149,7 @@ public static function getIrregular(): iterable
'scuola' => 'scuole',
'serie' => 'serie',
'serramento' => 'serramenta',
'sistema' => 'sistemi',
'sorella' => 'sorelle',
'specie' => 'specie',
'staio' => 'staia',
Expand All @@ -198,6 +160,7 @@ public static function getIrregular(): iterable
'suo' => 'suoi',
'superficie' => 'superfici',
'tavolo' => 'tavoli',
'tema' => 'temi',
'tempio' => 'templi',
'treno' => 'treni',
'tuo' => 'tuoi',
Expand Down
4 changes: 4 additions & 0 deletions src/Rules/Italian/InflectorFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@

use Doctrine\Inflector\GenericLanguageInflectorFactory;
use Doctrine\Inflector\Rules\Ruleset;
use Doctrine\Inflector\WordInflector;

final class InflectorFactory extends GenericLanguageInflectorFactory
{
/** @var class-string<WordInflector>|null */
protected $rulesetInflector = RulesetInflector::class;

protected function getSingularRuleset(): Ruleset
{
return Rules::getSingularRuleset();
Expand Down
46 changes: 46 additions & 0 deletions src/Rules/Italian/RulesetInflector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

declare(strict_types=1);

namespace Doctrine\Inflector\Rules\Italian;

use Doctrine\Inflector\MultiWordInflector;

use function implode;
use function preg_split;
use function strpos;

use const PREG_SPLIT_DELIM_CAPTURE;
use const PREG_SPLIT_NO_EMPTY;

/**
 * Italian ruleset inflector that inflects each word of a phrase separately.
 *
 * Input without a space or hyphen goes straight to the parent inflector.
 * Otherwise the input is split on MultiWordInflector::WORD_SEPARATORS, each
 * non-separator part is inflected by the parent, and the parts are re-joined
 * with their original separators.
 */
class RulesetInflector extends \Doctrine\Inflector\RulesetInflector
{
/**
 * Inflects a single word or every word of a space/hyphen separated phrase.
 *
 * @param string $word Word or phrase to inflect.
 *
 * @return string The inflected text with separators left in place.
 */
public function inflect(string $word): string
{
// If it's a single word without spaces or hyphens, use the original inflector
if (strpos($word, ' ') === false && strpos($word, '-') === false) {
return parent::inflect($word);
}

// Split the phrase into words and process each one
// The separators are concatenated unescaped into a character class; the
// capture group makes preg_split() return the separators as parts too.
$regex = '/([' . implode('', MultiWordInflector::WORD_SEPARATORS) . '])/';
$words = preg_split($regex, $word, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

// preg_split() returns false on failure; fall back to inflecting the whole input as one word.
if ($words === false) {
return parent::inflect($word);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this fails, it feels like you should rather throw an exception.

https://3v4l.org/evCNt#vnull

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’d say that as a fallback it’s right for it to handle the string as if it were a single word, so I would leave it like that without raising exceptions, because in Italian there are often compound or multiple words, so if there were any unmanaged cases exceptions would be raised, whereas this way it would still be handled.

}

$result = [];
foreach ($words as $part) {
// Separators are copied through unchanged.
// NOTE(review): these literals duplicate MultiWordInflector::WORD_SEPARATORS
// used for the split above - presumably the two must be kept in sync.
if ($part === ' ' || $part === '-') {
$result[] = $part;
continue;
}

// Process each word individually
$result[] = parent::inflect($part);
}

return implode('', $result);
}
}
5 changes: 3 additions & 2 deletions src/Rules/Italian/Uninflected.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ private static function getDefault(): iterable
'crisi',
'crocevia',
'dopocena',
'eta',
'film',
'foto',
'foto',
'fuchsia',
'fuchsia',
'gnu',
'gorilla',
Expand Down Expand Up @@ -69,8 +72,6 @@ private static function getDefault(): iterable
'virtù',
'virus',
'yogurt',
'foto',
'fuchsia',
];

foreach ($invariables as $word) {
Expand Down
Loading
Loading