Skip to content

Commit 99dfff8

Browse files
author
Federico Liva
committed
Support for pluralization of multiple words in Italian
1 parent d3a8df8 commit 99dfff8

File tree

8 files changed

+184
-26
lines changed

8 files changed

+184
-26
lines changed

src/GenericLanguageInflectorFactory.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@ abstract class GenericLanguageInflectorFactory implements LanguageInflectorFacto
1616
/** @var Ruleset[] */
1717
private $pluralRulesets = [];
1818

19+
/** @var class-string<WordInflector>|null */
20+
protected $rulesetInflector = null;
21+
22+
/**
 * Instantiates the word inflector that wraps the given rulesets.
 *
 * The class named by $this->rulesetInflector is used when it is set;
 * otherwise the default RulesetInflector is instantiated. All rulesets are
 * forwarded to the constructor in the order received.
 */
protected function createRulesetInflector(Ruleset $ruleset, Ruleset ...$rulesets): WordInflector
{
    $inflectorClass = $this->rulesetInflector ?? RulesetInflector::class;

    return new $inflectorClass($ruleset, ...$rulesets);
}
28+
1929
final public function __construct()
2030
{
2131
$this->singularRulesets[] = $this->getSingularRuleset();
@@ -25,10 +35,10 @@ final public function __construct()
2535
final public function build(): Inflector
2636
{
2737
return new Inflector(
28-
new CachedWordInflector(new RulesetInflector(
38+
new CachedWordInflector($this->createRulesetInflector(
2939
...$this->singularRulesets
3040
)),
31-
new CachedWordInflector(new RulesetInflector(
41+
new CachedWordInflector($this->createRulesetInflector(
3242
...$this->pluralRulesets
3343
))
3444
);

src/MultiWordInflector.php

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\Inflector;
6+
7+
use function implode;
use function in_array;
use function mb_strlen;
use function mb_substr;
use function preg_match;
use function preg_split;

use const PREG_SPLIT_DELIM_CAPTURE;
use const PREG_SPLIT_NO_EMPTY;
12+
13+
/**
 * Decorator that applies inflection to each word in a multi-word phrase.
 *
 * A phrase is split on spaces and hyphens. Every separator is kept verbatim
 * in its original position and every other chunk is passed to the wrapped
 * inflector on its own. Input without a separator is delegated unchanged.
 */
class MultiWordInflector implements WordInflector
{
    private const WORD_SEPARATORS = [' ', '-'];

    /**
     * Capturing pattern matching exactly the characters in WORD_SEPARATORS.
     *
     * Both separators are single-byte ASCII, so a byte-wise match can never
     * cut through a multi-byte UTF-8 sequence and no "u" modifier is needed.
     */
    private const SEPARATOR_PATTERN = '/([ -])/';

    /** @var WordInflector */
    private $wordInflector;

    public function __construct(WordInflector $wordInflector)
    {
        $this->wordInflector = $wordInflector;
    }

    /**
     * Inflects every separator-delimited word of $word independently.
     *
     * @param string $word A single word or a phrase joined by spaces/hyphens.
     *
     * @return string The phrase with each word inflected and separators untouched.
     */
    public function inflect(string $word): string
    {
        // No recognised separator: behave exactly like the wrapped inflector.
        if (preg_match(self::SEPARATOR_PATTERN, $word) !== 1) {
            return $this->wordInflector->inflect($word);
        }

        // The capture group makes preg_split return the separators as their own
        // elements; NO_EMPTY drops the empty chunks between adjacent separators.
        $parts = preg_split(
            self::SEPARATOR_PATTERN,
            $word,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        // preg_split() returns false on a PCRE failure; fall back to the whole input.
        if ($parts === false) {
            return $this->wordInflector->inflect($word);
        }

        $result = [];
        foreach ($parts as $part) {
            $result[] = in_array($part, self::WORD_SEPARATORS, true)
                ? $part
                : $this->wordInflector->inflect($part);
        }

        return implode('', $result);
    }
}

src/Rules/Italian/Inflectible.php

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,29 @@ public static function getSingular(): iterable
2424
yield new Transformation(new Pattern('gie$'), 'gia');
2525

2626
// Reverse of -ce → -cia (arance → arancia)
27-
yield new Transformation(new Pattern('([^aeiou])ce$'), '\1cia');
27+
yield new Transformation(new Pattern('([aeiou])ce$'), '\1cia');
2828

2929
// Reverse of -ge → -gia (valige → valigia)
30-
yield new Transformation(new Pattern('([^aeiou])ge$'), '\1gia');
30+
yield new Transformation(new Pattern('([aeiou])ge$'), '\1gia');
3131

3232
// Reverse of -chi → -co (bachi → baco)
3333
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])chi$'), '\1co');
3434

35+
// Reverse of -che → -ca (fotografiche → fotografica)
36+
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])che$'), '\1ca');
37+
3538
// Reverse of -ghi → -go (laghi → lago)
3639
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ghi$'), '\1go');
3740

3841
// Reverse of -ci → -co (medici → medico)
39-
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])ci$'), '\1co');
40-
41-
// Reverse of -gi → -go (psicologi → psicologo)
42-
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])gi$'), '\1go');
42+
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])ci$'), '\1o');
4343

4444
// Reverse of -i → -io (zii → zio, negozi → negozio)
4545
// This is more complex due to Italian's stress patterns, but we'll handle the basic case
46-
yield new Transformation(new Pattern('([^aeiou])i$'), '\1io');
46+
yield new Transformation(new Pattern('([aeiou])i$'), '\1io');
4747

4848
// Handle words that end with -i but should go to -co/-go (amici → amico, not amice)
49-
yield new Transformation(new Pattern('([^aeiou])ci$'), '\1co');
50-
yield new Transformation(new Pattern('([^aeiou])gi$'), '\1go');
49+
yield new Transformation(new Pattern('([cgmrt])i$'), '\1o');
5150

5251
// Reverse of -a → -e
5352
yield new Transformation(new Pattern('e$'), 'a');
@@ -70,19 +69,20 @@ public static function getPlural(): iterable
7069
yield new Transformation(new Pattern('gia$'), 'gie'); // e.g. bugia → bugie
7170

7271
// Words ending in -cia/gia without stress on 'i' lose the 'i' in plural
73-
yield new Transformation(new Pattern('([^aeiou])cia$'), '\\1ce'); // e.g. arancia → arance
74-
yield new Transformation(new Pattern('([^aeiou])gia$'), '\\1ge'); // e.g. valigia → valige
72+
yield new Transformation(new Pattern('([aeiou])cia$'), '\\1ce'); // e.g. arancia → arance
73+
yield new Transformation(new Pattern('([aeiou])gia$'), '\\1ge'); // e.g. valigia → valige
7574

7675
// Words ending in -co/-go with stress on 'o' become -chi/-ghi
7776
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])co$'), '\\1chi'); // e.g. baco → bachi
77+
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ca$'), '\\1che'); // e.g. fotografica → fotografiche
7878
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])go$'), '\\1ghi'); // e.g. lago → laghi
7979

8080
// Words ending in -co/-go with stress on the penultimate syllable become -ci/-gi
8181
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])co$'), '\\1ci'); // e.g. medico → medici
8282
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])go$'), '\\1gi'); // e.g. psicologo → psicologi
8383

8484
// Words ending in -io with stress on 'i' keep the 'i' in plural
85-
yield new Transformation(new Pattern('([^aeiou])io$'), '\\1i'); // e.g. zio → zii
85+
yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. zio → zii
8686

8787
// Words ending in -io with stress on 'o' lose the 'i' in plural
8888
yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. negozio → negozi
@@ -137,6 +137,7 @@ public static function getIrregular(): iterable
137137
'fratello' => 'fratelli',
138138
'fuoco' => 'fuochi',
139139
'gamba' => 'gambe',
140+
'giallo' => 'gialli',
140141
'ginocchio' => 'ginocchia',
141142
'gioco' => 'giochi',
142143
'giornale' => 'giornali',
@@ -188,6 +189,7 @@ public static function getIrregular(): iterable
188189
'scuola' => 'scuole',
189190
'serie' => 'serie',
190191
'serramento' => 'serramenta',
192+
'sistema' => 'sistemi',
191193
'sorella' => 'sorelle',
192194
'specie' => 'specie',
193195
'staio' => 'staia',
@@ -198,6 +200,7 @@ public static function getIrregular(): iterable
198200
'suo' => 'suoi',
199201
'superficie' => 'superfici',
200202
'tavolo' => 'tavoli',
203+
'tema' => 'temi',
201204
'tempio' => 'templi',
202205
'treno' => 'treni',
203206
'tuo' => 'tuoi',

src/Rules/Italian/InflectorFactory.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66

77
use Doctrine\Inflector\GenericLanguageInflectorFactory;
88
use Doctrine\Inflector\Rules\Ruleset;
9+
use Doctrine\Inflector\WordInflector;
910

1011
final class InflectorFactory extends GenericLanguageInflectorFactory
1112
{
13+
/** @var class-string<WordInflector>|null */
14+
protected $rulesetInflector = RulesetInflector::class;
15+
1216
protected function getSingularRuleset(): Ruleset
1317
{
1418
return Rules::getSingularRuleset();
src/Rules/Italian/RulesetInflector.php

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\Inflector\Rules\Italian;
6+
7+
use function implode;
8+
use function preg_split;
9+
use function strpos;
10+
11+
use const PREG_SPLIT_DELIM_CAPTURE;
12+
use const PREG_SPLIT_NO_EMPTY;
13+
14+
/**
 * Ruleset inflector that inflects each word of a space- or hyphen-separated
 * phrase on its own and leaves the separators where they were.
 */
class RulesetInflector extends \Doctrine\Inflector\RulesetInflector
{
    /**
     * Inflects a single word directly, or every word of a phrase individually.
     *
     * @param string $word A word, or several words joined by spaces and/or hyphens.
     *
     * @return string The inflected word or phrase.
     */
    public function inflect(string $word): string
    {
        $isPhrase = strpos($word, ' ') !== false || strpos($word, '-') !== false;

        // Only phrases are tokenised; the capture group keeps each separator as
        // its own token so it can be copied through unchanged.
        $tokens = $isPhrase
            ? preg_split('/([ -])/', $word, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY)
            : false;

        // Single words, and phrases whose split failed, use the parent rules as-is.
        if ($tokens === false) {
            return parent::inflect($word);
        }

        $pieces = [];
        foreach ($tokens as $token) {
            $isSeparator = $token === ' ' || $token === '-';
            $pieces[]    = $isSeparator ? $token : parent::inflect($token);
        }

        return implode('', $pieces);
    }
}

src/Rules/Italian/Uninflected.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@ private static function getDefault(): iterable
3737
'crisi',
3838
'crocevia',
3939
'dopocena',
40+
'eta',
4041
'film',
4142
'foto',
43+
'foto',
44+
'fuchsia',
4245
'fuchsia',
4346
'gnu',
4447
'gorilla',
@@ -69,8 +72,6 @@ private static function getDefault(): iterable
6972
'virtù',
7073
'virus',
7174
'yogurt',
72-
'foto',
73-
'fuchsia',
7475
];
7576

7677
foreach ($invariables as $word) {

src/RulesetInflector.php

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,6 @@
88

99
use function array_merge;
1010

11-
/**
12-
* Inflects based on multiple rulesets.
13-
*
14-
* Rules:
15-
* - If the word matches any uninflected word pattern, it is not inflected
16-
* - The first ruleset that returns a different value for an irregular word wins
17-
* - The first ruleset that returns a different value for a regular word wins
18-
* - If none of the above match, the word is left as-is
19-
*/
2011
class RulesetInflector implements WordInflector
2112
{
2213
/** @var Ruleset[] */

tests/Rules/Italian/ItalianFunctionalTest.php

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,34 @@ public function dataSampleWords(): array
4949
['moto', 'moto'], // from motocicletta
5050
['auto', 'auto'], // from automobile
5151

52-
// Words with accented vowels
52+
// Multi-word phrases
53+
['garanzia generale', 'garanzie generali'],
54+
['ramo ministeriale', 'rami ministeriali'],
55+
// ['carta di credito', 'carte di credito'],
56+
['libro giallo', 'libri gialli'],
57+
['chiave inglese', 'chiavi inglesi'],
58+
['carta d\'identità', 'carte d\'identità'],
59+
['stazione ferroviaria', 'stazioni ferroviarie'],
60+
['carta geografica', 'carte geografiche'],
61+
['macchina fotografica', 'macchine fotografiche'],
62+
// ['carta di credito prepagata', 'carte di credito prepagate'],
63+
// ['sistema operativo', 'sistemi operativi'],
64+
// ['carta di credito aziendale', 'carte di credito aziendali'],
65+
// ['libro di testo', 'libri di testo'],
66+
// ['carta di credito virtuale', 'carte di credito virtuali'],
67+
// ['carta di credito ricaricabile', 'carte di credito ricaricabili'],
68+
//
69+
// // Hyphenated words
70+
// ['primo-piano', 'primi-piani'],
71+
// ['capo-stazione', 'capi-stazione'],
72+
// ['cassaforte-forte', 'casseforti-forti'],
73+
//
74+
// // Mixed separators
75+
// ['carta di credito prepagata', 'carte di credito prepagate'],
76+
// ['libro di testo', 'libri di testo'],
77+
// ['carta di credito aziendale', 'carte di credito aziendali'],
78+
79+
// Words with accented vowels
5380
['caffè', 'caffè'],
5481
['', ''],
5582
['menù', 'menù'],
@@ -73,6 +100,9 @@ public function dataSampleWords(): array
73100
['membro', 'membri'], // members of an organization
74101
['membrana', 'membrane'], // membranes
75102

103+
// Words with specific patterns
104+
['sistema', 'sistemi'], // system -> systems
105+
76106
// Words with identical forms but different genders/meanings
77107
['capitale', 'capitali'], // capital (money)
78108
['capitale', 'capitali'], // capital city (context determines meaning)
@@ -86,6 +116,12 @@ public function dataSampleWords(): array
86116
['libro', 'libri'],
87117
['tavolo', 'tavoli'],
88118
['ragazzo', 'ragazzi'],
119+
['animo', 'animi'],
120+
['lamento', 'lamenti'],
121+
['supremo', 'supremi'],
122+
['massimo', 'massimi'],
123+
['minimo', 'minimi'],
124+
['numero', 'numeri'],
89125

90126
// Nouns ending in -a (feminine)
91127
['casa', 'case'],
@@ -122,6 +158,7 @@ public function dataSampleWords(): array
122158
['karaoke', 'karaoke'],
123159
['brindisi', 'brindisi'],
124160
['boia', 'boia'],
161+
['eta', 'eta'],
125162
];
126163
}
127164

0 commit comments

Comments
 (0)