Skip to content

Commit 2e23870

Browse files
author
Federico Liva
committed
Simplification of Italian inflection rules
1 parent 0138449 commit 2e23870

File tree

5 files changed

+193
-76
lines changed

5 files changed

+193
-76
lines changed

src/MultiWordInflector.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*/
1616
class MultiWordInflector implements WordInflector
1717
{
18-
private const WORD_SEPARATORS = [' ', '-'];
18+
public const WORD_SEPARATORS = [' ', '-'];
1919

2020
/** @var WordInflector */
2121
private $wordInflector;

src/Rules/Italian/Inflectible.php

Lines changed: 29 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -14,83 +14,41 @@ class Inflectible
1414
/** @return iterable<Transformation> */
1515
public static function getSingular(): iterable
1616
{
17-
// Reverse of -sce → -scia (fasce → fascia)
18-
yield new Transformation(new Pattern('([aeiou])sce$'), '\\1scia');
17+
// Advanced ending rules
18+
// Anchored with $ so only the word ending is rewritten; an unanchored 'sce' would also match inside a word (e.g. "scene").
yield new Transformation(new Pattern('sce$'), 'scia'); // fasce → fascia
19+
yield new Transformation(new Pattern('sci$'), 'scio'); // fasci → fascio
20+
yield new Transformation(new Pattern('chi$'), 'co'); // bachi → baco
21+
yield new Transformation(new Pattern('che$'), 'ca'); // fotografiche → fotografica
22+
yield new Transformation(new Pattern('ghi$'), 'go'); // laghi → lago
23+
yield new Transformation(new Pattern('ghe$'), 'ga'); // targhe → targa
24+
yield new Transformation(new Pattern('esi$'), 'ese'); // paesi → paese
25+
yield new Transformation(new Pattern('ali$'), 'ale'); // ministeriali → ministeriale
26+
yield new Transformation(new Pattern('ari$'), 'ario'); // questionari → questionario
27+
yield new Transformation(new Pattern('eri$'), 'ero'); // numeri → numero
28+
yield new Transformation(new Pattern('li$'), 'lio'); // cimeli → cimelio
1929

20-
// Reverse of -cie → -cia (farmacia → farmacie)
21-
yield new Transformation(new Pattern('cie$'), 'cia');
22-
23-
// Reverse of -gie → -gia (bugia → bugie)
24-
yield new Transformation(new Pattern('gie$'), 'gia');
25-
26-
// Reverse of -ce → -cia (arance → arancia)
27-
yield new Transformation(new Pattern('([aeiou])ce$'), '\1cia');
28-
29-
// Reverse of -ge → -gia (valige → valigia)
30-
yield new Transformation(new Pattern('([aeiou])ge$'), '\1gia');
31-
32-
// Reverse of -chi → -co (bachi → baco)
33-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])chi$'), '\1co');
34-
35-
// Reverse of -chi → -co (fotografiche → fotografica)
36-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])che$'), '\1ca');
37-
38-
// Reverse of -ghi → -go (laghi → lago)
39-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ghi$'), '\1go');
40-
41-
// Reverse of -ci → -co (medici → medico)
42-
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])ci$'), '\1o');
43-
44-
// Reverse of -i → -io (zii → zio, negozi → negozio)
45-
// This is more complex due to Italian's stress patterns, but we'll handle the basic case
46-
yield new Transformation(new Pattern('([aeiou])i$'), '\1io');
47-
48-
// Handle words that end with -i but should go to -co/-go (amici → amico, not amice)
49-
yield new Transformation(new Pattern('([cgmrt])i$'), '\1o');
50-
51-
// Reverse of -a → -e
52-
yield new Transformation(new Pattern('e$'), 'a');
53-
54-
// Reverse of -e → -i
55-
yield new Transformation(new Pattern('i$'), 'e');
56-
57-
// Reverse of -o → -i
58-
yield new Transformation(new Pattern('i$'), 'o');
30+
// Standard ending rules
31+
yield new Transformation(new Pattern('e$'), 'a'); // case → casa
32+
yield new Transformation(new Pattern('i$'), 'o'); // libri → libro
33+
yield new Transformation(new Pattern('i$'), 'e'); // studenti → studente
5934
}
6035

6136
/** @return iterable<Transformation> */
6237
public static function getPlural(): iterable
6338
{
64-
// Words ending in -scia without stress on 'i' become -sce (e.g. fascia → fasce)
65-
yield new Transformation(new Pattern('([aeiou])scia$'), '\\1sce');
66-
67-
// Words ending in -cia/gia with stress on 'i' keep the 'i' in plural
68-
yield new Transformation(new Pattern('cia$'), 'cie'); // e.g. farmacia → farmacie
69-
yield new Transformation(new Pattern('gia$'), 'gie'); // e.g. bugia → bugie
70-
71-
// Words ending in -cia/gia without stress on 'i' lose the 'i' in plural
72-
yield new Transformation(new Pattern('([aeiou])cia$'), '\\1ce'); // e.g. arancia → arance
73-
yield new Transformation(new Pattern('([aeiou])gia$'), '\\1ge'); // e.g. valigia → valige
74-
75-
// Words ending in -co/-go with stress on 'o' become -chi/-ghi
76-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])co$'), '\\1chi'); // e.g. baco → bachi
77-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])ca$'), '\\1che'); // e.g. fotografica → fotografiche
78-
yield new Transformation(new Pattern('([bcdfghjklmnpqrstvwxyz][aeiou])go$'), '\\1ghi'); // e.g. lago → laghi
79-
80-
// Words ending in -co/-go with stress on the penultimate syllable become -ci/-gi
81-
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])co$'), '\\1ci'); // e.g. medico → medici
82-
yield new Transformation(new Pattern('([aeiou][bcdfghjklmnpqrstvwxyz])go$'), '\\1gi'); // e.g. psicologo → psicologi
83-
84-
// Words ending in -io with stress on 'i' keep the 'i' in plural
85-
yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. zio → zii
86-
87-
// Words ending in -io with stress on 'o' lose the 'i' in plural
88-
yield new Transformation(new Pattern('([aeiou])io$'), '\\1i'); // e.g. negozio → negozi
39+
// Advanced ending rules
40+
yield new Transformation(new Pattern('scia$'), 'sce'); // fascia → fasce
41+
yield new Transformation(new Pattern('scio$'), 'sci'); // fascio → fasci
42+
yield new Transformation(new Pattern('co$'), 'chi'); // baco → bachi
43+
yield new Transformation(new Pattern('ca$'), 'che'); // fotografica → fotografiche
44+
yield new Transformation(new Pattern('go$'), 'ghi'); // lago → laghi
45+
yield new Transformation(new Pattern('ga$'), 'ghe'); // targa → targhe
46+
yield new Transformation(new Pattern('io$'), 'i'); // cimelio → cimeli
8947

9048
// Standard ending rules
91-
yield new Transformation(new Pattern('a$'), 'e'); // -a-e
92-
yield new Transformation(new Pattern('e$'), 'i'); // -e-i
93-
yield new Transformation(new Pattern('o$'), 'i'); // -o-i
49+
yield new Transformation(new Pattern('a$'), 'e'); // casa → case
50+
yield new Transformation(new Pattern('o$'), 'i'); // libro → libri
51+
yield new Transformation(new Pattern('e$'), 'i'); // studente → studenti
9452
}
9553

9654
/** @return iterable<Substitution> */
@@ -118,6 +76,7 @@ public static function getIrregular(): iterable
11876
'capitale' => 'capitali',
11977
'carcere' => 'carceri',
12078
'casa' => 'case',
79+
'cassaforte' => 'casseforti',
12180
'cavaliere' => 'cavalieri',
12281
'centinaio' => 'centinaia',
12382
'cerchio' => 'cerchia',
@@ -134,6 +93,7 @@ public static function getIrregular(): iterable
13493
'dito' => 'dita',
13594
'dottore' => 'dottori',
13695
'fiore' => 'fiori',
96+
'forte' => 'forti',
13797
'fratello' => 'fratelli',
13898
'fuoco' => 'fuochi',
13999
'gamba' => 'gambe',

src/Rules/Italian/RulesetInflector.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
namespace Doctrine\Inflector\Rules\Italian;
66

7+
use Doctrine\Inflector\MultiWordInflector;
8+
79
use function implode;
810
use function preg_split;
911
use function strpos;
@@ -21,7 +23,9 @@ public function inflect(string $word): string
2123
}
2224

2325
// Split the phrase into words and process each one
24-
$words = preg_split('/([ -])/', $word, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
26+
$regex = '/([' . implode('', MultiWordInflector::WORD_SEPARATORS) . '])/';
27+
$words = preg_split($regex, $word, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
28+
2529
if ($words === false) {
2630
return parent::inflect($word);
2731
}

tests/MultiWordInflectorTest.php

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\Tests\Inflector;
6+
7+
use Doctrine\Inflector\MultiWordInflector;
8+
use Doctrine\Inflector\WordInflector;
9+
use PHPUnit\Framework\MockObject\MockObject;
10+
use PHPUnit\Framework\TestCase;
11+
12+
class MultiWordInflectorTest extends TestCase
13+
{
14+
/** @var WordInflector|MockObject */
15+
private $wordInflector;
16+
17+
/** @var MultiWordInflector */
18+
private $multiWordInflector;
19+
20+
protected function setUp(): void
21+
{
22+
$this->wordInflector = $this->createMock(WordInflector::class);
23+
$this->multiWordInflector = new MultiWordInflector($this->wordInflector);
24+
}
25+
26+
public function testInflectSingleWordDelegatesToWordInflector(): void
27+
{
28+
$this->wordInflector->expects(self::once())
29+
->method('inflect')
30+
->with('casa')
31+
->willReturn('case');
32+
33+
$result = $this->multiWordInflector->inflect('casa');
34+
self::assertSame('case', $result);
35+
}
36+
37+
public function testInflectMultiWordItalianPhrases(): void
38+
{
39+
$this->wordInflector->expects(self::any())
40+
->method('inflect')
41+
->willReturnMap([
42+
['carta', 'carte'],
43+
['credito', 'crediti'],
44+
['libro', 'libri'],
45+
['giallo', 'gialli'],
46+
['chiave', 'chiavi'],
47+
['inglese', 'inglesi'],
48+
['stazione', 'stazioni'],
49+
['ferroviaria', 'ferroviarie'],
50+
['macchina', 'macchine'],
51+
['fotografica', 'fotografiche'],
52+
['di', 'di'],
53+
['geografica', 'geografiche'],
54+
]);
55+
56+
$testCases = [
57+
'carta di credito' => 'carte di crediti',
58+
'libro giallo' => 'libri gialli',
59+
'chiave inglese' => 'chiavi inglesi',
60+
'stazione ferroviaria' => 'stazioni ferroviarie',
61+
'macchina fotografica' => 'macchine fotografiche',
62+
'carta geografica' => 'carte geografiche',
63+
];
64+
65+
foreach ($testCases as $singular => $expectedPlural) {
66+
$result = $this->multiWordInflector->inflect($singular);
67+
self::assertSame($expectedPlural, $result);
68+
}
69+
}
70+
71+
public function testInflectWithHyphenatedItalianWords(): void
72+
{
73+
$this->wordInflector->expects(self::any())
74+
->method('inflect')
75+
->willReturnMap([
76+
['primo', 'primi'],
77+
['piano', 'piani'],
78+
['capo', 'capi'],
79+
['stazione', 'stazioni'],
80+
['cassaforte', 'casseforti'],
81+
['forte', 'forti'],
82+
]);
83+
84+
$testCases = [
85+
'primo-piano' => 'primi-piani',
86+
'cassaforte-forte' => 'casseforti-forti',
87+
];
88+
89+
foreach ($testCases as $singular => $expectedPlural) {
90+
$result = $this->multiWordInflector->inflect($singular);
91+
self::assertSame($expectedPlural, $result);
92+
}
93+
}
94+
95+
public function testInflectWithPrepositionsAndArticles(): void
96+
{
97+
$this->wordInflector->expects(self::any())
98+
->method('inflect')
99+
->willReturnMap([
100+
['sistema', 'sistemi'],
101+
['operativo', 'operativi'],
102+
['carta', 'carte'],
103+
['prepagata', 'prepagate'],
104+
['aziendale', 'aziendali'],
105+
['virtuale', 'virtuali'],
106+
['ricaricabile', 'ricaricabili'],
107+
]);
108+
109+
$testCases = [
110+
'sistema operativo' => 'sistemi operativi',
111+
'carta prepagata' => 'carte prepagate',
112+
'carta aziendale' => 'carte aziendali',
113+
'carta virtuale' => 'carte virtuali',
114+
'carta ricaricabile' => 'carte ricaricabili',
115+
];
116+
117+
foreach ($testCases as $singular => $expectedPlural) {
118+
$result = $this->multiWordInflector->inflect($singular);
119+
self::assertSame($expectedPlural, $result);
120+
}
121+
}
122+
123+
public function testInflectWithSpecialCharacters(): void
124+
{
125+
$this->wordInflector->expects(self::any())
126+
->method('inflect')
127+
->willReturnMap([
128+
['caffè', 'caffè'],
129+
['ristretto', 'ristretti'],
130+
// "tè" is invariable; the 'tè caldo' case below needs this mapping or the mock returns null for it.
['tè', 'tè'],
131+
['caldo', 'caldi'],
132+
['menù', 'menù'],
133+
['del', 'del'],
134+
['giorno', 'giorni'],
135+
]);
136+
137+
$testCases = [
138+
'caffè ristretto' => 'caffè ristretti',
139+
'tè caldo' => 'tè caldi',
140+
'menù del giorno' => 'menù del giorni',
141+
];
142+
143+
foreach ($testCases as $singular => $expectedPlural) {
144+
$result = $this->multiWordInflector->inflect($singular);
145+
self::assertSame($expectedPlural, $result);
146+
}
147+
}
148+
}

tests/Rules/Italian/ItalianFunctionalTest.php

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ public function dataSampleWords(): array
5252
// Multi-word phrases
5353
['garanzia generale', 'garanzie generali'],
5454
['ramo ministeriale', 'rami ministeriali'],
55+
['questionario', 'questionari'],
56+
['cimelio', 'cimeli'],
57+
['fascia', 'fasce'],
58+
['fascio', 'fasci'],
59+
['targa', 'targhe'],
5560
// ['carta di credito', 'carte di credito'],
5661
['libro giallo', 'libri gialli'],
5762
['chiave inglese', 'chiavi inglesi'],
@@ -60,17 +65,17 @@ public function dataSampleWords(): array
6065
['carta geografica', 'carte geografiche'],
6166
['macchina fotografica', 'macchine fotografiche'],
6267
// ['carta di credito prepagata', 'carte di credito prepagate'],
63-
// ['sistema operativo', 'sistemi operativi'],
68+
['sistema operativo', 'sistemi operativi'],
6469
// ['carta di credito aziendale', 'carte di credito aziendali'],
6570
// ['libro di testo', 'libri di testo'],
6671
// ['carta di credito virtuale', 'carte di credito virtuali'],
6772
// ['carta di credito ricaricabile', 'carte di credito ricaricabili'],
6873
//
6974
// // Hyphenated words
70-
// ['primo-piano', 'primi-piani'],
71-
// ['capo-stazione', 'capi-stazione'],
72-
// ['cassaforte-forte', 'casseforti-forti'],
73-
//
75+
['primo-piano', 'primi-piani'],
76+
['capo-stazione', 'capi-stazioni'],
77+
['cassaforte-forte', 'casseforti-forti'],
78+
7479
// // Mixed separators
7580
// ['carta di credito prepagata', 'carte di credito prepagate'],
7681
// ['libro di testo', 'libri di testo'],

0 commit comments

Comments
 (0)