-
-
Notifications
You must be signed in to change notification settings - Fork 134
Improved Italian pluralization with multi-word inflection #279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 2.2.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Doctrine\Inflector; | ||
|
|
||
| use function implode; | ||
| use function in_array; | ||
| use function mb_strlen; | ||
| use function mb_substr; | ||
| use function preg_match; | ||
|
|
||
/**
 * Decorator that applies inflection to each word in a multi-word phrase.
 *
 * The phrase is split on the strings listed in WORD_SEPARATORS. Every
 * separator is copied to the result unchanged; every other fragment is handed
 * to the wrapped inflector on its own.
 */
class MultiWordInflector implements WordInflector
{
    public const WORD_SEPARATORS = [' ', '-'];

    /** @var WordInflector */
    private $wordInflector;

    public function __construct(WordInflector $wordInflector)
    {
        $this->wordInflector = $wordInflector;
    }

    /**
     * Inflects every separator-delimited fragment of $word individually.
     *
     * @param string $word A single word or a phrase containing WORD_SEPARATORS.
     *
     * @return string The phrase with each fragment inflected and all separators preserved in place.
     */
    public function inflect(string $word): string
    {
        // A single split replaces the former per-character mb_substr() scan, which
        // re-walked the string for every character. The separators are ASCII, so a
        // byte-wise split cannot cut through a multibyte UTF-8 sequence.
        $parts = preg_split(
            $this->separatorPattern(),
            $word,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        // An empty input yields no parts and a PCRE failure yields false; in both
        // cases let the wrapped inflector see the untouched input.
        if ($parts === false || $parts === []) {
            return $this->wordInflector->inflect($word);
        }

        $inflected = '';
        foreach ($parts as $part) {
            $inflected .= in_array($part, self::WORD_SEPARATORS, true)
                ? $part
                : $this->wordInflector->inflect($part);
        }

        return $inflected;
    }

    /**
     * Builds a capturing pattern that matches any single entry of WORD_SEPARATORS.
     */
    private function separatorPattern(): string
    {
        $quoted = array_map(
            static function (string $separator): string {
                return preg_quote($separator, '/');
            },
            self::WORD_SEPARATORS
        );

        // An alternation (rather than a character class) keeps "-" literal no
        // matter where it sits in the separator list.
        return '/(' . implode('|', $quoted) . ')/';
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Doctrine\Inflector\Rules\Italian; | ||
|
|
||
| use Doctrine\Inflector\MultiWordInflector; | ||
|
|
||
| use function implode; | ||
| use function preg_split; | ||
| use function strpos; | ||
|
|
||
| use const PREG_SPLIT_DELIM_CAPTURE; | ||
| use const PREG_SPLIT_NO_EMPTY; | ||
|
|
||
/**
 * Ruleset inflector that inflects each word of a phrase separately.
 *
 * Phrases are split on MultiWordInflector::WORD_SEPARATORS; separators are kept
 * as they are and every remaining fragment goes through the parent inflector.
 */
class RulesetInflector extends \Doctrine\Inflector\RulesetInflector
{
    /**
     * Inflects every separator-delimited fragment of $word individually.
     *
     * @param string $word A single word or a phrase containing spaces or hyphens.
     *
     * @return string The phrase with each fragment inflected and separators preserved.
     */
    public function inflect(string $word): string
    {
        // Quote each separator and join them as an alternation. Concatenating the
        // raw separators inside "[...]" only works while "-" happens to be last;
        // any other position would turn it into a character range.
        $quotedSeparators = array_map(
            static function (string $separator): string {
                return preg_quote($separator, '/');
            },
            MultiWordInflector::WORD_SEPARATORS
        );

        $parts = preg_split(
            '/(' . implode('|', $quotedSeparators) . ')/',
            $word,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        // NOTE(review): a reviewer suggested throwing an exception when the split
        // fails. The author kept this fallback on purpose: treating the input as a
        // single word means unexpected compound phrases are still inflected
        // instead of raising. An empty input (no parts) takes the same path.
        if ($parts === false || $parts === []) {
            return parent::inflect($word);
        }

        $inflected = '';
        foreach ($parts as $part) {
            // Use the shared separator list so this check cannot drift from the
            // pattern built above.
            $inflected .= in_array($part, MultiWordInflector::WORD_SEPARATORS, true)
                ? $part
                : parent::inflect($part);
        }

        return $inflected;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This class should come with tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have implemented the tests for the MultiWordInflector class.