Skip to content

Commit

Permalink
Merge pull request #3 from rhasspy/synesthesiam-20241024-multiple-rulesets
Browse files Browse the repository at this point in the history

Use all available rulesets for formatting purposes
  • Loading branch information
synesthesiam authored Oct 24, 2024
2 parents 559d20f + efcd3f3 commit f51a7f6
Show file tree
Hide file tree
Showing 13 changed files with 253 additions and 148 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## 2.0.0

- Change `format_number` to return `FormatResult` instead of a `str`
- Remove `RulesetName` enum and add `FormatPurpose` instead
- Add `purpose` to `format_number`, which selects all relevant rulesets
- Allow multiple ruleset names in `format_number` via `ruleset_names` (prefer using `purpose`)
- Require an `RbnfEngine` to have a single language

## 1.3.0

- Remove soft hyphens by default (U+00AD)
Expand Down
40 changes: 35 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,49 @@ This lets you spell out numbers for a large number of locales:
from unicode_rbnf import RbnfEngine

engine = RbnfEngine.for_language("en")
assert engine.format_number(1234) == "one thousand two hundred thirty-four"
assert engine.format_number(1234).text == "one thousand two hundred thirty-four"
```

Depending on the locale, different rulesets are supported as well:
Different formatting purposes are supported as well, depending on the locale:

``` python
from unicode_rbnf import RbnfEngine, RulesetName
from unicode_rbnf import RbnfEngine, FormatPurpose

engine = RbnfEngine.for_language("en")
assert engine.format_number(1999, RulesetName.YEAR) == "nineteen ninety-nine"
assert engine.format_number(11, RulesetName.ORDINAL) == "eleventh"
assert engine.format_number(1999, FormatPurpose.CARDINAL).text == "one thousand nine hundred ninety-nine"
assert engine.format_number(1999, FormatPurpose.YEAR).text == "nineteen ninety-nine"
assert engine.format_number(11, FormatPurpose.ORDINAL).text == "eleventh"
```

For locales with multiple genders, cases, etc., the text for each variant is available in the `text_by_ruleset` mapping of the result returned by `format_number`:

``` python
from unicode_rbnf import RbnfEngine

engine = RbnfEngine.for_language("de")
print(engine.format_number(1))
```

Result:

```
FormatResult(
text='eins',
text_by_ruleset={
'spellout-numbering': 'eins',
'spellout-cardinal-neuter': 'ein',
'spellout-cardinal-masculine': 'ein',
'spellout-cardinal-feminine': 'eine',
'spellout-cardinal-n': 'einen',
'spellout-cardinal-r': 'einer',
'spellout-cardinal-s': 'eines',
'spellout-cardinal-m': 'einem'
}
)
```

The `text` property of the result holds the text from the ruleset with the shortest name, which is the least specific one (`spellout-numbering` in the example above).

## Supported locales

See: https://github.com/unicode-org/cldr/tree/release-44/common/rbnf
Expand Down
15 changes: 13 additions & 2 deletions tests/test_de.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,16 @@

def test_german():
engine = RbnfEngine.for_language("de")
assert engine.format_number(13) == "dreizehn"
assert engine.format_number(32) == "zweiunddreißig"
assert engine.format_number(13).text == "dreizehn"
assert engine.format_number(32).text == "zweiunddreißig"

# All genders, cases
assert set(engine.format_number(1).text_by_ruleset.values()) == {
"ein",
"eins",
"eine",
"eines",
"einer",
"einem",
"einen",
}
68 changes: 32 additions & 36 deletions tests/test_en.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,48 @@
from unicode_rbnf import RbnfEngine, RulesetName
from unicode_rbnf import RbnfEngine, FormatPurpose


def test_english():
engine = RbnfEngine.for_language("en")

assert engine.format_number(7) == "seven"
assert engine.format_number(15) == "fifteen"
assert engine.format_number(42) == "forty-two"
assert engine.format_number(100) == "one hundred"
assert engine.format_number(143) == "one hundred forty-three"
assert engine.format_number(1000) == "one thousand"
assert engine.format_number(1234) == "one thousand two hundred thirty-four"
assert engine.format_number(3144) == "three thousand one hundred forty-four"
assert engine.format_number(10000) == "ten thousand"
assert engine.format_number(83145) == "eighty-three thousand one hundred forty-five"
assert engine.format_number(100000) == "one hundred thousand"
assert engine.format_number(7).text == "seven"
assert engine.format_number(15).text == "fifteen"
assert engine.format_number(42).text == "forty-two"
assert engine.format_number(100).text == "one hundred"
assert engine.format_number(143).text == "one hundred forty-three"
assert engine.format_number(1000).text == "one thousand"
assert engine.format_number(1234).text == "one thousand two hundred thirty-four"
assert engine.format_number(3144).text == "three thousand one hundred forty-four"
assert engine.format_number(10000).text == "ten thousand"
assert (
engine.format_number(683146)
engine.format_number(83145).text
== "eighty-three thousand one hundred forty-five"
)

assert engine.format_number(100000).text == "one hundred thousand"
assert (
engine.format_number(683146).text
== "six hundred eighty-three thousand one hundred forty-six"
)
assert engine.format_number(1000000) == "one million"
assert engine.format_number(10000000) == "ten million"
assert engine.format_number(100000000) == "one hundred million"
assert engine.format_number(1000000000) == "one billion"

assert engine.format_number(1000000).text == "one million"
assert engine.format_number(10000000).text == "ten million"
assert engine.format_number(100000000).text == "one hundred million"
assert engine.format_number(1000000000).text == "one billion"

# Special rules
assert engine.format_number(-1) == "minus one"
assert engine.format_number(float("nan")) == "not a number"
assert engine.format_number(float("inf")) == "infinite"
assert engine.format_number(-1).text == "minus one"
assert engine.format_number(float("nan")).text == "not a number"
assert engine.format_number(float("inf")).text == "infinite"

# Fractions
assert (
engine.format_number(3.14, ruleset_name=RulesetName.CARDINAL)
== "three point fourteen"
)
assert (
engine.format_number("5.3", ruleset_name=RulesetName.CARDINAL)
== "five point three"
)
assert engine.format_number(3.14).text == "three point fourteen"
assert engine.format_number("5.3").text == "five point three"

# Ordinals
assert engine.format_number(20, ruleset_name=RulesetName.ORDINAL) == "twentieth"
assert engine.format_number(30, ruleset_name=RulesetName.ORDINAL) == "thirtieth"
assert engine.format_number(99, ruleset_name=RulesetName.ORDINAL) == "ninety-ninth"
assert engine.format_number(11, ruleset_name=RulesetName.ORDINAL) == "eleventh"
assert engine.format_number(20, FormatPurpose.ORDINAL).text == "twentieth"
assert engine.format_number(30, FormatPurpose.ORDINAL).text == "thirtieth"
assert engine.format_number(99, FormatPurpose.ORDINAL).text == "ninety-ninth"
assert engine.format_number(11, FormatPurpose.ORDINAL).text == "eleventh"

# Years
assert (
engine.format_number(1999, ruleset_name=RulesetName.YEAR)
== "nineteen ninety-nine"
)
assert engine.format_number(1999, FormatPurpose.YEAR).text == "nineteen ninety-nine"
40 changes: 22 additions & 18 deletions tests/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
SubRulePart,
SubType,
RbnfEngine,
RulesetName,
DEFAULT_LANGUAGE,
FormatResult,
)


Expand Down Expand Up @@ -38,12 +37,12 @@ def test_parse_ruleset_name():


def test_find_rule():
engine = RbnfEngine()
engine.add_rule(2, "two;")
engine.add_rule(20, "twenty[-→→];")
engine.add_rule(100, "←← hundred[ →→];")
engine = RbnfEngine("en")
engine.add_rule(2, "two;", "spellout-numbering")
engine.add_rule(20, "twenty[-→→];", "spellout-numbering")
engine.add_rule(100, "←← hundred[ →→];", "spellout-numbering")

ruleset = engine.rulesets[DEFAULT_LANGUAGE][RulesetName.CARDINAL]
ruleset = engine.rulesets["spellout-numbering"]

rule_2 = ruleset.find_rule(2)
assert rule_2 is not None
Expand All @@ -59,18 +58,23 @@ def test_find_rule():


def test_format_number():
engine = RbnfEngine()
engine.add_rule(2, "two;")
engine.add_rule(20, "twenty[-→→];")
engine.add_rule(100, "←← hundred[ →→];")

assert engine.format_number(222) == "two hundred twenty-two"
engine = RbnfEngine("en")
engine.add_rule(2, "two;", "spellout-cardinal")
engine.add_rule(20, "twenty[-→→];", "spellout-cardinal")
engine.add_rule(100, "←← hundred[ →→];", "spellout-cardinal")

assert engine.format_number(222) == FormatResult(
text="two hundred twenty-two",
text_by_ruleset={"spellout-cardinal": "two hundred twenty-two"},
)


def test_zero_rules():
engine = RbnfEngine()
engine.add_rule(0, "abc=%ruleset_2=def;", ruleset_name="ruleset_1")
engine.add_rule(0, " efg=%ruleset_3= hij;", ruleset_name="ruleset_2")
engine.add_rule(1, "one;", ruleset_name="ruleset_3")
engine = RbnfEngine("en")
engine.add_rule(0, "abc=%ruleset_2=def;", "ruleset_1")
engine.add_rule(0, " efg=%ruleset_3= hij;", "ruleset_2")
engine.add_rule(1, "one;", "ruleset_3")

assert engine.format_number(1, ruleset_name="ruleset_1") == "abc efgone hijdef"
assert (
engine.format_number(1, ruleset_names=["ruleset_1"]).text == "abc efgone hijdef"
)
13 changes: 10 additions & 3 deletions tests/test_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

def test_german():
engine = RbnfEngine.for_language("es")
assert engine.format_number(5) == "cinco"
assert engine.format_number(2) == "dos"
assert engine.format_number(5.2) == "cinco coma dos"
assert engine.format_number(5).text == "cinco"
assert engine.format_number(2).text == "dos"
assert engine.format_number(5.2).text == "cinco coma dos"

# All genders
assert set(engine.format_number(1).text_by_ruleset.values()) == {
"un",
"uno",
"una",
}
2 changes: 1 addition & 1 deletion tests/test_fi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

def test_finnish():
engine = RbnfEngine.for_language("fi")
assert engine.format_number(25) == "kaksikymmentäviisi"
assert engine.format_number(25).text == "kaksikymmentäviisi"
10 changes: 8 additions & 2 deletions tests/test_fr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from unicode_rbnf import RbnfEngine, RulesetName
from unicode_rbnf import RbnfEngine


def test_french():
engine = RbnfEngine.for_language("fr")
assert engine.format_number(88) == "quatre-vingt-huit"
assert engine.format_number(88).text == "quatre-vingt-huit"

# All genders
assert set(engine.format_number(1).text_by_ruleset.values()) == {
"un",
"une",
}
2 changes: 1 addition & 1 deletion tests/test_ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

def test_romanian():
engine = RbnfEngine.for_language("ro")
assert engine.format_number(-100) == "minus una sută"
assert engine.format_number(-100).text == "minus una sută"
2 changes: 1 addition & 1 deletion unicode_rbnf/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.0
2.0.0
6 changes: 4 additions & 2 deletions unicode_rbnf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from pathlib import Path

from .engine import RbnfEngine, RulesetName
from .engine import FormatOptions, FormatPurpose, FormatResult, RbnfEngine

_DIR = Path(__file__).parent

__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()

__all__ = [
"__version__",
"FormatOptions",
"FormatPurpose",
"FormatResult",
"RbnfEngine",
"RulesetName",
]
14 changes: 8 additions & 6 deletions unicode_rbnf/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse

from unicode_rbnf import RbnfEngine, RulesetName
from unicode_rbnf import FormatPurpose, RbnfEngine


def main() -> None:
Expand All @@ -12,17 +12,19 @@ def main() -> None:
help="Language code",
)
parser.add_argument(
"--rule",
choices=[v.value for v in RulesetName],
help="Ruleset name",
"--purpose",
choices=[v.value for v in FormatPurpose],
default=FormatPurpose.CARDINAL,
help="Format purpose",
)
parser.add_argument("number", nargs="+", help="Number(s) to turn into words")
args = parser.parse_args()

engine = RbnfEngine.for_language(args.language)
for number_str in args.number:
words = engine.format_number(number_str, ruleset_name=args.rule)
print(words)
result = engine.format_number(number_str, purpose=FormatPurpose(args.purpose))
for ruleset, words in result.text_by_ruleset.items():
print(number_str, ruleset, words, sep="|")


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit f51a7f6

Please sign in to comment.