Skip to content
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ fractional and ordinal numbers, and more.
| `pl` (Polish) | ✅ | 🚧 | ✅ | ✅ |
| `pt` (Portuguese) | ✅ | ✅ | ✅ | ✅ |
| `mwl` (Mirandese) | ✅ | ✅ | ✅ | ✅ |
| `ast` (Asturian) | ✅ | ✅ | ✅ | ✅ |
| `ru` (Russian) | ✅ | 🚧 | ✅ | ✅ |
| `sv` (Swedish) | ✅ | ✅ | ✅ | ❌ |
| `sl` (Slovenian) | ✅ | 🚧 | ❌ | ❌ |
Expand Down
116 changes: 76 additions & 40 deletions ovos_number_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Optional
from typing import Union

from unicode_rbnf import RbnfEngine, FormatPurpose

from ovos_number_parser.numbers_ast import AST
from ovos_number_parser.numbers_az import numbers_to_digits_az, extract_number_az, is_fractional_az, pronounce_number_az
from ovos_number_parser.numbers_ca import numbers_to_digits_ca, pronounce_number_ca, is_fractional_ca, extract_number_ca
from ovos_number_parser.numbers_cs import numbers_to_digits_cs, pronounce_number_cs, is_fractional_cs, extract_number_cs
Expand All @@ -18,13 +20,11 @@
from ovos_number_parser.numbers_gl import pronounce_number_gl, extract_number_gl, is_fractional_gl, numbers_to_digits_gl
from ovos_number_parser.numbers_hu import pronounce_number_hu, pronounce_ordinal_hu
from ovos_number_parser.numbers_it import (extract_number_it, pronounce_number_it, is_fractional_it)
from ovos_number_parser.numbers_mwl import MWL
from ovos_number_parser.numbers_nl import numbers_to_digits_nl, pronounce_number_nl, pronounce_ordinal_nl, \
extract_number_nl, is_fractional_nl
from ovos_number_parser.numbers_pl import numbers_to_digits_pl, pronounce_number_pl, extract_number_pl, is_fractional_pl
from ovos_number_parser.numbers_pt import PortugueseVariant, pronounce_fraction_pt, numbers_to_digits_pt, \
pronounce_number_pt, is_fractional_pt, extract_number_pt, pronounce_ordinal_pt, is_ordinal_pt
from ovos_number_parser.numbers_mwl import pronounce_fraction_mwl, numbers_to_digits_mwl, \
pronounce_number_mwl, is_fractional_mwl, extract_number_mwl, pronounce_ordinal_mwl, is_ordinal_mwl
from ovos_number_parser.numbers_pt import PortugueseVariant, PT_PT, PT_BR
from ovos_number_parser.numbers_ru import numbers_to_digits_ru, pronounce_number_ru, extract_number_ru, is_fractional_ru
from ovos_number_parser.numbers_sl import pronounce_number_sl
from ovos_number_parser.numbers_sv import pronounce_number_sv, pronounce_ordinal_sv, extract_number_sv, \
Expand All @@ -33,7 +33,7 @@
from ovos_number_parser.util import Scale, GrammaticalGender, DigitPronunciation


def numbers_to_digits(utterance: str, lang: str, scale: Scale = Scale.LONG) -> str:
def numbers_to_digits(utterance: str, lang: str, scale: Optional[Scale] = None) -> str:
"""
Convert written numbers in a text string to their digit representations for the specified language and numerical scale.

Expand All @@ -50,6 +50,8 @@ def numbers_to_digits(utterance: str, lang: str, scale: Scale = Scale.LONG) -> s
"""
if lang.startswith("az"):
return numbers_to_digits_az(utterance)
if lang.startswith("ast"):
return AST.numbers_to_digits(utterance)
if lang.startswith("ca"):
return numbers_to_digits_ca(utterance)
if lang.startswith("gl"):
Expand All @@ -69,9 +71,9 @@ def numbers_to_digits(utterance: str, lang: str, scale: Scale = Scale.LONG) -> s
if lang.startswith("pl"):
return numbers_to_digits_pl(utterance)
if lang.startswith("pt"):
return numbers_to_digits_pt(utterance, scale=scale)
return PT_PT.numbers_to_digits(utterance, scale=scale)
if lang.startswith("mwl"):
return numbers_to_digits_mwl(utterance, scale=scale)
return MWL.numbers_to_digits(utterance, scale=scale)
Comment on lines 71 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check all instances of language detection for Portuguese variants
rg -n 'def (numbers_to_digits|pronounce_number|.*_to_\w+)' ovos_number_parser/__init__.py | head -20

Repository: OpenVoiceOS/ovos-number-parser

Length of output: 222


🏁 Script executed:

#!/bin/bash
# Search for all Portuguese-related dispatch logic
rg -n '(PT_PT|"pt"|"br"|lang\.)' ovos_number_parser/__init__.py -B 2 -A 2

Repository: OpenVoiceOS/ovos-number-parser

Length of output: 11500


Fix Portuguese variant handling in numbers_to_digits, is_fractional, and is_ordinal.

The dispatch logic is inconsistent across functions. pronounce_number, pronounce_fraction, pronounce_ordinal, and extract_number all check "br" in lang.lower() to route Brazilian Portuguese separately, but numbers_to_digits (line 72), is_fractional (line 341), and is_ordinal (line 368) always route to PT_PT regardless of whether the language is pt-BR or similar. This means Brazilian Portuguese users receive Portugal-specific behavior for these three functions.

🤖 Prompt for AI Agents
In ovos_number_parser/__init__.py around lines 72 to 75, the Portuguese dispatch
always uses PT_PT, causing pt-BR variants to get Portugal-specific behavior.
Update the routing so that it returns PT_BR.numbers_to_digits(utterance, scale=scale)
when "br" in lang.lower(), and PT_PT.numbers_to_digits(utterance, scale=scale)
otherwise. Apply the same pattern to is_fractional and is_ordinal (check
"br" in lang.lower() and route to PT_BR for Brazilian Portuguese, PT_PT
otherwise) to make the dispatch consistent with
pronounce_number/pronounce_fraction/pronounce_ordinal/extract_number.

if lang.startswith("ru"):
return numbers_to_digits_ru(utterance)
if lang.startswith("uk"):
Expand All @@ -81,10 +83,12 @@ def numbers_to_digits(utterance: str, lang: str, scale: Scale = Scale.LONG) -> s

def pronounce_number(number: Union[int, float], lang: str,
places: int = 3,
short_scale: bool = True,
scientific: bool = False, ordinals: bool = False,
short_scale: Optional[bool] = None, # DEPRECATED
scientific: bool = False,
ordinals: bool = False,
digits: DigitPronunciation = DigitPronunciation.FULL_NUMBER,
gender: GrammaticalGender = GrammaticalGender.MASCULINE) -> str:
gender: GrammaticalGender = GrammaticalGender.MASCULINE,
scale: Optional[Scale] = None) -> str:
"""
Return the spoken representation of a number in the specified language.

Expand All @@ -106,13 +110,19 @@ def pronounce_number(number: Union[int, float], lang: str,
Raises:
NotImplementedError: If the specified language is not supported.
"""
scale = Scale.SHORT if short_scale else Scale.LONG # TODO migrate function kwarg to accept Scale enum
scale = scale or Scale.SHORT
if short_scale is not None:
# TODO log warning
pass
short_scale = scale == Scale.SHORT
if lang.startswith("en"):
return pronounce_number_en(number, places, short_scale, scientific, ordinals)
if lang.startswith("az"):
return pronounce_number_az(number, places, short_scale, scientific, ordinals)
if lang.startswith("ca"):
return pronounce_number_ca(number, places)
if lang.startswith("ast"):
return AST.pronounce_number(number, places, scale, ordinals, digits, gender)
if lang.startswith("cs"):
return pronounce_number_en(number, places, short_scale, scientific, ordinals)
if lang.startswith("da"):
Expand All @@ -138,14 +148,11 @@ def pronounce_number(number: Union[int, float], lang: str,
if lang.startswith("pl"):
return pronounce_number_pl(number, places, short_scale, scientific, ordinals)
if lang.startswith("pt"):
variant = PortugueseVariant.BR if "br" in lang.lower() else PortugueseVariant.PT
return pronounce_number_pt(number, places, scale=scale,
variant=variant, ordinals=ordinals,
digits=digits, gender=gender)
if "br" in lang.lower():
return PT_BR.pronounce_number(number, places, scale, ordinals, digits, gender)
return PT_PT.pronounce_number(number, places, scale, ordinals, digits, gender)
if lang.startswith("mwl"):
return pronounce_number_mwl(number, places,
scale=scale, ordinals=ordinals,
digits=digits, gender=gender)
return MWL.pronounce_number(number, places, scale, ordinals, digits, gender)
if lang.startswith("ru"):
return pronounce_number_ru(number, places, short_scale, scientific, ordinals)
if lang.startswith("sl"):
Expand All @@ -163,7 +170,7 @@ def pronounce_number(number: Union[int, float], lang: str,
raise NotImplementedError(f"Unsupported language: '{lang}'") from err


def pronounce_fraction(fraction_word: str, lang: str, scale: Scale = Scale.LONG) -> str:
def pronounce_fraction(fraction_word: str, lang: str, scale: Optional[Scale] = None) -> str:
"""
Return the spoken form of a fraction string (e.g., "1/2" as "one half") for the specified language and numerical scale.

Expand All @@ -178,17 +185,20 @@ def pronounce_fraction(fraction_word: str, lang: str, scale: Scale = Scale.LONG)
NotImplementedError: If the specified language is not supported.
"""
if lang.startswith("pt"):
variant = PortugueseVariant.BR if "br" in lang.lower() else PortugueseVariant.PT
return pronounce_fraction_pt(fraction_word, scale=scale, variant=variant)
return PT_BR.pronounce_fraction(fraction_word, scale=scale) if "br" in lang.lower() \
else PT_PT.pronounce_fraction(fraction_word, scale=scale)
elif lang.startswith("ast"):
return AST.pronounce_fraction(fraction_word, scale=scale)
elif lang.startswith("mwl"):
return pronounce_fraction_mwl(fraction_word, scale=scale)
return MWL.pronounce_fraction(fraction_word, scale=scale)
else:
raise NotImplementedError(f"unsupported language: {lang}")


def pronounce_ordinal(number: Union[int, float], lang: str,
short_scale: bool = True,
gender: GrammaticalGender = GrammaticalGender.MASCULINE) -> str:
short_scale: Optional[bool] = None, # DEPRECATED
gender: GrammaticalGender = GrammaticalGender.MASCULINE,
scale: Optional[Scale] = None) -> str:
"""
Return the spoken ordinal form of a number in the specified language.

Expand All @@ -204,12 +214,18 @@ def pronounce_ordinal(number: Union[int, float], lang: str,
Raises:
NotImplementedError: If the language is not supported.
"""
scale = Scale.SHORT if short_scale else Scale.LONG # TODO migrate function kwarg to accept Scale enum
scale = scale or Scale.SHORT
if short_scale is not None:
# TODO log warning
pass
short_scale = scale == Scale.SHORT
if lang.startswith("pt"):
variant = PortugueseVariant.BR if "br" in lang.lower() else PortugueseVariant.PT
return pronounce_ordinal_pt(number, scale=scale, variant=variant, gender=gender)
return PT_BR.pronounce_ordinal(number, scale=scale, gender=gender) if "br" in lang.lower() \
else PT_PT.pronounce_ordinal(number, scale=scale, gender=gender)
if lang.startswith("mwl"):
return pronounce_ordinal_mwl(number, scale=scale, gender=gender)
return MWL.pronounce_ordinal(number, scale=scale, gender=gender)
if lang.startswith("ast"):
return AST.pronounce_ordinal(number, scale=scale, gender=gender)
if lang.startswith("da"):
return pronounce_ordinal_da(number)
if lang.startswith("de"):
Expand All @@ -229,7 +245,10 @@ def pronounce_ordinal(number: Union[int, float], lang: str,
raise NotImplementedError(f"Unsupported language: '{lang}'") from err


def extract_number(text: str, lang: str, short_scale: bool = True, ordinals: bool = False) -> Union[int, float, bool]:
def extract_number(text: str, lang: str,
short_scale: Optional[bool] = None, # DEPRECATED
ordinals: bool = False,
scale: Optional[Scale] = None) -> Union[int, float, bool]:
"""Takes in a string and extracts a number.

Assumes only 1 number is in the string, does NOT handle multiple numbers
Expand All @@ -247,7 +266,11 @@ def extract_number(text: str, lang: str, short_scale: bool = True, ordinals: boo
(int, float or False): The number extracted or False if the input
text contains no numbers
"""
scale = Scale.SHORT if short_scale else Scale.LONG # TODO migrate function kwarg to accept Scale enum
scale = scale or Scale.SHORT
if short_scale is not None:
# TODO log warning
pass
short_scale = scale == Scale.SHORT
if lang.startswith("en"):
return extract_number_en(text, short_scale, ordinals)
if lang.startswith("az"):
Expand Down Expand Up @@ -277,10 +300,12 @@ def extract_number(text: str, lang: str, short_scale: bool = True, ordinals: boo
if lang.startswith("pl"):
return extract_number_pl(text, short_scale, ordinals)
if lang.startswith("pt"):
variant = PortugueseVariant.BR if "br" in lang.lower() else PortugueseVariant.PT
return extract_number_pt(text, scale=scale, ordinals=ordinals, variant=variant)
return PT_BR.extract_number(text, scale=scale, ordinals=ordinals) if "br" in lang.lower() \
else PT_PT.extract_number(text, scale=scale, ordinals=ordinals)
if lang.startswith("mwl"):
return extract_number_mwl(text, scale=scale, ordinals=ordinals)
return MWL.extract_number(text, scale=scale, ordinals=ordinals)
if lang.startswith("ast"):
return AST.extract_number(text, scale=scale, ordinals=ordinals)
if lang.startswith("ru"):
return extract_number_ru(text, short_scale, ordinals)
if lang.startswith("sv"):
Expand All @@ -290,22 +315,29 @@ def extract_number(text: str, lang: str, short_scale: bool = True, ordinals: boo
raise NotImplementedError(f"Unsupported language: '{lang}'")


def is_fractional(input_str: str, lang: str, short_scale: bool = True) -> Union[bool, float]:
def is_fractional(input_str: str, lang: str,
short_scale: Optional[bool] = None, # DEPRECATED
scale: Optional[Scale] = None) -> Union[bool, float]:
"""
This function takes the given text and checks if it is a fraction.
Used by most of the number exractors.
Used by most of the number extractors.

Will return False on phrases that *contain* a fraction. Only detects
exact matches. To pull a fraction from a string, see extract_number()

Args:
input_str (str): the string to check if fractional
short_scale (bool): use short scale if True, long scale if False
short_scale (bool): DEPRECATED, use scale enum instead
lang (str, optional): an optional BCP-47 language code, if omitted
the default language will be used.
Returns:
(bool) or (float): False if not a fraction, otherwise the fraction
"""
scale = scale or Scale.SHORT
if short_scale is not None:
# TODO log warning
pass
short_scale = scale == Scale.SHORT
if lang.startswith("en"):
return is_fractional_en(input_str, short_scale)
if lang.startswith("az"):
Expand Down Expand Up @@ -333,9 +365,11 @@ def is_fractional(input_str: str, lang: str, short_scale: bool = True) -> Union[
if lang.startswith("pl"):
return is_fractional_pl(input_str, short_scale)
if lang.startswith("pt"):
return is_fractional_pt(input_str)
return PT_PT.is_fractional(input_str)
if lang.startswith("mwl"):
return is_fractional_mwl(input_str)
return MWL.is_fractional(input_str)
if lang.startswith("ast"):
return AST.is_fractional(input_str)
if lang.startswith("ru"):
return is_fractional_ru(input_str, short_scale)
if lang.startswith("sv"):
Expand All @@ -358,9 +392,11 @@ def is_ordinal(input_str: str, lang: str) -> Union[bool, float]:
corresponding to the ordinal
"""
if lang.startswith("pt"):
return is_ordinal_pt(input_str)
return PT_PT.is_ordinal(input_str)
if lang.startswith("mwl"):
return is_ordinal_mwl(input_str)
return MWL.is_ordinal(input_str)
if lang.startswith("ast"):
return AST.is_ordinal(input_str)
if lang.startswith("en"):
return is_ordinal_en(input_str)
if lang.startswith("de"):
Expand Down
Loading
Loading