# Number-localisation helpers: post-patch state of
# lib/isodoc/presentation_function/math.rb. The same change set also adds
# `spec.add_dependency "base64"` and `spec.add_dependency "bigdecimal"`
# to isodoc.gemspec.

# TwitterCldr::DataReaders::NumberDataReader.new(locale).symbols

# Replace the content of every MathML <mn> under +node+ with its localised
# rendering. An <mn> carrying a data-metanorma-numberformat attribute is
# formatted with those explicit options (the attribute is removed first);
# every other <mn> goes through the implicit formatter.
#
# An ArgumentError (such as BigDecimal rejecting non-numeric text) is
# deliberately ignored and leaves the element's content untouched; any other
# StandardError is reported through +warn+ and processing continues with the
# next element.
def localize_maths(node, locale)
  node.xpath(".//m:mn", MATHML).each do |x|
    fmt = x["data-metanorma-numberformat"]
    x.children =
      if fmt
        x.delete("data-metanorma-numberformat")
        explicit_number_formatter(x, locale, fmt)
      else
        implicit_number_formatter(x, locale)
      end
  rescue ArgumentError
    # best effort: keep the <mn> content as it was
  rescue StandardError => e
    warn "Failure to localise MathML/mn\n#{node.parent.to_xml}\n#{e}"
  end
end

# Expand +num+ (a numeric string, possibly in exponent notation) into plain
# decimal notation via BigDecimal.
#
# BigDecimal#to_s("F") always emits a fractional part ("30000" => "30000.0").
# When the input had no digit after a decimal point, that artificial ".0" is
# removed again. Only an all-zero fraction is removed, so an input such as
# "1e-2" keeps its value ("0.01") instead of being truncated to "0".
#
# @raise [ArgumentError] if +num+ is not a valid number for BigDecimal
def normalise_number(num)
  n = BigDecimal(num).to_s("F")
  n = n.sub(/\.0+\z/, "") unless /\.\d/.match?(num)
  n
end

# Format the text of +num+ (an <mn> node) with the shared @numfmt formatter,
# deriving precision and significant-digit count from the number itself.
def implicit_number_formatter(num, locale)
  fmt = { digit_count: num_totaldigits(num.text) }.compact
  # Plurimath confused by exponent notation, so pass the expanded form
  n = normalise_number(num.text)
  # explicit `key: value` pairs: the gemspec allows Ruby 2.7, which lacks
  # the `key:` shorthand
  @numfmt.localized_number(n, locale: locale, format: fmt,
                              precision: num_precision(num.text))
end

# Parse a comma-separated "key=value" option string into a Hash with Symbol
# keys and String values. Values may be wrapped in single quotes (allowing
# embedded commas) or double quotes; the wrapping quotes are dropped.
#
# +options+ is not modified. An empty option string yields {}, and a key with
# an empty value (e.g. group='') maps to "". Fields without "=" are skipped.
def numberformat_extract(options)
  # move the opening quote in front of the key (key='v' => 'key=v') so that
  # CSV treats the whole key=value pair as one quoted field
  quoted = options.gsub(/([a-z_]+)='/, %('\\1=))
  fields = CSV.parse_line(quoted, quote_char: "'") || []
  fields.each_with_object({}) do |field, acc|
    m = /^(.+?)=(.+)?$/.match(field) or next
    acc[m[1].to_sym] = m[2].to_s.sub(/^(["'])(.+)\1$/, "\\2")
  end
end

# Coerce the values of known option keys in +ret+ in place: counts become
# Integers, enumerated settings become Symbols. Absent keys stay absent.
# Returns +ret+.
def numberformat_type(ret)
  %i[precision digit_count group_digits fraction_group_digits].each do |key|
    ret[key] &&= ret[key].to_i
  end
  %i[notation exponent_sign locale].each do |key|
    ret[key] &&= ret[key].to_sym
  end
  ret
end

# Format the text of +num+ (an <mn> node) according to the option string
# +options+. A locale given in the options takes precedence over +locale+.
def explicit_number_formatter(num, locale, options)
  ret = numberformat_type(numberformat_extract(options))
  l = ret[:locale] || locale
  precision, symbols, digit_count = explicit_number_formatter_cfg(num, ret)
  # Plurimath confused by exponent notation, so pass the expanded form
  n = normalise_number(num.text)
  Plurimath::NumberFormatter.new(l, localizer_symbols: symbols)
    .localized_number(n, precision: precision,
                         format: symbols.merge(digit_count: digit_count))
end

# Build the formatter configuration for an explicitly formatted number.
#
# Returns [precision, symbols, digit_count], where +symbols+ is the default
# localiser symbols overridden by +fmt+. +digit_count+ is computed from the
# number only when no precision is configured; otherwise it is nil.
def explicit_number_formatter_cfg(num, fmt)
  symbols = twitter_cldr_localiser_symbols.merge(fmt)
  precision = symbols[:precision]&.to_i || num_precision(num.text)
  digit_count = symbols[:precision] ? nil : num_totaldigits(num.text)
  [precision, symbols, digit_count]
end

# Number of fractional digits to render for +num+: the configured default
# precision if any, else the count of characters after the decimal point.
# Returns nil for numbers without a decimal point and for numbers in
# "0.ddde±n" exponent notation (those are handled by num_totaldigits).
def num_precision(num)
  return nil unless /\.(?!\d+e)/.match?(num)

  twitter_cldr_localiser_symbols[:precision] ||
    num.sub(/^.*\./, "").size
end

# Number of significant digits to render for a +num+ written in
# "0.ddde±n" exponent notation: the configured default digit count if any,
# else the count of digits between the decimal point and the "e".
# Returns nil for any other number.
def num_totaldigits(num)
  return nil unless /\.(?=\d+e)/.match?(num)

  twitter_cldr_localiser_symbols[:digit_count] ||
    num.sub(/^.*\./, "").sub(/e.*$/, "").size
end

# Default localiser symbol overrides; empty here.
def twitter_cldr_localiser_symbols
  {}
end
--- a/spec/isodoc/presentation_xml_maths_spec.rb +++ b/spec/isodoc/presentation_xml_maths_spec.rb @@ -258,7 +258,7 @@ end context "overrides localisation of numbers in MathML" do - it "overrides localisation of numbers in MathML, with no grouping of digits" do + it "with no grouping of digits" do input = <<~INPUT @@ -295,10 +295,8 @@ .sub(%r{.*}m, ""))) .to be_equivalent_to xmlpp(output2) end - end - context "overrides localisation of numbers in MathML" do - it "overrides localisation of numbers in MathML with grouping of digits" do + it "with grouping of digits" do input = <<~INPUT @@ -320,7 +318,7 @@ Table of contents -

... +

... 6=42=12=14=96=77=26=45=15 64=21=21=49=67=72;64$51$5 3=00=00 @@ -337,6 +335,231 @@ .to be_equivalent_to xmlpp(output1) end end + context "overrides localisation of numbers in MathML" do + # before do + # allow_any_instance_of(IsoDoc::PresentationXMLConvert) + # .to(receive(:twitter_cldr_localiser_symbols) + # .and_return({ + # fraction_group_digits: 2, + # fraction_group: "'", + # precision: 2, + # })) + # end + let(:input) do + <<~INPUT + + + test + + +

+ + 0.31e2 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.1e1 + + + 0.11e1 + + + 0.1100e1 + + + 0.1e22 + + + 0.10e20 + + + 0.10e-18 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.1e1 + + + 0.11e1 + + + 0.1100e1 + + + 0.1e22 + + + 0.10e20 + + + 0.10e-18 + + ... + + 642121496772.6451564515 + + + 642121496772.6451564515 + + + 642121496772.6451564515 + + 30000 + + + INPUT + end + + it "with data-metanorma-numberformat attributes and default precision" do + allow_any_instance_of(IsoDoc::PresentationXMLConvert) + .to(receive(:twitter_cldr_localiser_symbols) + .and_return({ + fraction_group_digits: 2, + fraction_group: "'", + precision: 2, + })) + + output1 = <<~OUTPUT + + + test + + + Table of contents +

+ 31;00 + 32=74=28;74 + 32=74=28.74 + 32=74=28.74 + 32=74=28.74 + 32=74=28.74'3 + 32=74=28.74 + 32=74=28,74 + 32=74=28,74'3 + 1,00 + 1,10 + 1,10 + 10=00=00=00=00=00=00=00=00=00=00,00 + 10=00=00=00=00=00=00=00=00=00,00 + 0,00 + 3.27e5 + 3,27e5 + 3,27'4e5 + 1,00e0 + 1,10e0 + 1,10e0 + 1,00e+21 + 1,00e+19 + 1,00e-19 + ... + 642x121x496x772,6451y564 + 6,4212y150 × 10^+11 + 642x121x496x772,64 + 30'000,00 +

+
+
+ OUTPUT + TwitterCldr.reset_locale_fallbacks + + expect(xmlpp(strip_guid(IsoDoc::PresentationXMLConvert + .new({ localizenumber: "#=#0;##$#" } + .merge(presxml_options)) + .convert("test", input, true)) + .sub(%r{.*}m, ""))) + .to be_equivalent_to xmlpp(output1) + end + + it "with data-metanorma-numberformat attributes and no default precision" do + allow_any_instance_of(IsoDoc::PresentationXMLConvert) + .to(receive(:twitter_cldr_localiser_symbols) + .and_return({ + fraction_group_digits: 2, + fraction_group: "'", + })) + + output1 = <<~OUTPUT + + + test + + + Table of contents +

+ 31 + 327x428,74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'3 + 327x428.74'32'87'84'32'99'2 + 327x428,74'32'87'84'32'99'2 + 327x428,74'3 + 1 + 1,1 + 1,10'0 + 1x000x000x000x000x000x000x000 + 10x000x000x000x000x000x000 + 0,0 + 3.27'42'87'43'28'78'43'30'28e5 + 3,27'42'87'43'28'78'43'30'28e5 + 3,27'4e5 + 1e0 + 1,1e0 + 1,10'0e0 + 1e+21 + 1,00'00'00'00'00'00'00'00'00'00e+19 + 9,0e-20 + ... + 642x121x496x772,6451y564 + 6,4212y150 × 10^+11 + 642x121x496x772,64'51'56'45'15 + 30'000 +

+
+
+ OUTPUT + TwitterCldr.reset_locale_fallbacks + + expect(xmlpp(strip_guid(IsoDoc::PresentationXMLConvert + .new({ localizenumber: "#=#0;##$#" } + .merge(presxml_options)) + .convert("test", input, true)) + .sub(%r{.*}m, ""))) + .to be_equivalent_to xmlpp(output1) + end + end it "propagates boldface into MathML" do input = <<~INPUT