From 8b0006218c9d17c927b5102d0d1d44d8da6db1ec Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Tue, 25 Jun 2024 01:28:35 +1000 Subject: [PATCH 1/2] process data-metanorma-numberformat arguments on MathML/mn: https://github.com/metanorma/metanorma-standoc/issues/882 --- isodoc.gemspec | 2 + lib/isodoc/presentation_function/math.rb | 37 ++++++++++- spec/isodoc/presentation_xml_maths_spec.rb | 73 ++++++++++++++++++++-- 3 files changed, 104 insertions(+), 8 deletions(-) diff --git a/isodoc.gemspec b/isodoc.gemspec index 8bcdf877..e5aeca22 100644 --- a/isodoc.gemspec +++ b/isodoc.gemspec @@ -28,6 +28,8 @@ Gem::Specification.new do |spec| end spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0") + spec.add_dependency "base64" + spec.add_dependency "bigdecimal" spec.add_dependency "html2doc", "~> 1.8.1" # spec.add_dependency "isodoc-i18n", "~> 1.1.0" # already in relaton-render and mn-requirements # spec.add_dependency "relaton-cli" diff --git a/lib/isodoc/presentation_function/math.rb b/lib/isodoc/presentation_function/math.rb index 23062a93..21301152 100644 --- a/lib/isodoc/presentation_function/math.rb +++ b/lib/isodoc/presentation_function/math.rb @@ -21,13 +21,44 @@ def mathml(docxml) # TwitterCldr::DataReaders::NumberDataReader.new(locale).symbols def localize_maths(node, locale) node.xpath(".//m:mn", MATHML).each do |x| - x.children = @numfmt - .localized_number(x.text, locale: locale, - precision: num_precision(x.text)) + x.children = + if fmt = x["data-metanorma-numberformat"] + explicit_number_formatter(x, locale, fmt) + else + @numfmt.localized_number(x.text, locale:, + precision: num_precision(x.text)) + end rescue ArgumentError end end + def numberformat_extract(options) + CSV.parse_line(options).each_with_object({}) do |x, acc| + m = /^(.+?)=(.+)?$/.match(x) or next + acc[m[1].to_sym] = m[2].sub(/^(["'])(.+)\1$/, "\\2") + end + end + + def numberformat_type(ret) + %i(precision digitcount group_digits fraction_group_digits).each do |i| + ret[i] &&= ret[i].to_i + end + %i(notation exponent_sign locale).each do |i| + ret[i] &&= ret[i].to_sym + end + ret + end + + def explicit_number_formatter(num, locale, options) + num.delete("data-metanorma-numberformat") + ret = numberformat_type(numberformat_extract(options)) + l = ret[:locale] || locale + precision = ret[:precision]&.to_i || num_precision(num.text) + symbols = twitter_cldr_localiser_symbols.merge(ret) + Plurimath::NumberFormatter.new(l, localizer_symbols: symbols) + .localized_number(num.text, precision:, format: symbols) + end + def num_precision(num) precision = 0 /\./.match?(num) and precision = diff --git a/spec/isodoc/presentation_xml_maths_spec.rb b/spec/isodoc/presentation_xml_maths_spec.rb index f8a8cbf4..711d5c91 100644 --- a/spec/isodoc/presentation_xml_maths_spec.rb +++ b/spec/isodoc/presentation_xml_maths_spec.rb @@ -258,7 +258,7 @@ end context "overrides localisation of numbers in MathML" do - it "overrides localisation of numbers in MathML, with no grouping of digits" do + it "with no grouping of digits" do input = <<~INPUT @@ -295,10 +295,8 @@ .sub(%r{.*}m, ""))) .to be_equivalent_to xmlpp(output2) end - end - context "overrides localisation of numbers in MathML" do - it "overrides localisation of numbers in MathML with grouping of digits" do + it "with grouping of digits" do input = <<~INPUT @@ -320,7 +318,7 @@ Table of contents -

... +

... 6=42=12=14=96=77=26=45=15 64=21=21=49=67=72;64$51$5 3=00=00 @@ -338,6 +336,71 @@ end end + context "overrides localisation of numbers in MathML" do + let(:additional_symbols) do + { + fraction_group_digits: 2, + fraction_group: "'", + precision: 2, + } + end + + before do + allow_any_instance_of(IsoDoc::PresentationXMLConvert) + .to(receive(:twitter_cldr_localiser_symbols) + .and_return(additional_symbols)) + end + + it "with data-metanorma-numberformat attributes" do + input = <<~INPUT + + + test + + +

+ ... + + 642121496772.6451564515 + + + 642121496772.6451564515 + + + 642121496772.6451564515 + + 30000 + + + INPUT + output1 = <<~OUTPUT + + + test + + + Table of contents +

+ ... + 642x121x496x772'6451y564 + 6'4212y150 × 10^+11 + 642x121x496x772'64 + 30'000 +

+
+
+ OUTPUT + TwitterCldr.reset_locale_fallbacks + + expect(xmlpp(strip_guid(IsoDoc::PresentationXMLConvert + .new({ localizenumber: "#=#0;##$#" } + .merge(presxml_options)) + .convert("test", input, true)) + .sub(%r{.*}m, ""))) + .to be_equivalent_to xmlpp(output1) + end + end + it "propagates boldface into MathML" do input = <<~INPUT From 420f564affd1425d5d56ed29dd436ccc6addb326 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Wed, 26 Jun 2024 14:49:34 +1000 Subject: [PATCH 2/2] https://github.com/metanorma/metanorma-standoc/issues/882 --- lib/isodoc/presentation_function/math.rb | 60 ++++-- spec/isodoc/presentation_xml_maths_spec.rb | 216 ++++++++++++++++++--- 2 files changed, 235 insertions(+), 41 deletions(-) diff --git a/lib/isodoc/presentation_function/math.rb b/lib/isodoc/presentation_function/math.rb index 21301152..47cefb0d 100644 --- a/lib/isodoc/presentation_function/math.rb +++ b/lib/isodoc/presentation_function/math.rb @@ -23,24 +23,41 @@ def localize_maths(node, locale) node.xpath(".//m:mn", MATHML).each do |x| x.children = if fmt = x["data-metanorma-numberformat"] + x.delete("data-metanorma-numberformat") explicit_number_formatter(x, locale, fmt) - else - @numfmt.localized_number(x.text, locale:, - precision: num_precision(x.text)) + else implicit_number_formatter(x, locale) end rescue ArgumentError + rescue Error => e + warn "Failure to localised MathML/mn\n#{node.parent.to_xml}\n#{e}" end end + def normalise_number(num) + n = BigDecimal(num).to_s("F") + /\.\d/.match?(num) or n.sub!(/\.\d+$/, "") + n + end + + def implicit_number_formatter(num, locale) + fmt = { digit_count: num_totaldigits(num.text) }.compact + n = normalise_number(num.text) + # Plurimath confused by exponent notation + #warn "IMPLICIT: precision: #{num_precision(num.text)} ; symbols: #{fmt}, n: #{n}; output: #{@numfmt.localized_number(n, locale:, format: fmt, precision: num_precision(num.text))}" + @numfmt.localized_number(n, locale:, format: fmt, + precision: num_precision(num.text)) + end + def numberformat_extract(options) - CSV.parse_line(options).each_with_object({}) do |x, acc| + options.gsub!(/([a-z_]+)='/, %('\\1=)) + CSV.parse_line(options, quote_char: "'").each_with_object({}) do |x, acc| m = /^(.+?)=(.+)?$/.match(x) or next acc[m[1].to_sym] = m[2].sub(/^(["'])(.+)\1$/, "\\2") end end def numberformat_type(ret) - %i(precision digitcount group_digits fraction_group_digits).each do |i| + %i(precision digit_count group_digits fraction_group_digits).each do |i| ret[i] &&= ret[i].to_i end %i(notation exponent_sign locale).each do |i| @@ -50,23 +67,40 @@ def numberformat_type(ret) end def explicit_number_formatter(num, locale, options) - num.delete("data-metanorma-numberformat") ret = numberformat_type(numberformat_extract(options)) l = ret[:locale] || locale - precision = ret[:precision]&.to_i || num_precision(num.text) - symbols = twitter_cldr_localiser_symbols.merge(ret) + precision, symbols, digit_count = explicit_number_formatter_cfg(num, ret) + n = normalise_number(num.text) + # Plurimath confused by exponent notation + #warn "EXPLICIT: precision: #{precision} ; symbols: #{symbols}, n: #{n}; output: #{Plurimath::NumberFormatter.new(l, localizer_symbols: symbols).localized_number(n, precision:, format: symbols.merge(digit_count:))}" Plurimath::NumberFormatter.new(l, localizer_symbols: symbols) - .localized_number(num.text, precision:, format: symbols) + .localized_number(n, precision:, + format: symbols.merge(digit_count:)) + end + + def explicit_number_formatter_cfg(num, fmt) + symbols = twitter_cldr_localiser_symbols.dup.merge(fmt) + precision = symbols[:precision]&.to_i || num_precision(num.text) + symbols[:precision] or digit_count = num_totaldigits(num.text) + [precision, symbols, digit_count] end def num_precision(num) - precision = 0 - /\./.match?(num) and precision = - twitter_cldr_localiser_symbols[:precision] || - num.sub(/^.*\./, "").size + precision = nil + /\.(?!\d+e)/.match?(num) and + precision = twitter_cldr_localiser_symbols[:precision] || + num.sub(/^.*\./, "").size precision end + def num_totaldigits(num) + totaldigits = nil + /\.(?=\d+e)/.match?(num) and + totaldigits = twitter_cldr_localiser_symbols[:digit_count] || + num.sub(/^.*\./, "").sub(/e.*$/, "").size + totaldigits + end + def twitter_cldr_localiser_symbols {} end diff --git a/spec/isodoc/presentation_xml_maths_spec.rb b/spec/isodoc/presentation_xml_maths_spec.rb index 711d5c91..2ac6868a 100644 --- a/spec/isodoc/presentation_xml_maths_spec.rb +++ b/spec/isodoc/presentation_xml_maths_spec.rb @@ -335,30 +335,96 @@ .to be_equivalent_to xmlpp(output1) end end - context "overrides localisation of numbers in MathML" do - let(:additional_symbols) do - { - fraction_group_digits: 2, - fraction_group: "'", - precision: 2, - } - end - - before do - allow_any_instance_of(IsoDoc::PresentationXMLConvert) - .to(receive(:twitter_cldr_localiser_symbols) - .and_return(additional_symbols)) - end - - it "with data-metanorma-numberformat attributes" do - input = <<~INPUT + # before do + # allow_any_instance_of(IsoDoc::PresentationXMLConvert) + # .to(receive(:twitter_cldr_localiser_symbols) + # .and_return({ + # fraction_group_digits: 2, + # fraction_group: "'", + # precision: 2, + # })) + # end + let(:input) do + <<~INPUT test

+ + 0.31e2 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.1e1 + + + 0.11e1 + + + 0.1100e1 + + + 0.1e22 + + + 0.10e20 + + + 0.10e-18 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.3274287432878432992e6 + + + 0.1e1 + + + 0.11e1 + + + 0.1100e1 + + + 0.1e22 + + + 0.10e20 + + + 0.10e-18 + ... 642121496772.6451564515 @@ -373,22 +439,116 @@ INPUT + end + + it "with data-metanorma-numberformat attributes and default precision" do + allow_any_instance_of(IsoDoc::PresentationXMLConvert) + .to(receive(:twitter_cldr_localiser_symbols) + .and_return({ + fraction_group_digits: 2, + fraction_group: "'", + precision: 2, + })) + + output1 = <<~OUTPUT + + + test + + + Table of contents +

+ 31;00 + 32=74=28;74 + 32=74=28.74 + 32=74=28.74 + 32=74=28.74 + 32=74=28.74'3 + 32=74=28.74 + 32=74=28,74 + 32=74=28,74'3 + 1,00 + 1,10 + 1,10 + 10=00=00=00=00=00=00=00=00=00=00,00 + 10=00=00=00=00=00=00=00=00=00,00 + 0,00 + 3.27e5 + 3,27e5 + 3,27'4e5 + 1,00e0 + 1,10e0 + 1,10e0 + 1,00e+21 + 1,00e+19 + 1,00e-19 + ... + 642x121x496x772,6451y564 + 6,4212y150 × 10^+11 + 642x121x496x772,64 + 30'000,00 +

+
+
+ OUTPUT + TwitterCldr.reset_locale_fallbacks + + expect(xmlpp(strip_guid(IsoDoc::PresentationXMLConvert + .new({ localizenumber: "#=#0;##$#" } + .merge(presxml_options)) + .convert("test", input, true)) + .sub(%r{.*}m, ""))) + .to be_equivalent_to xmlpp(output1) + end + + it "with data-metanorma-numberformat attributes and no default precision" do + allow_any_instance_of(IsoDoc::PresentationXMLConvert) + .to(receive(:twitter_cldr_localiser_symbols) + .and_return({ + fraction_group_digits: 2, + fraction_group: "'", + })) + output1 = <<~OUTPUT - - test - - - Table of contents -

+ + test + + + Table of contents +

+ 31 + 327x428,74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'32'87'84'32'99'2 + 327x428.74'3 + 327x428.74'32'87'84'32'99'2 + 327x428,74'32'87'84'32'99'2 + 327x428,74'3 + 1 + 1,1 + 1,10'0 + 1x000x000x000x000x000x000x000 + 10x000x000x000x000x000x000 + 0,0 + 3.27'42'87'43'28'78'43'30'28e5 + 3,27'42'87'43'28'78'43'30'28e5 + 3,27'4e5 + 1e0 + 1,1e0 + 1,10'0e0 + 1e+21 + 1,00'00'00'00'00'00'00'00'00'00e+19 + 9,0e-20 ... - 642x121x496x772'6451y564 - 6'4212y150 × 10^+11 - 642x121x496x772'64 + 642x121x496x772,6451y564 + 6,4212y150 × 10^+11 + 642x121x496x772,64'51'56'45'15 30'000

-
-
+ +
OUTPUT TwitterCldr.reset_locale_fallbacks