From a5a16d817151df4328b1d2dd49e5bde425af8594 Mon Sep 17 00:00:00 2001 From: Nick Nicholas Date: Wed, 20 Dec 2023 00:58:09 +1100 Subject: [PATCH 1/4] Linebreak processing in MathML in Presentation XML: https://github.com/metanorma/metanorma-standoc/issues/111 --- Gemfile.devel | 1 + lib/isodoc/function/inline.rb | 3 +- lib/isodoc/presentation_function/math.rb | 27 +- spec/isodoc/blocks_notes_spec.rb | 710 +++++++++++++++++++++ spec/isodoc/blocks_spec.rb | 707 -------------------- spec/isodoc/presentation_xml_maths_spec.rb | 513 +++++++++++++++ spec/isodoc/presentation_xml_spec.rb | 422 ------------ spec/isodoc/section_spec.rb | 252 ++++---- 8 files changed, 1368 insertions(+), 1267 deletions(-) create mode 100644 Gemfile.devel create mode 100644 spec/isodoc/blocks_notes_spec.rb create mode 100644 spec/isodoc/presentation_xml_maths_spec.rb diff --git a/Gemfile.devel b/Gemfile.devel new file mode 100644 index 00000000..e15cb95d --- /dev/null +++ b/Gemfile.devel @@ -0,0 +1 @@ +gem "html2doc", git: "https://github.com/metanorma/html2doc", branch: "main" diff --git a/lib/isodoc/function/inline.rb b/lib/isodoc/function/inline.rb index 1202a2fc..fd52fda3 100644 --- a/lib/isodoc/function/inline.rb +++ b/lib/isodoc/function/inline.rb @@ -87,12 +87,11 @@ def stem_parse(node, out) MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze def mathml_parse(node) - node.at("./m:math", MATHML)&.to_xml + node.xpath("./m:math", MATHML).map(&:to_xml).join end def asciimath_parse(node) a = node.at(ns("./asciimath"))&.text || node.text - "#{@openmathdelim}#{HTMLEntities.new.encode(a)}" \ "#{@closemathdelim}" end diff --git a/lib/isodoc/presentation_function/math.rb b/lib/isodoc/presentation_function/math.rb index fd468b60..0702b404 100644 --- a/lib/isodoc/presentation_function/math.rb +++ b/lib/isodoc/presentation_function/math.rb @@ -7,8 +7,9 @@ class PresentationXMLConvert < ::IsoDoc::Convert MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze def mathml(docxml) + docxml.xpath("//m:math", MATHML).each { |f| mathml_linebreak(f) } locale = twitter_cldr_localiser - docxml.xpath("//m:math", MATHML).each do |f| + docxml.xpath("//m:math", MATHML).each do |f| # rubocop:disable Style/CombinableLoops mathml1(f, locale) end end @@ -77,8 +78,7 @@ def twitter_cldr_localiser end def parse_localize_number - return {} unless @localizenumber - + @localizenumber or return {} m = %r{(?[^#])?(?#+0)(?.)(?#+)(?[^#])?} .match(@localizenumber) or return {} ret = { decimal: m[:decimal], group_digits: m[:groupdigits].size, @@ -90,17 +90,13 @@ def parse_localize_number end def asciimath_dup(node) - return if @suppressasciimathdup || node.parent.at(ns("./asciimath")) - + @suppressasciimathdup || node.parent.at(ns("./asciimath")) and return math = node.to_xml.gsub(/ xmlns=["'][^"']+["']/, "") .gsub(%r{<[^:/>]+:}, "<").gsub(%r{]+:}, "#{ret}" + node.next = "#{@c.encode(ret, :basic)}" rescue StandardError => e - warn "Failure to convert MathML to AsciiMath" - warn node.parent.to_xml - warn e + warn "Failure to convert MathML to AsciiMath\n#{node.parent.to_xml}\n#{e}" end def maths_just_numeral(node) @@ -114,6 +110,17 @@ def maths_just_numeral(node) def mathml1(node, locale) mathml_style_inherit(node) + mathml_number(node, locale) + end + + def mathml_linebreak(node) + node.at(".//*/@linebreak") or return + node.replace(Plurimath::Math + .parse(node.to_xml, :mathml) + .to_mathml(split_on_linebreak: true)) + end + + def mathml_number(node, locale) justnumeral = node.elements.size == 1 && node.elements.first.name == "mn" justnumeral or asciimath_dup(node) localize_maths(node, locale) diff --git a/spec/isodoc/blocks_notes_spec.rb b/spec/isodoc/blocks_notes_spec.rb new file mode 100644 index 00000000..45a08ce5 --- /dev/null +++ b/spec/isodoc/blocks_notes_spec.rb @@ -0,0 +1,710 @@ +require "spec_helper" + +RSpec.describe IsoDoc do + it "processes unlabelled notes" do + input = <<~INPUT + + + +

These results are based on a study carried out on three different types of kernel.

+
+ +

These results are based on a study carried out on three different types of kernel.

+
+
+
+ INPUT + presxml = <<~OUTPUT + + + + + Table of contents + + + + NOTE 1 +

+ These results are based on a study carried out on three different + types of kernel. +

+
+ +

+ These results are based on a study carried out on three different + types of kernel. +

+
+
+
+
+ OUTPUT + html = <<~OUTPUT + #{HTML_HDR} +
+
+

Foreword

+
+

+ NOTE 1 +   These results are based on a study carried out on three + different types of kernel. +

+
+
+

+   These results are based on a study carried out on three + different types of kernel. +

+
+
+ + + + OUTPUT + doc = <<~OUTPUT + +