Skip to content

Commit

Permalink
Merge pull request #556 from metanorma/fix/math-linebreak
Browse files Browse the repository at this point in the history
Linebreak processing in MathML in Presentation XML: https://github.co…
  • Loading branch information
opoudjis authored Dec 21, 2023
2 parents 1aa7f77 + ae0c666 commit d2e1ddc
Show file tree
Hide file tree
Showing 13 changed files with 1,668 additions and 1,434 deletions.
1 change: 1 addition & 0 deletions Gemfile.devel
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
gem "html2doc", git: "https://github.com/metanorma/html2doc", branch: "main"
6 changes: 4 additions & 2 deletions lib/isodoc/function/inline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,14 @@ def stem_parse(node, out)
# XPath namespace binding: maps the "m" prefix to the MathML namespace URI.
MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze

# Prepares the children of a stem node for MathML output.
# As written: removes any asciimath / latexmath children, clears the
# namespace on direct br children, and returns node.elements.
def mathml_parse(node)
# NOTE(review): the value of this lookup is discarded (it is neither assigned
# nor the last expression); it looks like a leftover of the previous
# implementation — confirm and remove.
node.at("./m:math", MATHML)&.to_xml
# node.xpath("./m:math", MATHML).map(&:to_xml).join
# Drop the alternative-notation siblings so only the remaining elements are returned.
node.xpath(ns("./asciimath | ./latexmath")).each(&:remove)
# Strip the namespace from line-break elements sitting directly under the node.
node.xpath(ns("./br")).each { |e| e.namespace = nil }
node.elements
end

# Renders a stem node as delimited, entity-encoded AsciiMath text.
# Uses the text of the node's asciimath child when there is one, otherwise
# the text of the node itself, and wraps it in @openmathdelim / @closemathdelim.
def asciimath_parse(node)
  asciimath = node.at(ns("./asciimath"))
  source = asciimath&.text || node.text
  encoded = HTMLEntities.new.encode(source)
  "#{@openmathdelim}#{encoded}#{@closemathdelim}"
end
Expand Down
5 changes: 5 additions & 0 deletions lib/isodoc/html_function/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ def convert1(docxml, filename, dir)
end.join("\n")
end

# Override point for XSLT preprocessing of the document; currently it only
# delegates to the inherited implementation with the same argument.
def preprocess_xslt(docxml)
  super(docxml)
end

def make_body1(body, _docxml)
return if @bare

Expand Down
156 changes: 0 additions & 156 deletions lib/isodoc/presentation_function/bibdata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,10 @@
module IsoDoc
class PresentationXMLConvert < ::IsoDoc::Convert
# Enriches the document metadata: inserts ToC, font and XSLT metadata,
# processes docid prefixes, then (when a current bibdata element exists)
# precomposes addresses, applies i18n, and places a <localized-strings>
# element immediately after the bibdata element.
# Returns nil when bibdata_current yields nothing.
def bibdata(docxml)
  toc_metadata(docxml)
  fonts_metadata(docxml)
  preprocess_xslt_insert(docxml)
  docid_prefixes(docxml)
  bib = bibdata_current(docxml)
  return unless bib

  address_precompose(bib)
  bibdata_i18n(bib)
  strings = i8n_name(trim_hash(@i18n.get), "").join
  bib.next = "<localized-strings>#{strings}</localized-strings>"
end

# Returns the element under the metanorma-extension insertion point reached by
# walking +path+ (a list of child element names), creating each missing
# element along the way. With an empty path the insertion point itself is
# returned.
def extension_insert(xml, path = [])
  path.reduce(extension_insert_pt(xml)) do |parent, child_name|
    parent.at(ns("./#{child_name}")) ||
      parent.add_child("<#{child_name}/>").first
  end
end

# Finds, or creates, the <metanorma-extension> element of the document.
# Lookup order: an existing metanorma-extension; otherwise a new one placed
# after bibdata; otherwise a new one placed before the root's first element.
def extension_insert_pt(xml)
  existing = xml.at(ns("//metanorma-extension"))
  return existing if existing

  bibdata = xml.at(ns("//bibdata"))
  after_bibdata = bibdata&.after("<metanorma-extension/>")&.next_element
  return after_bibdata if after_bibdata

  first_elem = xml.root.elements.first
  first_elem.before("<metanorma-extension/>").previous_element
end

# Collects the generated passthrough XSLT and the contents of the
# preprocessing XSLT file (when either is available) and appends the combined
# markup to the "render" element under the metanorma-extension.
# Does nothing, and returns nil, when there is no XSLT content at all.
def preprocess_xslt_insert(docxml)
  content = ""
  passthrough = passthrough_xslt
  content += passthrough if passthrough
  xslt_file = preprocess_xslt_read
  content += File.read(xslt_file) if xslt_file
  return if content.empty?

  extension_insert(docxml, %w(render)) << content
end

# Builds one <preprocess-xslt> stylesheet per configured output format.
# Each stylesheet copies the document unchanged, except that a passthrough
# element is kept only if its @formats attribute contains ",<format>,".
# Returns nil when @output_formats is nil or empty; otherwise the stylesheets
# joined by newlines.
def passthrough_xslt
  return nil if @output_formats.nil? || @output_formats.empty?

  stylesheets = @output_formats.each_key.map do |fmt|
    <<~XSLT
      <preprocess-xslt format="#{fmt}">
      <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml" version="1.0">
      <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="no"/>
      <xsl:template match="@* | node()">
      <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
      </xsl:copy>
      </xsl:template>
      <xsl:template match="*[local-name() = 'passthrough']">
      <xsl:if test="contains(@formats,',#{fmt},')"> <!-- delimited -->
      <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
      </xsl:copy>
      </xsl:if>
      </xsl:template>
      </xsl:stylesheet>
      </preprocess-xslt>
    XSLT
  end
  stylesheets.join("\n")
end

# Locates the preprocessing stylesheet by asking html_doc_path for
# "preprocess.xslt".
# read in from file, but with `<preprocess-xslt @format="">` wrapper
# NOTE(review): presumably returns a file path or nil — preprocess_xslt_insert
# passes a truthy result straight to File.read; confirm against html_doc_path.
def preprocess_xslt_read
html_doc_path("preprocess.xslt")
end

# Adds a <toc> entry to the metanorma-extension for each auxiliary table of
# contents that is enabled (@tocfigures, @toctables, @tocrecommendations),
# titled from the corresponding @i18n string.
# Does nothing when none of the three flags is set.
def toc_metadata(docxml)
  @tocfigures || @toctables || @tocrecommendations or return
  ins = extension_insert(docxml)
  @tocfigures and
    ins << "<toc type='figure'><title>#{@i18n.toc_figures}</title></toc>"
  @toctables and
    ins << "<toc type='table'><title>#{@i18n.toc_tables}</title></toc>"
  # The recommendation ToC is governed by its own flag; it was previously
  # gated on @tocfigures, so it was omitted when only recommendations were
  # requested and emitted whenever figures were.
  @tocrecommendations and
    ins << "<toc type='recommendation'><title>#{@i18n.toc_recommendations}" \
           "</title></toc>"
end

def address_precompose(bib)
Expand All @@ -92,26 +18,6 @@ def address_precompose(bib)
end
end

# Records font requirements as presentation-metadata entries placed directly
# after the insertion point returned by presmeta_insert_pt.
# @fontist_fonts is a semicolon-separated list of font names; the names are
# inserted in reverse so that they end up in their original order.
# @fontlicenseagreement, when set, is recorded as "font-license-agreement".
def fonts_metadata(xmldoc)
  ins = presmeta_insert_pt(xmldoc)
  # CSV.parse_line returns nil for an empty string, and nil for empty fields
  # (e.g. "A;;B"); both used to raise NoMethodError on strip/map.
  fonts = @fontist_fonts ? CSV.parse_line(@fontist_fonts, col_sep: ";") : nil
  Array(fonts).map { |f| f.to_s.strip }.reject(&:empty?).reverse_each do |f|
    ins.next = presmeta("fonts", f)
  end
  @fontlicenseagreement and
    ins.next = presmeta("font-license-agreement", @fontlicenseagreement)
end

# Picks the element after which presentation metadata is inserted: the first
# match among presentation-metadata, metanorma-extension and bibdata, tried in
# that order. Returns nil when none of them is present.
def presmeta_insert_pt(xmldoc)
  %w(presentation-metadata metanorma-extension bibdata).each do |tag|
    hit = xmldoc.at(ns("//#{tag}"))
    return hit if hit
  end
  nil
end

# Serialises one name/value pair as a <presentation-metadata> element.
# Both arguments are interpolated as-is (no escaping is applied here).
def presmeta(name, value)
  name_el = "<name>#{name}</name>"
  value_el = "<value>#{value}</value>"
  "<presentation-metadata>#{name_el}#{value_el}</presentation-metadata>"
end

def address_precompose1(addr)
ret = []
addr.xpath(ns("./street")).each { |s| ret << to_xml(s.children) }
Expand Down Expand Up @@ -176,67 +82,5 @@ def tag_translate(tag, lang, value)
tag.next["language"] = lang
tag.next.children = value
end

# Serialises one localisation entry as a <localized-string> element carrying
# the given key and the current @lang as attributes.
def i18n_tag(key, value)
  attrs = "key='#{key}' language='#{@lang}'"
  "<localized-string #{attrs}>#{value}</localized-string>"
end

# Normalises a localisation key: converts it to a string and replaces every
# whitespace character and every full stop with an underscore.
def i18n_safe(key)
  key.to_s.gsub(/[\s.]/, "_")
end

# Flattens a localisation value into a list of <localized-string> tags.
# hash: the value to flatten — a Hash (delegated to i8n_name1), an Array
#       (each non-blank element flattened under an indexed key), or a scalar.
# pref: the key prefix accumulated so far.
# Returns an Array of tag strings.
def i8n_name(hash, pref)
  case hash
  when Hash then i8n_name1(hash, pref)
  when Array
    # The Array branch used to build its key from an undefined local `k`,
    # raising NameError for any non-empty array; the element index is now
    # appended to the prefix this method was called with.
    hash.reject { |a| blank?(a) }.each_with_index.flat_map do |v1, i|
      i8n_name(v1, "#{pref}#{pref.empty? ? '' : '.'}#{i}")
    end
  else [i18n_tag(pref, hash)]
  end
end

# Flattens a Hash of localisation values into <localized-string> tags,
# skipping blank values.
# Scalars are tagged with "<pref>.<key>" (or just "<key>" for an empty prefix).
# NOTE(review): nested Hash and Array values are flattened under the key
# alone, without the outer prefix — presumably intentional; confirm.
def i8n_name1(hash, pref)
  hash.reject { |_k, v| blank?(v) }.flat_map do |k, v|
    key = i18n_safe(k)
    if v.is_a?(Hash)
      i8n_name(v, key)
    elsif v.is_a?(Array)
      items = v.reject { |a| blank?(a) }
      items.each_with_index.flat_map { |v1, i| i8n_name(v1, "#{key}.#{i}") }
    else
      [i18n_tag("#{pref}#{pref.empty? ? '' : '.'}#{key}", v)]
    end
  end
end

# True for nil and for anything that responds to empty? and is empty.
# Adapted from https://stackoverflow.com/a/31822406
def blank?(elem)
  return true if elem.nil?

  elem.respond_to?(:empty?) && elem.empty?
end

# Repeatedly prunes blank entries with trim_hash1 until a pass changes
# nothing, so containers that only become empty after an inner prune are
# removed as well. Returns the stable result.
def trim_hash(hash)
  trimmed = trim_hash1(hash)
  until trimmed == hash
    hash = trimmed
    trimmed = trim_hash1(hash)
  end
  hash
end

# One pruning pass: returns a copy of +hash+ without blank values, recursing
# into nested hashes and into array elements (blank elements are dropped from
# arrays after recursion). Anything that is not a Hash is returned unchanged.
# A nested hash that becomes empty is kept by this pass.
def trim_hash1(hash)
  return hash unless hash.is_a?(Hash)

  hash.each_with_object({}) do |(key, val), kept|
    next if blank?(val)

    kept[key] =
      if val.is_a?(Hash)
        trim_hash1(val)
      elsif val.is_a?(Array)
        val.map { |item| trim_hash1(item) }.reject { |item| blank?(item) }
      else
        val
      end
  end
end
end
end
33 changes: 23 additions & 10 deletions lib/isodoc/presentation_function/math.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ class PresentationXMLConvert < ::IsoDoc::Convert
# XPath namespace binding: maps the "m" prefix to the MathML namespace URI.
MATHML = { "m" => "http://www.w3.org/1998/Math/MathML" }.freeze

# Processes every MathML formula in the document in two passes:
# first line-break handling (mathml_linebreak), then per-formula presentation
# processing (mathml1) using the localiser from twitter_cldr_localiser.
# The two loop headers that were both present here (leaving the method with
# an unbalanced, nested loop) are reduced to the single intended loop.
def mathml(docxml)
  docxml.xpath("//m:math", MATHML).each { |f| mathml_linebreak(f) }
  locale = twitter_cldr_localiser
  # The formulas are queried again rather than reusing the first node set,
  # since mathml_linebreak may replace math nodes in the document.
  docxml.xpath("//m:math", MATHML).each do |f| # rubocop:disable Style/CombinableLoops
    mathml1(f, locale)
  end
end
Expand Down Expand Up @@ -77,8 +78,7 @@ def twitter_cldr_localiser
end

def parse_localize_number
return {} unless @localizenumber

@localizenumber or return {}
m = %r{(?<group>[^#])?(?<groupdigits>#+0)(?<decimal>.)(?<fractdigits>#+)(?<fractgroup>[^#])?}
.match(@localizenumber) or return {}
ret = { decimal: m[:decimal], group_digits: m[:groupdigits].size,
Expand All @@ -90,17 +90,13 @@ def parse_localize_number
end

# Adds an <asciimath> sibling after a MathML node, holding the AsciiMath
# rendering of that MathML (entity-encoded with the :basic set).
# Skipped when @suppressasciimathdup is set or the parent already has an
# asciimath child. Conversion failures are reported with warn and otherwise
# ignored.
#
# The guard, the insertion and the warning each appeared twice here; the
# second insertion re-encoded already-encoded text and added a second
# <asciimath> element. Each step is now performed exactly once.
def asciimath_dup(node)
  @suppressasciimathdup || node.parent.at(ns("./asciimath")) and return
  # Strip default-namespace declarations and element prefixes before parsing.
  math = node.to_xml.gsub(/ xmlns=["'][^"']+["']/, "")
    .gsub(%r{<[^:/>]+:}, "<").gsub(%r{</[^:/>]+:}, "</")
  ret = Plurimath::Math.parse(math, "mathml").to_asciimath
  node.next = "<asciimath>#{HTMLEntities.new.encode(ret, :basic)}</asciimath>"
rescue StandardError => e
  warn "Failure to convert MathML to AsciiMath\n#{node.parent.to_xml}\n#{e}"
end

def maths_just_numeral(node)
Expand All @@ -114,6 +110,23 @@ def maths_just_numeral(node)

# Per-formula presentation processing: applies style inheritance first, then
# number handling with the given locale.
# node:   the math element being processed
# locale: the localiser passed through to mathml_number
def mathml1(node, locale)
mathml_style_inherit(node)
mathml_number(node, locale)
end

# Expands a MathML node that contains a linebreak attribute into two wrapped
# variants: <math-with-linebreak>, holding the output of Plurimath's
# split_on_linebreak rendering with a <br/> before every element but the
# first, and <math-no-linebreak>, holding the original markup.
# Nodes without any linebreak attribute are left untouched (returns nil).
def mathml_linebreak(node)
  return unless node.at(".//*/@linebreak")

  formula = Plurimath::Math.parse(node.to_xml, :mathml)
  split = formula.to_mathml(split_on_linebreak: true)
  wrapper = Nokogiri::XML("<m>#{split}</m>").root
  wrapper.elements.each_with_index do |elem, idx|
    elem.previous = "<br/>" unless idx.zero?
  end
  replacement = <<~OUTPUT
    <math-with-linebreak>#{wrapper.children}</math-with-linebreak><math-no-linebreak>#{node.to_xml}</math-no-linebreak>
  OUTPUT
  node.replace(replacement)
end

def mathml_number(node, locale)
justnumeral = node.elements.size == 1 && node.elements.first.name == "mn"
justnumeral or asciimath_dup(node)
localize_maths(node, locale)
Expand Down
Loading

0 comments on commit d2e1ddc

Please sign in to comment.