diff --git a/src/ufonormalizer/__init__.py b/src/ufonormalizer/__init__.py index 1f16a99..87dcb94 100644 --- a/src/ufonormalizer/__init__.py +++ b/src/ufonormalizer/__init__.py @@ -4,11 +4,9 @@ import binascii import time import os -import re import shutil from xml.etree import cElementTree as ET import plistlib -import textwrap import datetime import glob from collections import OrderedDict @@ -838,9 +836,7 @@ def _normalizeGlifNote(element, writer): return if not value.strip(): return - writer.beginElement("note") - writer.text(value) - writer.endElement("note") + writer.simpleElement("note", value=xmlEscapeText(value)) def _normalizeGlifOutlineFormat1(element, writer): @@ -1261,29 +1257,6 @@ def data(self, text): line = "" % text self.raw(line) - def text(self, text): - text = text.strip("\n") - text = dedent_tabs(text) - text = text.strip() - text = xmlEscapeText(text) - paragraphs = [] - for paragraph in text.splitlines(): - if not paragraph: - paragraphs.append("") - else: - paragraph = textwrap.wrap( - paragraph.rstrip(), - width=xmlTextMaxLineLength, - expand_tabs=False, - replace_whitespace=False, - drop_whitespace=False, - break_long_words=False, - break_on_hyphens=False - ) - paragraphs.extend(paragraph) - for line in paragraphs: - self.raw(line) - def simpleElement(self, tag, attrs=None, value=None): if attrs: attrs = self.attributesToString(attrs) @@ -1450,63 +1423,6 @@ def xmlConvertInt(value): return str(value) -# --------------- -# Text Operations -# --------------- - -WHITESPACE_ONLY_RE = re.compile(r'^[\s\t]+$', re.MULTILINE) -LEADING_WHITESPACE_RE = re.compile(r'(^(?:\s{4}|\t)*)(?:[^\t\n])', re.MULTILINE) - - -def dedent_tabs(text): - """ - Based on `textwrap.dedent`, but modified to only work on tabs and 4-space indents - - Remove any common leading tabs from every line in `text`. - This can be used to make triple-quoted strings line up with the left - edge of the display, while still presenting them in the source code - in indented form. - - Entirely blank lines are normalized to a newline character. - """ - # Look for the longest leading string of spaces and tabs common to - # all lines. - margin = None - text = WHITESPACE_ONLY_RE.sub('', text) - indents = LEADING_WHITESPACE_RE.findall(text) - for indent in indents: - if margin is None: - margin = indent - - # Current line more deeply indented than previous winner: - # no change (previous winner is still on top). - elif indent.startswith(margin): - pass - - # Current line consistent with and no deeper than previous winner: - # it's the new winner. - elif margin.startswith(indent): - margin = indent - - # Find the largest common whitespace between current line and previous - # winner. - else: - for i, (x, y) in enumerate(zip(margin, indent)): - if x != y: - margin = margin[:i] - break - - # sanity check (testing/debugging only) - if 0 and margin: - for line in text.split("\n"): - assert not line or line.startswith(margin), \ - "line = %r, margin = %r" % (line, margin) - - if margin: - text = re.sub(r'(?m)^' + margin, '', text) - return text - - # --------------- # Path Operations # --------------- diff --git a/tests/data/glif/format2.glif b/tests/data/glif/format2.glif index 83c5f6d..19cba54 100644 --- a/tests/data/glif/format2.glif +++ b/tests/data/glif/format2.glif @@ -35,7 +35,5 @@ 1,0,0,0.5 - - arbitrary text about the glyph - + arbitrary text about the glyph diff --git a/tests/test_ufonormalizer.py b/tests/test_ufonormalizer.py index 4589f80..f242409 100644 --- a/tests/test_ufonormalizer.py +++ b/tests/test_ufonormalizer.py @@ -107,9 +107,7 @@ 1,0,0,0.5 - - arbitrary text about the glyph - + arbitrary text about the glyph ''' @@ -767,25 +765,35 @@ def test_normalizeGLIF_lib_undefined(self): self.assertEqual(writer.getText(), '') def test_normalizeGLIF_note_defined(self): + """ Serialization of notes is non-fancy: we take the note text and + use it, unchanged, as the body of the element. In previous + version of ufonormalizer we would break the user text into lines. See + https://github.com/unified-font-object/ufoNormalizer/issues/85 for some + background. + """ + element = ET.fromstring("Blah") writer = XMLWriter(declaration=None) _normalizeGlifNote(element, writer) - self.assertEqual(writer.getText(), "\n\tBlah\n") + self.assertEqual(writer.getText(), "Blah") - element = ET.fromstring(" Blah \t\n\t ") + # encode accent correctly + element = ET.fromstring( + tobytes("Don't forget to check the béziers!!", + encoding="utf8")) writer = XMLWriter(declaration=None) _normalizeGlifNote(element, writer) - self.assertEqual(writer.getText(), "\n\tBlah\n") + self.assertEqual( + writer.getText(), + "Don't forget to check the b\xe9ziers!!") - element = ET.fromstring( - tobytes("Don't forget to check the béziers!!", - encoding="utf8")) + # trailing whitespace is preserved + element = ET.fromstring(" Blah \t\n\t ") writer = XMLWriter(declaration=None) _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tDon't forget to check the b\xe9ziers!!\n") + self.assertEqual(writer.getText(), " Blah \t\n\t ") + # multiline strings are preserved element = ET.fromstring( tobytes("A quick brown fox jumps over the lazy dog.\n" "Příliš žluťoučký kůň úpěl ďábelské ódy.", @@ -794,64 +802,24 @@ def test_normalizeGLIF_note_defined(self): _normalizeGlifNote(element, writer) self.assertEqual( writer.getText(), - "\n\tA quick brown fox jumps over the lazy dog.\n\t" + "A quick brown fox jumps over the lazy dog.\n" "P\u0159\xedli\u0161 \u017elu\u0165ou\u010dk\xfd k\u016f\u0148 " - "\xfap\u011bl \u010f\xe1belsk\xe9 \xf3dy.\n") - - element = ET.fromstring( - " Line1 \t\n\n Line3\t ") - writer = XMLWriter(declaration=None) - _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tLine1\n\t\n\t Line3\n") - - # Normalizer should not indent Line2 and Line3 more than already indented - element = ET.fromstring( - "\n\tLine1\n\tLine2\n\tLine3\n") - writer = XMLWriter(declaration=None) - _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tLine1\n\tLine2\n\tLine3\n") - - # Normalizer should keep the extra tab in line 2 - element = ET.fromstring( - "\n\tLine1\n\t\tLine2\n\tLine3\n") - writer = XMLWriter(declaration=None) - _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tLine1\n\t\tLine2\n\tLine3\n") + "\xfap\u011bl \u010f\xe1belsk\xe9 \xf3dy.") - # Normalizer should keep the extra spaces on line 2 + # Everything is always preserved element = ET.fromstring( - "\n\tLine1\n\t Line2\n\tLine3\n") + "\n\tLine1\n\t\tLine2\n\t Line3\n") writer = XMLWriter(declaration=None) _normalizeGlifNote(element, writer) self.assertEqual( writer.getText(), - "\n\tLine1\n\t Line2\n\tLine3\n") + "\n\tLine1\n\t\tLine2\n\t Line3\n") - # Normalizer should remove the extra tab all lines have in common, - # but leave the additional tab on line 2 - element = ET.fromstring( - "\n\t\tLine1\n\t\t\tLine2\n\t\tLine3\n") + # correctly escape xml + element = ET.fromstring("escape<br />me!") writer = XMLWriter(declaration=None) _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tLine1\n\t\tLine2\n\tLine3\n") - - # Normalizer should remove the extra 4-space all lines have in common, - # but leave the additional 4-space on line 2 - element = ET.fromstring( - "\n Line1\n Line2\n Line3\n") - writer = XMLWriter(declaration=None) - _normalizeGlifNote(element, writer) - self.assertEqual( - writer.getText(), - "\n\tLine1\n\t Line2\n\tLine3\n") + self.assertEqual(writer.getText(), "escape<br />me!") def test_normalizeGLIF_note_undefined(self): element = ET.fromstring("")