From c3386e8d38061ce70a3232e53812204241610d0e Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 00:09:47 +0100 Subject: [PATCH 1/9] Add tox and Dockerfile config. - Update README with command to run tests with tox and docker. Not all tests are passing. Failures (that aren't XFAILS) are mainly due to line endings. Test results expect \r\n but are only getting \n. Guessing previous tests were all run on windows? --- Dockerfile | 5 +++++ README.md | 2 ++ tox.ini | 10 ++++++++++ 3 files changed, 17 insertions(+) create mode 100644 Dockerfile create mode 100644 tox.ini diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..38d3eb2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,5 @@ +FROM fkrull/multi-python + +WORKDIR /app + +COPY . . diff --git a/README.md b/README.md index e8b17f1..ff25a1b 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,8 @@ existing reference output files in `tests/rtf-as-html` and `tests/rtf-as-html`. The empty or missing output files indicate where functionality is missing, which nicely indicates possible places to jump in if you want to help. +To run tests quietly with docker and tox `docker run --rm $(docker build -q .) tox`. Tests run against python 2.7 and python 3.6 at the moment. + Dependencies ============ diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..5005b42 --- /dev/null +++ b/tox.ini @@ -0,0 +1,10 @@ +[tox] +envlist = py27,py36 + +[testenv] +deps = + pytest + six + beautifulsoup4 +commands = + pytest -v tests/test_readrtf15.py \ No newline at end of file From a772d897b380a71b42992312c0c80f69bbaf8957 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 00:19:26 +0100 Subject: [PATCH 2/9] Fix tests failing due to line endings. - Tried to apply smallest possible change. I will eventually do something about the formatting. 
--- pyth/plugins/xhtml/writer.py | 12 +++++++----- tests/test_readrtf15.py | 8 +++++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pyth/plugins/xhtml/writer.py b/pyth/plugins/xhtml/writer.py index eeb5edb..772b09d 100644 --- a/pyth/plugins/xhtml/writer.py +++ b/pyth/plugins/xhtml/writer.py @@ -1,6 +1,8 @@ """ Render documents as XHTML fragments """ +import os + from pyth import document from pyth.format import PythWriter @@ -17,11 +19,11 @@ class XHTMLWriter(PythWriter): @classmethod - def write(klass, document, target=None, cssClasses=True, pretty=False): + def write(klass, document, target=None, cssClasses=True, pretty=False, newline=os.linesep): if target is None: target = six.BytesIO() - writer = XHTMLWriter(document, target, cssClasses, pretty) + writer = XHTMLWriter(document, target, cssClasses, pretty, newline) final = writer.go() final.seek(0) @@ -37,12 +39,12 @@ def write(klass, document, target=None, cssClasses=True, pretty=False): return final - - def __init__(self, doc, target, cssClasses=True, pretty=False): + def __init__(self, doc, target, cssClasses=True, pretty=False, newline=os.linesep): self.document = doc self.target = target self.cssClasses = cssClasses self.pretty = pretty + self.newline = newline self.paragraphDispatch = { document.List: self._list, document.Paragraph: self._paragraph @@ -154,7 +156,7 @@ def render(self, target): if self.tag is not None: target.write(('' % self.tag).encode("utf-8")) - + def attrString(self): return " ".join( diff --git a/tests/test_readrtf15.py b/tests/test_readrtf15.py index 0acecf5..a45c8f8 100644 --- a/tests/test_readrtf15.py +++ b/tests/test_readrtf15.py @@ -6,6 +6,8 @@ """ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals + import glob import os import os.path @@ -15,6 +17,10 @@ from pyth.plugins.xhtml.writer import XHTMLWriter, write_html_file from pyth.plugins.plaintext.writer import PlaintextWriter + +TEST_LINE_SEP 
= '\r\n' # Reference Outputs use CRLF + + class TestRtfHTML(unittest.TestCase): pass # will be filled dynamically now: @@ -45,7 +51,7 @@ def testmethod(self): # the test method to be added write_html_file(outputfilename, the_testoutput, print_msg=False) elif writer == 'txt': with open(outputfilename, "wt") as f: - PlaintextWriter.write(document, f) + PlaintextWriter.write(document, f, newline=TEST_LINE_SEP) #--- compute test output: with open(outputfilename, "rb") as input: From 29228d6df64c7c0be41d0c7627ce2f3e343b2a48 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 00:23:31 +0100 Subject: [PATCH 3/9] Fix language attribution on github --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitattributes b/.gitattributes index c9d44ad..e4f9f71 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ *.rtf eol=crlf +* linguist-vendored +*.py linguist-vendored=false From 87b6d4ef07fda312457083f3e6055493cb0662ba Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 01:08:58 +0100 Subject: [PATCH 4/9] Project config updates --- .dockerignore | 1 + .gitignore | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..7b2511e --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +pyproject.toml \ No newline at end of file diff --git a/.gitignore b/.gitignore index b661fef..40cbba9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ *.py[co] *.egg-info tests/currentoutput/ +.devcontainer +pyproject.toml +poetry.lock From 4776d211bd8450819d045619d68ffa7d479f06cb Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 01:09:18 +0100 Subject: [PATCH 5/9] Tox now runs xhtml tests --- tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 5005b42..fb76316 100644 --- a/tox.ini +++ b/tox.ini @@ -7,4 +7,5 @@ deps = six beautifulsoup4 commands = - pytest -v 
tests/test_readrtf15.py \ No newline at end of file + pytest -v tests/test_readrtf15.py + pytest -v tests/test_readxhtml.py \ No newline at end of file From 2d04d391f312128ad32795c523f00715b2f41b66 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 01:25:18 +0100 Subject: [PATCH 6/9] Fix failing xhtml tests Main fixes: - Requires lxml to pass these tests - The rest was all changes to the BeautifulSoup API - convertEntities is no longer a valid parameter, and the value passed to it is unnecessary. The functionality is replaced by the features kwarg, which in this case I set to xml. - Silenced some deprecation warnings by changing fromEncoding to from_encoding -- from_encoding is effectively ignored in python 3, but probably still required in python 2 - NavigableString class exists on the beautifulsoup4 module, so reference was updated. --- pyth/plugins/xhtml/reader.py | 15 +++++++-------- tox.ini | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyth/plugins/xhtml/reader.py b/pyth/plugins/xhtml/reader.py index fc27f86..245aec6 100644 --- a/pyth/plugins/xhtml/reader.py +++ b/pyth/plugins/xhtml/reader.py @@ -3,7 +3,7 @@ """ from __future__ import absolute_import -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, NavigableString import six from pyth import document @@ -26,9 +26,8 @@ def __init__(self, source, css_source=None, encoding="utf-8", link_callback=None def go(self): soup = BeautifulSoup(self.source, - convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES, - fromEncoding=self.encoding, - smartQuotesTo=None) + features="xml", + from_encoding=self.encoding) # Make sure the document content doesn't use multi-lines soup = self.format(soup) doc = document.Document() @@ -58,12 +57,12 @@ def format(self, soup): text = six.text_type(node) lines = [x.strip() for x in text.splitlines()] text = ' '.join(lines) - node.replaceWith(BeautifulSoup.BeautifulSoup(text)) - soup = BeautifulSoup.BeautifulSoup(six.text_type(soup)) + 
 node.replaceWith(BeautifulSoup(text, features="xml")) + soup = BeautifulSoup(six.text_type(soup), features="xml") # replace all <br/> tag by newline character for node in soup.findAll('br'): node.replaceWith("\n") - soup = BeautifulSoup.BeautifulSoup(six.text_type(soup)) + soup = BeautifulSoup(six.text_type(soup), features="xml") return soup def is_bold(self, node): @@ -143,7 +142,7 @@ def process_into(self, node, obj): Process a BeautifulSoup node and fill its elements into a pyth base object. """ - if isinstance(node, BeautifulSoup.NavigableString): + if isinstance(node, NavigableString): text = self.process_text(node) if text: obj.append(text) diff --git a/tox.ini b/tox.ini index fb76316..00ffc10 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ deps = pytest six beautifulsoup4 + lxml commands = pytest -v tests/test_readrtf15.py pytest -v tests/test_readxhtml.py \ No newline at end of file From 2669ee53c7be9fbe412346ddd27ab0d58490a564 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 02:30:58 +0100 Subject: [PATCH 7/9] Fix failing LaTeX tests - Added latex tests to tox.ini Most of these were bytes vs unicode issues. Tried using the unicode_literals import where possible, but turns out there are some peculiar interactions between it and raw strings. Ended up not using the import in the Latex writer. 
--- pyth/plugins/latex/writer.py | 24 ++++++++++++------------ pyth/plugins/rst/writer.py | 34 +++++++++++++++++----------------- tests/test_readxhtml.py | 16 ++++++++-------- tests/test_writelatex.py | 23 ++++++++++++----------- tox.ini | 4 +++- 5 files changed, 52 insertions(+), 49 deletions(-) diff --git a/pyth/plugins/latex/writer.py b/pyth/plugins/latex/writer.py index 1369350..591b125 100644 --- a/pyth/plugins/latex/writer.py +++ b/pyth/plugins/latex/writer.py @@ -6,7 +6,7 @@ """ from __future__ import absolute_import -from six import StringIO +import six import docutils.core from pyth import document @@ -15,7 +15,6 @@ class LatexWriter(PythWriter): - @classmethod def write(klass, document, target=None, stylesheet=""): """ @@ -37,7 +36,7 @@ def __init__(self, doc, target=None, stylesheet=""): """ self.document = doc self.stylesheet = stylesheet - self.target = target if target is not None else StringIO() + self.target = target if target is not None else six.BytesIO() @property def full_stylesheet(self): @@ -57,19 +56,20 @@ def full_stylesheet(self): } """ % (self.document.properties.get("title"), self.document.properties.get("author"), - self.document.properties.get("subject")) + self.document.properties.get("subject"), + ) return latex_fragment + self.stylesheet def go(self): rst = RSTWriter.write(self.document).getvalue() - settings = dict(input_encoding="UTF-8", - output_encoding="UTF-8", - stylesheet="stylesheet.tex") - latex = docutils.core.publish_string(rst, - writer_name="latex", - settings_overrides=settings) + settings = dict( + input_encoding="UTF-8", output_encoding="UTF-8", stylesheet="stylesheet.tex" + ) + latex = docutils.core.publish_string( + rst, writer_name="latex", settings_overrides=settings + ) # We don't want to keep an \input command in the latex file - latex = latex.replace(r"\input{stylesheet.tex}", - self.full_stylesheet) + # assert False, '{}, {}'.format(type(rb"\input{stylesheet.tex}"), 
type(six.ensure_binary(self.full_stylesheet))) + latex = latex.replace(six.ensure_binary(r"\input{stylesheet.tex}"), six.ensure_binary(self.full_stylesheet)) self.target.write(latex) return self.target diff --git a/pyth/plugins/rst/writer.py b/pyth/plugins/rst/writer.py index de42c44..cfb311d 100644 --- a/pyth/plugins/rst/writer.py +++ b/pyth/plugins/rst/writer.py @@ -1,9 +1,9 @@ """ Render documents as reStructuredText. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import six -from six import StringIO +from six import BytesIO from pyth import document from pyth.format import PythWriter @@ -15,7 +15,7 @@ class RSTWriter(PythWriter): @classmethod def write(klass, document, target=None): if target is None: - target = StringIO() + target = BytesIO() writer = RSTWriter(document, target) return writer.go() @@ -28,10 +28,10 @@ def __init__(self, doc, target): document.Paragraph: self.paragraph} def go(self): - for (i, paragraph) in enumerate(self.document.content): + for _, paragraph in enumerate(self.document.content): handler = self.paragraphDispatch[paragraph.__class__] handler(paragraph) - self.target.write("\n") + self.target.write(b"\n") # Heh heh, remove final paragraph spacing self.target.seek(-2, 1) @@ -43,35 +43,35 @@ def text(self, text): """ process a pyth text and return the formatted string """ - ret = u"".join(text.content) + ret = "".join(text.content) if 'url' in text.properties: - return u"`%s`_" % ret + return "`%s`_" % ret if 'bold' in text.properties: - return u"**%s**" % ret + return "**%s**" % ret if 'italic' in text.properties: - return u"*%s*" % ret + return "*%s*" % ret if 'sub' in text.properties: - return six.u(r"\ :sub:`%s`\ " % ret) + return r"\ :sub:`%s`\ " % ret if 'super' in text.properties: - return six.u(r"\ :sup:`%s`\ " % ret) + return r"\ :sup:`%s`\ " % ret return ret - def paragraph(self, paragraph, prefix=""): + def paragraph(self, paragraph, prefix=b""): """ process a pyth 
paragraph into the target """ content = [] for text in paragraph.content: content.append(self.text(text)) - content = u"".join(content).encode("utf-8") + content = "".join(content).encode("utf-8") - for line in content.split("\n"): - self.target.write(" " * self.indent) + for line in content.split(b"\n"): + self.target.write(b" " * self.indent) self.target.write(prefix) self.target.write(line) - self.target.write("\n") + self.target.write(b"\n") if prefix: - prefix = " " + prefix = b" " # handle the links if any('url' in text.properties for text in paragraph.content): diff --git a/tests/test_readxhtml.py b/tests/test_readxhtml.py index 5038834..6e0019b 100644 --- a/tests/test_readxhtml.py +++ b/tests/test_readxhtml.py @@ -18,8 +18,8 @@ def test_basic(self): """ xhtml = "
" doc = XHTMLReader.read(xhtml) - self.assert_(isinstance(doc, pyth.document.Document)) - self.assert_(not doc.content) + self.assertTrue(isinstance(doc, pyth.document.Document)) + self.assertTrue(not doc.content) def test_paragraphs(self): """ @@ -27,14 +27,14 @@ def test_paragraphs(self): """ xhtml = "

p0

p1

p2

" doc = XHTMLReader.read(xhtml) - self.assert_(len(doc.content) == 3) + self.assertTrue(len(doc.content) == 3) for i, p in enumerate(doc.content): - self.assert_(isinstance(p, pyth.document.Paragraph)) - self.assert_(len(p.content) == 1) - self.assert_(isinstance(p.content[0], pyth.document.Text)) + self.assertTrue(isinstance(p, pyth.document.Paragraph)) + self.assertTrue(len(p.content) == 1) + self.assertTrue(isinstance(p.content[0], pyth.document.Text)) text = p.content[0] - self.assert_(len(text.content) == 1) - self.assert_(text.content[0] == 'p%d' % i) + self.assertTrue(len(text.content) == 1) + self.assertTrue(text.content[0] == 'p%d' % i) def test_bold(self): """ diff --git a/tests/test_writelatex.py b/tests/test_writelatex.py index 73c7aac..99a3b4a 100644 --- a/tests/test_writelatex.py +++ b/tests/test_writelatex.py @@ -1,12 +1,13 @@ """ unit tests of the latex writer """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import unittest +import six from pyth.plugins.latex.writer import LatexWriter -from pyth.plugins.python.reader import * +from pyth.plugins.python.reader import PythonReader, P, T, BOLD, ITALIC class TestWriteLatex(unittest.TestCase): @@ -22,19 +23,19 @@ def test_paragraph(self): """ Try a single paragraph document """ - doc = PythonReader.read(P[u"the text"]) + doc = PythonReader.read(P["the text"]) latex = LatexWriter.write(doc).getvalue() - assert "the text" in latex + assert six.ensure_binary("the text") in latex def test_bold(self): - doc = PythonReader.read([P[T(BOLD)[u"bold text"]]]) + doc = PythonReader.read([P[T(BOLD)["bold text"]]]) latex = LatexWriter.write(doc).getvalue() - assert r"\textbf{bold text}" in latex, latex + assert six.ensure_binary(r"\textbf{bold text}") in latex, latex def test_italic(self): - doc = PythonReader.read([P[T(ITALIC)[u"italic text"]]]) + doc = PythonReader.read([P[T(ITALIC)["italic text"]]]) latex = LatexWriter.write(doc).getvalue() - assert r"\emph{italic 
text}" in latex, latex + assert six.ensure_binary(r"\emph{italic text}") in latex, latex def test_metadata(self): """ @@ -46,6 +47,6 @@ def test_metadata(self): doc["title"] = "The Title" latex = LatexWriter.write(doc).getvalue() - assert "pdfauthor={The Author}" in latex, latex - assert "pdfsubject={The Subject}" in latex, latex - assert "pdftitle={The Title}" in latex, latex + assert six.ensure_binary("pdfauthor={The Author}") in latex, latex + assert six.ensure_binary("pdfsubject={The Subject}") in latex, latex + assert six.ensure_binary("pdftitle={The Title}") in latex, latex diff --git a/tox.ini b/tox.ini index 00ffc10..50f3212 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,8 @@ deps = six beautifulsoup4 lxml + docutils commands = pytest -v tests/test_readrtf15.py - pytest -v tests/test_readxhtml.py \ No newline at end of file + pytest -v tests/test_readxhtml.py + pytest -v tests/test_writelatex.py \ No newline at end of file From 8da9e256c302426d9e382beea5c3772c5dd30c32 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 02:52:07 +0100 Subject: [PATCH 8/9] Fix failing PDF tests - Install pdftohtml in test Dockerfile - Add pdf tests to tox.ini - Use six.BytesIO instead of StringIO - coerce some strings to bytes where needed --- Dockerfile | 6 ++++++ pyth/plugins/pdf/writer.py | 4 ++-- tests/test_writepdf.py | 22 ++++++++++++---------- tox.ini | 4 +++- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 38d3eb2..ac786d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,4 +2,10 @@ FROM fkrull/multi-python WORKDIR /app +RUN apt update && apt install pdftohtml -y + +COPY tox.ini . + +RUN tox -v; exit 0 + COPY . . 
diff --git a/pyth/plugins/pdf/writer.py b/pyth/plugins/pdf/writer.py index be45290..97c6704 100644 --- a/pyth/plugins/pdf/writer.py +++ b/pyth/plugins/pdf/writer.py @@ -3,7 +3,7 @@ """ from __future__ import absolute_import -from six import StringIO +import six import cgi # For escape() from pyth import document @@ -34,7 +34,7 @@ def write(klass, document, target=None, paragraphStyle=None): story = writer.go() if target is None: - target = StringIO() + target = six.BytesIO() doc = SimpleDocTemplate(target) doc.build(story) diff --git a/tests/test_writepdf.py b/tests/test_writepdf.py index 6f89560..31ca237 100644 --- a/tests/test_writepdf.py +++ b/tests/test_writepdf.py @@ -3,11 +3,13 @@ """ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals import unittest import subprocess import tempfile import os +import six from bs4 import BeautifulSoup @@ -57,41 +59,41 @@ def test_paragraph(self): """ Try a simple document with one paragraph """ - doc = PythonReader.read(P[u"the text"]) + doc = PythonReader.read(P["the text"]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - assert "the text" in html + assert six.ensure_binary("the text") in html def test_bold(self): - doc = PythonReader.read([P[T(BOLD)[u"bold text"]]]) + doc = PythonReader.read([P[T(BOLD)["bold text"]]]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, features='xml') node = soup.find("b") assert node assert node.string == "bold text" def test_italic(self): - doc = PythonReader.read([P[T(ITALIC)[u"italic text"]]]) + doc = PythonReader.read([P[T(ITALIC)["italic text"]]]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, features='xml') node = soup.find("i") assert node assert node.string == "italic text" def test_latex(self): - doc = PythonReader.read(P[u"the-text"]) + doc = 
PythonReader.read(P["the-text"]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - assert "the-text" in html, html + assert six.ensure_binary("the-text") in html, html def test_rst(self): - doc = PythonReader.read(P[u"the-text"]) + doc = PythonReader.read(P["the-text"]) pdf = PDFWriter.write(doc).getvalue() print(pdf) html = self.pdf_to_html(pdf) - assert "the-text" in html, html + assert six.ensure_binary("the-text") in html, html if __name__ == '__main__': diff --git a/tox.ini b/tox.ini index 50f3212..cecc51a 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,9 @@ deps = beautifulsoup4 lxml docutils + reportlab commands = pytest -v tests/test_readrtf15.py pytest -v tests/test_readxhtml.py - pytest -v tests/test_writelatex.py \ No newline at end of file + pytest -v tests/test_writelatex.py + pytest -v tests/test_writepdf.py \ No newline at end of file From 903169201c0f217b53ced4e9c71a8893406623b6 Mon Sep 17 00:00:00 2001 From: Shaun Hegarty Date: Thu, 12 May 2022 02:52:34 +0100 Subject: [PATCH 9/9] I think that progress is worth a minor version increment --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fd92b06..b69aee7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup(name="pyth3", - version="0.7", + version="0.7.1", packages = find_packages(), zip_safe = False,