Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pyproject.toml
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
*.rtf eol=crlf
* linguist-vendored
*.py linguist-vendored=false
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
*.py[co]
*.egg-info
tests/currentoutput/
.devcontainer
pyproject.toml
poetry.lock
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM fkrull/multi-python

WORKDIR /app

RUN apt update && apt install pdftohtml -y

COPY tox.ini .

RUN tox -v; exit 0

COPY . .
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ existing reference output files in `tests/rtf-as-html` and `tests/rtf-as-html`.
The empty or missing output files indicate where functionality is missing,
which nicely indicates possible places to jump in if you want to help.

To run the tests quietly with Docker and tox, run `docker run --rm $(docker build -q .) tox`. The tests currently run against Python 2.7 and Python 3.6.


Dependencies
============
Expand Down
24 changes: 12 additions & 12 deletions pyth/plugins/latex/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
from __future__ import absolute_import

from six import StringIO
import six
import docutils.core

from pyth import document
Expand All @@ -15,7 +15,6 @@


class LatexWriter(PythWriter):

@classmethod
def write(klass, document, target=None, stylesheet=""):
"""
Expand All @@ -37,7 +36,7 @@ def __init__(self, doc, target=None, stylesheet=""):
"""
self.document = doc
self.stylesheet = stylesheet
self.target = target if target is not None else StringIO()
self.target = target if target is not None else six.BytesIO()

@property
def full_stylesheet(self):
Expand All @@ -57,19 +56,20 @@ def full_stylesheet(self):
}
""" % (self.document.properties.get("title"),
self.document.properties.get("author"),
self.document.properties.get("subject"))
self.document.properties.get("subject"),
)
return latex_fragment + self.stylesheet

def go(self):
rst = RSTWriter.write(self.document).getvalue()
settings = dict(input_encoding="UTF-8",
output_encoding="UTF-8",
stylesheet="stylesheet.tex")
latex = docutils.core.publish_string(rst,
writer_name="latex",
settings_overrides=settings)
settings = dict(
input_encoding="UTF-8", output_encoding="UTF-8", stylesheet="stylesheet.tex"
)
latex = docutils.core.publish_string(
rst, writer_name="latex", settings_overrides=settings
)
# We don't want to keep an \input command in the latex file
latex = latex.replace(r"\input{stylesheet.tex}",
self.full_stylesheet)
# assert False, '{}, {}'.format(type(rb"\input{stylesheet.tex}"), type(six.ensure_binary(self.full_stylesheet)))
latex = latex.replace(six.ensure_binary(r"\input{stylesheet.tex}"), six.ensure_binary(self.full_stylesheet))
self.target.write(latex)
return self.target
4 changes: 2 additions & 2 deletions pyth/plugins/pdf/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from __future__ import absolute_import

from six import StringIO
import six
import cgi # For escape()

from pyth import document
Expand Down Expand Up @@ -34,7 +34,7 @@ def write(klass, document, target=None, paragraphStyle=None):
story = writer.go()

if target is None:
target = StringIO()
target = six.BytesIO()

doc = SimpleDocTemplate(target)
doc.build(story)
Expand Down
34 changes: 17 additions & 17 deletions pyth/plugins/rst/writer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Render documents as reStructuredText.
"""
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals
import six
from six import StringIO
from six import BytesIO

from pyth import document
from pyth.format import PythWriter
Expand All @@ -15,7 +15,7 @@ class RSTWriter(PythWriter):
@classmethod
def write(klass, document, target=None):
if target is None:
target = StringIO()
target = BytesIO()

writer = RSTWriter(document, target)
return writer.go()
Expand All @@ -28,10 +28,10 @@ def __init__(self, doc, target):
document.Paragraph: self.paragraph}

def go(self):
for (i, paragraph) in enumerate(self.document.content):
for _, paragraph in enumerate(self.document.content):
handler = self.paragraphDispatch[paragraph.__class__]
handler(paragraph)
self.target.write("\n")
self.target.write(b"\n")

# Heh heh, remove final paragraph spacing
self.target.seek(-2, 1)
Expand All @@ -43,35 +43,35 @@ def text(self, text):
"""
process a pyth text and return the formatted string
"""
ret = u"".join(text.content)
ret = "".join(text.content)
if 'url' in text.properties:
return u"`%s`_" % ret
return "`%s`_" % ret
if 'bold' in text.properties:
return u"**%s**" % ret
return "**%s**" % ret
if 'italic' in text.properties:
return u"*%s*" % ret
return "*%s*" % ret
if 'sub' in text.properties:
return six.u(r"\ :sub:`%s`\ " % ret)
return r"\ :sub:`%s`\ " % ret
if 'super' in text.properties:
return six.u(r"\ :sup:`%s`\ " % ret)
return r"\ :sup:`%s`\ " % ret
return ret

def paragraph(self, paragraph, prefix=""):
def paragraph(self, paragraph, prefix=b""):
"""
process a pyth paragraph into the target
"""
content = []
for text in paragraph.content:
content.append(self.text(text))
content = u"".join(content).encode("utf-8")
content = "".join(content).encode("utf-8")

for line in content.split("\n"):
self.target.write(" " * self.indent)
for line in content.split(b"\n"):
self.target.write(b" " * self.indent)
self.target.write(prefix)
self.target.write(line)
self.target.write("\n")
self.target.write(b"\n")
if prefix:
prefix = " "
prefix = b" "

# handle the links
if any('url' in text.properties for text in paragraph.content):
Expand Down
15 changes: 7 additions & 8 deletions pyth/plugins/xhtml/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from __future__ import absolute_import

from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
import six

from pyth import document
Expand All @@ -26,9 +26,8 @@ def __init__(self, source, css_source=None, encoding="utf-8", link_callback=None

def go(self):
soup = BeautifulSoup(self.source,
convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
fromEncoding=self.encoding,
smartQuotesTo=None)
features="xml",
from_encoding=self.encoding)
# Make sure the document content doesn't use multi-lines
soup = self.format(soup)
doc = document.Document()
Expand Down Expand Up @@ -58,12 +57,12 @@ def format(self, soup):
text = six.text_type(node)
lines = [x.strip() for x in text.splitlines()]
text = ' '.join(lines)
node.replaceWith(BeautifulSoup.BeautifulSoup(text))
soup = BeautifulSoup.BeautifulSoup(six.text_type(soup))
node.replaceWith(BeautifulSoup(text, features="xml"))
soup = BeautifulSoup(six.text_type(soup), features="xml")
# replace all <br/> tag by newline character
for node in soup.findAll('br'):
node.replaceWith("\n")
soup = BeautifulSoup.BeautifulSoup(six.text_type(soup))
soup = BeautifulSoup(six.text_type(soup), features="xml")
return soup

def is_bold(self, node):
Expand Down Expand Up @@ -143,7 +142,7 @@ def process_into(self, node, obj):
Process a BeautifulSoup node and fill its elements into a pyth
base object.
"""
if isinstance(node, BeautifulSoup.NavigableString):
if isinstance(node, NavigableString):
text = self.process_text(node)
if text:
obj.append(text)
Expand Down
12 changes: 7 additions & 5 deletions pyth/plugins/xhtml/writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Render documents as XHTML fragments
"""
import os

from pyth import document
from pyth.format import PythWriter

Expand All @@ -17,11 +19,11 @@
class XHTMLWriter(PythWriter):

@classmethod
def write(klass, document, target=None, cssClasses=True, pretty=False):
def write(klass, document, target=None, cssClasses=True, pretty=False, newline=os.linesep):
if target is None:
target = six.BytesIO()

writer = XHTMLWriter(document, target, cssClasses, pretty)
writer = XHTMLWriter(document, target, cssClasses, pretty, newline)
final = writer.go()
final.seek(0)

Expand All @@ -37,12 +39,12 @@ def write(klass, document, target=None, cssClasses=True, pretty=False):

return final


def __init__(self, doc, target, cssClasses=True, pretty=False):
def __init__(self, doc, target, cssClasses=True, pretty=False, newline=os.linesep):
self.document = doc
self.target = target
self.cssClasses = cssClasses
self.pretty = pretty
self.newline = newline
self.paragraphDispatch = {
document.List: self._list,
document.Paragraph: self._paragraph
Expand Down Expand Up @@ -154,7 +156,7 @@ def render(self, target):

if self.tag is not None:
target.write(('</%s>' % self.tag).encode("utf-8"))


def attrString(self):
return " ".join(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from setuptools import setup, find_packages

setup(name="pyth3",
version="0.7",
version="0.7.1",
packages = find_packages(),
zip_safe = False,

Expand Down
8 changes: 7 additions & 1 deletion tests/test_readrtf15.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
"""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import glob
import os
import os.path
Expand All @@ -15,6 +17,10 @@
from pyth.plugins.xhtml.writer import XHTMLWriter, write_html_file
from pyth.plugins.plaintext.writer import PlaintextWriter


TEST_LINE_SEP = '\r\n' # Reference Outputs use CRLF


class TestRtfHTML(unittest.TestCase):
pass # will be filled dynamically now:

Expand Down Expand Up @@ -45,7 +51,7 @@ def testmethod(self): # the test method to be added
write_html_file(outputfilename, the_testoutput, print_msg=False)
elif writer == 'txt':
with open(outputfilename, "wt") as f:
PlaintextWriter.write(document, f)
PlaintextWriter.write(document, f, newline=TEST_LINE_SEP)

#--- compute test output:
with open(outputfilename, "rb") as input:
Expand Down
16 changes: 8 additions & 8 deletions tests/test_readxhtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,23 @@ def test_basic(self):
"""
xhtml = "<div></div>"
doc = XHTMLReader.read(xhtml)
self.assert_(isinstance(doc, pyth.document.Document))
self.assert_(not doc.content)
self.assertTrue(isinstance(doc, pyth.document.Document))
self.assertTrue(not doc.content)

def test_paragraphs(self):
"""
Try to read a simple xhtml document containing three paragraphs
"""
xhtml = "<div><p>p0</p><p>p1</p><p>p2</p></div>"
doc = XHTMLReader.read(xhtml)
self.assert_(len(doc.content) == 3)
self.assertTrue(len(doc.content) == 3)
for i, p in enumerate(doc.content):
self.assert_(isinstance(p, pyth.document.Paragraph))
self.assert_(len(p.content) == 1)
self.assert_(isinstance(p.content[0], pyth.document.Text))
self.assertTrue(isinstance(p, pyth.document.Paragraph))
self.assertTrue(len(p.content) == 1)
self.assertTrue(isinstance(p.content[0], pyth.document.Text))
text = p.content[0]
self.assert_(len(text.content) == 1)
self.assert_(text.content[0] == 'p%d' % i)
self.assertTrue(len(text.content) == 1)
self.assertTrue(text.content[0] == 'p%d' % i)

def test_bold(self):
"""
Expand Down
Loading