From c335bcff9d4e312f27d21acfe4a6713b41ae74a2 Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Mon, 11 Mar 2019 18:50:12 +1100 Subject: [PATCH] Wrap lines of list items Paragraphs are usually wrapped at 78 characters per line. This patch applies that to list items as well. It contains elements from scumop who posted https://github.com/aaronsw/html2text/issues/13#issue-1185403. But it has been rewritten to fix the number of newline characters and increase readability and performance. --- html2text.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/html2text.py b/html2text.py index 17528901..1dde31cd 100755 --- a/html2text.py +++ b/html2text.py @@ -730,6 +730,7 @@ def optwrap(self, text): assert wrap, "Requires Python 2.3." result = '' newlines = 0 + reList = re.compile('(^[ ]+[0-9]+\. )|(^[ ]+[%s] )' %(self.ul_item_mark)) for para in text.split("\n"): if len(para) > 0: if not skipwrap(para): @@ -740,6 +741,17 @@ def optwrap(self, text): else: result += "\n\n" newlines = 2 + # Handle list item + elif reList.match(para): + list_prefix = reList.search(para).group() + indent_width = len(list_prefix) + indent_spaces = ' ' * indent_width + list_width = BODY_WIDTH - indent_width + wrapped = wrap(para, list_width) + result += wrapped[0] + "\n" + for line in wrapped[1:]: + result += indent_spaces + line + "\n" + newlines = 1 else: if not onlywhite(para): result += para + "\n"