diff --git a/.eggs/README.txt b/.eggs/README.txt new file mode 100644 index 0000000..5d01668 --- /dev/null +++ b/.eggs/README.txt @@ -0,0 +1,6 @@ +This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins. + +This directory caches those eggs to prevent repeated downloads. + +However, it is safe to delete this directory. + diff --git a/htmlslacker/htmlslacker.py b/htmlslacker/htmlslacker.py index a81c5de..c104c4e 100644 --- a/htmlslacker/htmlslacker.py +++ b/htmlslacker/htmlslacker.py @@ -4,6 +4,7 @@ except ImportError: from HTMLParser import HTMLParser from htmlentitydefs import name2codepoint +import re LINEBR = "::LINEBR::" @@ -23,6 +24,9 @@ def __init__(self, html, *args, **kwargs): except TypeError: HTMLParser.__init__(self, *args, **kwargs) self.skip = False + self.isProcessingList = False + self.isProcessingOrderedList = False + self.orderedNumber = 0 # slackified string self.output = '' @@ -43,9 +47,11 @@ def handle_starttag(self, tag, attrs): if tag == 'br' or tag == 'p': self.output += LINEBR if tag == 'b' or tag == 'strong': - self.output += '*' + self.output += ' *' + if re.match("h[1-6]{1}", tag): + self.output += ' *' if tag == 'i' or tag == 'em': - self.output += '_' + self.output += ' _' if tag == 'code': self.output += '`' if tag == 'a': @@ -55,6 +61,16 @@ def handle_starttag(self, tag, attrs): self.output += attr[1] + '|' if tag == 'style' or tag == 'script': self.skip = True + if tag == 'ul': + self.isProcessingList = True + if tag == 'li' and self.isProcessingList: + self.output += '• ' + if tag == 'ol': + self.orderedNumber = 1 + self.isProcessingOrderedList = True + if tag == 'li' and self.isProcessingOrderedList: + self.output += '{}. 
'.format(self.orderedNumber) + self.orderedNumber = self.orderedNumber + 1 def handle_endtag(self, tag): """ @@ -63,15 +79,25 @@ def handle_endtag(self, tag): :return: """ if tag == 'b' or tag == 'strong': - self.output += '*' + self.output += '* ' + if re.match("h[1-6]{1}", tag): + self.output += '* '+LINEBR if tag == 'i' or tag == 'em': - self.output += '_' + self.output += '_ ' if tag == 'a': self.output += '>' if tag == 'code': self.output += '`' if tag == 'style' or tag == 'script': self.skip = False + if tag == 'ul': + self.isProcessingList = False + if tag == 'li' and self.isProcessingList: + self.output += LINEBR + if tag == 'ol': + self.isProcessingOrderedList = False + if tag == 'li' and self.isProcessingOrderedList: + self.output += LINEBR def handle_data(self, data): """ @@ -105,4 +131,12 @@ def get_output(self): link: https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python :return: """ - return ' '.join(self.output.split()).replace(LINEBR, "\n") + output = self.output + output = re.sub(r'\*(\s\*)+', '*', output) + output = re.sub(r'_( _)+', '_', output) + output = output.replace('[] ', '☐ ').replace('[x] ', '☑︎ ') + output = ' '.join(output.split()) + output = output.replace(LINEBR, "\n") + output = re.sub(r' *\n *', '\n', output) + output = output.strip() + return output diff --git a/test_general.py b/test_general.py index 3bd2a2f..8f80fe1 100644 --- a/test_general.py +++ b/test_general.py @@ -11,7 +11,7 @@ def test_example_1(): link in a paragraph!

""" - expected = "*Hello*\n There is _something_ interesting about `this doc` \n And " + expected = "*Hello*\nThere is _something_ interesting about `this doc`\nAnd " output = HTMLSlacker(html).get_output() assert(output == expected) @@ -35,3 +35,39 @@ def test_link_with_target(): expected = "Please click " output = HTMLSlacker(html).get_output() assert(output == expected) + +def test_unordered_list(): + html = 'Here is my cool list
  • The Shining
  • Memento
  • Blade Runner
' + expected = 'Here is my cool list • The Shining\n• Memento\n• Blade Runner' + output = HTMLSlacker(html).get_output() + assert(output == expected) + +def test_ordered_list(): + html = 'Here is my cool list
  1. The Shining
  2. Memento
  3. Blade Runner
' + expected = 'Here is my cool list 1. The Shining\n2. Memento\n3. Blade Runner' + output = HTMLSlacker(html).get_output() + assert(output == expected) + +def test_unordered_list_with_text_modifications(): + html = 'Here is my cool list
  • The Shining
  • Memento
  • Blade Runner
' + expected = 'Here is my cool list • The Shining\n• Memento\n• Blade *Runner*' + output = HTMLSlacker(html).get_output() + assert(output == expected) + +def test_headers_rendered(): + html = '''

Hello

new

world

''' + expected = "*Hello*\nnew *world*" + output = HTMLSlacker(html).get_output() + assert(output == expected) + +def test_headers_rendered_no_spaces(): + html = '''

Hello

new

world

''' + expected = "*Hello*\nnew *world*" + output = HTMLSlacker(html).get_output() + assert(output == expected) + +def test_task_list_rendered(): + html = '''[] Grocery
[x] Laundary''' + expected = "☐ Grocery\n☑︎ Laundary" + output = HTMLSlacker(html).get_output() + assert(output == expected)