Skip to content

Commit 4e8f011

Browse files
authored
Merge pull request #37 from krassowski/plain-text
Add plain text and cPython docstring support
2 parents e43172e + 08286b6 commit 4e8f011

File tree

8 files changed

+282
-2
lines changed

8 files changed

+282
-2
lines changed

docstring_to_markdown/__init__.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from .cpython import cpython_to_markdown
12
from .google import google_to_markdown, looks_like_google
3+
from .plain import looks_like_plain_text, plain_text_to_markdown
24
from .rst import looks_like_rst, rst_to_markdown
35

4-
__version__ = "0.13"
6+
__version__ = "0.14"
57

68

79
class UnknownFormatError(Exception):
@@ -15,4 +17,11 @@ def convert(docstring: str) -> str:
1517
if looks_like_google(docstring):
1618
return google_to_markdown(docstring)
1719

20+
if looks_like_plain_text(docstring):
21+
return plain_text_to_markdown(docstring)
22+
23+
cpython = cpython_to_markdown(docstring)
24+
if cpython:
25+
return cpython
26+
1827
raise UnknownFormatError()

docstring_to_markdown/_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from re import sub
2+
3+
4+
def escape_markdown(text: str) -> str:
    """Backslash-escape characters that Markdown would treat as markup.

    Every backslash, ``#``, ``*``, ``_``, ``[`` and ``]`` found in ``text``
    gets a backslash placed in front of it; all other characters are
    returned unchanged.
    """
    special_characters = r'([\\#*_[\]])'
    escaped_form = r'\\\1'
    return sub(special_characters, escaped_form, text)

docstring_to_markdown/cpython.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Union, List
2+
from re import fullmatch
3+
4+
from ._utils import escape_markdown
5+
6+
def _is_cpython_signature_line(line: str) -> bool:
7+
"""CPython uses signature lines in the following format:
8+
9+
str(bytes_or_buffer[, encoding[, errors]]) -> str
10+
"""
11+
return fullmatch(r'\w+\(\S*(, \S+)*(\[, \S+\])*\)\s--?>\s.+', line) is not None
12+
13+
14+
def cpython_to_markdown(text: str) -> Union[str, None]:
    """Convert a CPython-style docstring to Markdown.

    The leading signature lines, together with the space-prefixed lines that
    follow them, are wrapped in a fenced code block. All remaining lines are
    appended after the fence with Markdown special characters escaped.

    Args:
        text: the docstring to convert.

    Returns:
        The Markdown rendering, or ``None`` when ``text`` does not begin with
        a CPython signature line (this includes empty input).
    """
    signature_lines: List[str] = []
    other_lines: List[str] = []
    for line in text.splitlines():
        if not other_lines and _is_cpython_signature_line(line):
            # Signature lines are only collected before any description text.
            signature_lines.append(line)
        elif not signature_lines:
            # The first line is not a signature, so this is not CPython format.
            return None
        # NOTE(review): the prefix literal below is copied as it appears in the
        # reviewed diff view, where runs of whitespace may have been collapsed;
        # confirm the intended indent width against the real file.
        elif line.startswith(' '):
            signature_lines.append(line)
        else:
            other_lines.append(line)
    if not signature_lines:
        # Empty input never enters the loop; without this guard it would be
        # rendered as an empty code fence instead of being rejected.
        return None
    return '\n'.join([
        '```',
        '\n'.join(signature_lines),
        '```',
        escape_markdown('\n'.join(other_lines))
    ])
32+
33+
def looks_like_cpython(text: str) -> bool:
    """Report whether ``cpython_to_markdown`` produces a result for ``text``."""
    converted = cpython_to_markdown(text)
    return converted is not None
35+
36+
37+
__all__ = ['looks_like_cpython', 'cpython_to_markdown']

docstring_to_markdown/plain.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from re import fullmatch
2+
from ._utils import escape_markdown
3+
4+
5+
def looks_like_plain_text(value: str) -> bool:
    """Check whether ``value`` reads as plain English prose that needs no escaping.

    Accepts:
    - word characters and whitespace
    - a full stop, bang, question mark, closing parenthesis or quote placed
      right after a word character, if followed by whitespace or the end of
      the string
    - a comma, colon or semicolon placed right after a word character, if
      followed by whitespace
    - a dash or single quote between two word characters (like in `e-mail`)
    - an opening parenthesis or quote preceded by a word character and
      whitespace

    Does not accept:
    - underscores
    - any other character, e.g. square brackets (used in markdown a lot)
    """
    prose_pattern = r"((\w[\.!\?\)'\"](\s|$))|(\w[,:;]\s)|(\w[-']\w)|(\w\s['\"\(])|\w|\s)+"
    # Underscores count as word characters for the pattern, so they are
    # ruled out separately before matching.
    return '_' not in value and fullmatch(prose_pattern, value) is not None
22+
23+
24+
def plain_text_to_markdown(text: str) -> str:
    """Return ``text`` after passing it through ``escape_markdown``."""
    escaped = escape_markdown(text)
    return escaped
26+
27+
__all__ = ['looks_like_plain_text', 'plain_text_to_markdown']

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ docstring-to-markdown = py.typed
3737
addopts =
3838
--pyargs tests
3939
--cov docstring_to_markdown
40-
--cov-fail-under=98
40+
--cov-fail-under=99
4141
--cov-report term-missing:skip-covered
4242
-p no:warnings
4343
--flake8

tests/test_convert.py

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from docstring_to_markdown import convert, UnknownFormatError
2+
import pytest
3+
4+
CPYTHON = """\
5+
bool(x) -> bool
6+
7+
Returns True when the argument x is true, False otherwise.\
8+
"""
9+
10+
11+
CPYTHON_MD = """\
12+
```
13+
bool(x) -> bool
14+
```
15+
16+
Returns True when the argument x is true, False otherwise.\
17+
"""
18+
19+
GOOGLE = """Do **something**.
20+
21+
Args:
22+
a: some arg
23+
b: some arg
24+
"""
25+
26+
GOOGLE_MD = """Do **something**.
27+
28+
#### Args
29+
30+
- `a`: some arg
31+
- `b`: some arg
32+
"""
33+
34+
35+
RST = "Please see `this link<https://example.com>`__."
36+
RST_MD = "Please see [this link](https://example.com)."
37+
38+
39+
def test_convert_cpython():
    """``convert`` turns the ``CPYTHON`` fixture into ``CPYTHON_MD``."""
    converted = convert(CPYTHON)
    assert converted == CPYTHON_MD
41+
42+
43+
def test_convert_plain_text():
    """``convert`` returns a plain English sentence unchanged."""
    sentence = 'This is a sentence.'
    converted = convert(sentence)
    assert converted == 'This is a sentence.'
45+
46+
47+
def test_convert_google():
    """``convert`` turns the ``GOOGLE`` fixture into ``GOOGLE_MD``."""
    converted = convert(GOOGLE)
    assert converted == GOOGLE_MD
49+
50+
51+
def test_convert_rst():
    """``convert`` turns the ``RST`` fixture into ``RST_MD``."""
    converted = convert(RST)
    assert converted == RST_MD
53+
54+
55+
def test_unknown_format():
    """``convert`` raises ``UnknownFormatError`` for this unrecognised input."""
    unrecognised = 'ARGS [arg1, arg2] RETURNS: str OR None'
    with pytest.raises(UnknownFormatError):
        convert(unrecognised)

tests/test_cpython.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import pytest
2+
from docstring_to_markdown.cpython import looks_like_cpython, cpython_to_markdown
3+
4+
BOOL = """\
5+
bool(x) -> bool
6+
7+
Returns True when the argument x is true, False otherwise.\
8+
"""
9+
10+
BOOL_MD = """\
11+
```
12+
bool(x) -> bool
13+
```
14+
15+
Returns True when the argument x is true, False otherwise.\
16+
"""
17+
18+
BYTES = """\
19+
bytes(iterable_of_ints) -> bytes
20+
bytes(string, encoding[, errors]) -> bytes
21+
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
22+
bytes(int) -> bytes object of size given by the parameter initialized with null bytes
23+
bytes() -> empty bytes object
24+
25+
Construct an immutable array of bytes from:
26+
- an iterable yielding integers in range(256)
27+
- a text string encoded using the specified encoding
28+
- any object implementing the buffer API.
29+
- an integer\
30+
"""
31+
32+
COLLECTIONS_DEQUEUE = """\
33+
deque([iterable[, maxlen]]) --> deque object
34+
35+
A list-like sequence optimized for data accesses near its endpoints.\
36+
"""
37+
38+
DICT = """\
39+
dict() -> new empty dictionary
40+
dict(mapping) -> new dictionary initialized from a mapping object's
41+
(key, value) pairs
42+
dict(iterable) -> new dictionary initialized as if via:
43+
d = {}
44+
for k, v in iterable:
45+
d[k] = v
46+
dict(**kwargs) -> new dictionary initialized with the name=value pairs
47+
in the keyword argument list. For example: dict(one=1, two=2)\
48+
"""
49+
50+
STR = """\
51+
str(object='') -> str
52+
str(bytes_or_buffer[, encoding[, errors]]) -> str
53+
54+
Create a new string object from the given object. If encoding or
55+
errors is specified, then the object must expose a data buffer
56+
that will be decoded using the given encoding and error handler.
57+
Otherwise, returns the result of object.__str__() (if defined)
58+
or repr(object).\
59+
"""
60+
61+
STR_MD = """\
62+
```
63+
str(object='') -> str
64+
str(bytes_or_buffer[, encoding[, errors]]) -> str
65+
```
66+
67+
Create a new string object from the given object. If encoding or
68+
errors is specified, then the object must expose a data buffer
69+
that will be decoded using the given encoding and error handler.
70+
Otherwise, returns the result of object.\\_\\_str\\_\\_() (if defined)
71+
or repr(object).\
72+
"""
73+
74+
75+
@pytest.mark.parametrize(
    "text",
    [BYTES, STR, DICT, BOOL, COLLECTIONS_DEQUEUE],
)
def test_accepts_cpython_docstrings(text):
    """Each CPython docstring fixture is recognised by ``looks_like_cpython``."""
    verdict = looks_like_cpython(text)
    assert verdict is True
78+
79+
80+
@pytest.mark.parametrize(
    "text",
    [
        "[link label](https://link)",
        "![image label](https://source)",
        "Some **bold** text",
        "More __bold__ text",
        "Some *italic* text",
        "More _italic_ text",
        "This is a sentence.",
        "Exclamation!",
        "Can I ask a question?",
        "Let's send an e-mail",
        "Parentheses (are) fine (really)",
        "Double \"quotes\" and single 'quotes'",
    ],
)
def test_rejects_markdown_and_plain_text(text):
    """Markdown snippets and plain sentences are not taken for CPython docstrings."""
    verdict = looks_like_cpython(text)
    assert verdict is False
96+
97+
98+
def test_conversion_bool():
    """``cpython_to_markdown`` turns the ``BOOL`` fixture into ``BOOL_MD``."""
    rendered = cpython_to_markdown(BOOL)
    assert rendered == BOOL_MD
100+
101+
102+
def test_conversion_str():
    """``cpython_to_markdown`` turns the ``STR`` fixture into ``STR_MD``."""
    rendered = cpython_to_markdown(STR)
    assert rendered == STR_MD

tests/test_plain.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import pytest
2+
from docstring_to_markdown.plain import looks_like_plain_text, plain_text_to_markdown
3+
4+
5+
@pytest.mark.parametrize(
    "text",
    [
        "This is a sentence.",
        "Exclamation!",
        "Can I ask a question?",
        "Let's send an e-mail",
        "Parentheses (are) fine (really)",
        "Double \"quotes\" and single 'quotes'",
    ],
)
def test_accepts_english(text):
    """Ordinary English sentences are recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is True
15+
16+
17+
@pytest.mark.parametrize(
    "text",
    [
        "[link label](https://link)",
        "![image label](https://source)",
        "Some **bold** text",
        "More __bold__ text",
        "Some *italic* text",
        "More _italic_ text",
    ],
)
def test_rejects_markdown(text):
    """Strings containing Markdown markup are not recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is False
27+
28+
29+
@pytest.mark.parametrize(
    "text",
    [
        "def test():",
        "print(123)",
        "func(arg)",
        "2 + 2",
        "var['test']",
        "x = 'test'",
    ],
)
def test_rejects_code(text):
    """Code-like snippets are not recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is False
39+
40+
41+
def test_conversion():
    """``plain_text_to_markdown`` returns the word "test" unchanged."""
    rendered = plain_text_to_markdown("test")
    assert rendered == "test"

0 commit comments

Comments
 (0)