
Commit 1b6fd0c

Replace tokenization with tokenize_rt (#77)
* Replace tokenization with tokenize_rt

  The standard library module `tokenize` does not round-trip, so we had to
  implement our own tokenization on top of it. However, tokenize_rt does this
  better, so let's adopt it instead.

* Drop version pin

* Fix pip upgrade on Windows
1 parent 0d21b04 commit 1b6fd0c
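
For context, a minimal sketch (not part of this commit) of the round-trip property that motivates the switch, using tokenize_rt's src_to_tokens/tokens_to_src API:

import tokenize_rt

# The stdlib tokenize module discards exact whitespace, so re-emitting source
# from its tokens does not reproduce the input. tokenize_rt keeps enough
# information that tokens_to_src(src_to_tokens(src)) returns src unchanged.
src = "async def f():\n    return await g()  # comment\n"
tokens = tokenize_rt.src_to_tokens(src)
assert tokenize_rt.tokens_to_src(tokens) == src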

File tree

3 files changed (+30 −48):

  ci.sh
  setup.py
  src/unasync/__init__.py


ci.sh

+1 −1

@@ -4,7 +4,7 @@ set -ex
 
 BLACK_VERSION=22.6.0
 
-pip install -U pip setuptools wheel
+python -m pip install -U pip setuptools wheel
 
 python setup.py sdist --formats=zip
 pip install dist/*.zip

setup.py

+1 −1

@@ -17,7 +17,7 @@
     include_package_data=True,
     packages=find_packages("src"),
     package_dir={"": "src"},
-    install_requires=[],
+    install_requires=["tokenize_rt"],
     keywords=["async"],
     python_requires=">=3.7",
     classifiers=[

src/unasync/__init__.py

+28 −46

@@ -6,6 +6,7 @@
 import sys
 import tokenize as std_tokenize
 
+import tokenize_rt
 from setuptools.command import build_py as orig
 
 from ._version import __version__  # NOQA
@@ -65,35 +66,41 @@ def _match(self, filepath):
     def _unasync_file(self, filepath):
         with open(filepath, "rb") as f:
             encoding, _ = std_tokenize.detect_encoding(f.readline)
-            f.seek(0)
-            tokens = _tokenize(f)
+
+        with open(filepath, "rt", encoding=encoding) as f:
+            tokens = tokenize_rt.src_to_tokens(f.read())
             tokens = self._unasync_tokens(tokens)
-            result = _untokenize(tokens)
+            result = tokenize_rt.tokens_to_src(tokens)
             outfilepath = filepath.replace(self.fromdir, self.todir)
             os.makedirs(os.path.dirname(outfilepath), exist_ok=True)
             with open(outfilepath, "wb") as f:
                 f.write(result.encode(encoding))
 
     def _unasync_tokens(self, tokens):
-        # TODO __await__, ...?
-        used_space = None
-        for space, toknum, tokval in tokens:
-            if tokval in ["async", "await"]:
-                # When removing async or await, we want to use the whitespace that
-                # was before async/await before the next token so that
-                # `print(await stuff)` becomes `print(stuff)` and not
-                # `print( stuff)`
-                used_space = space
+        skip_next = False
+        for i, token in enumerate(tokens):
+            if skip_next:
+                skip_next = False
+                continue
+
+            if token.src in ["async", "await"]:
+                # When removing async or await, we want to skip the following whitespace
+                # so that `print(await stuff)` becomes `print(stuff)` and not `print( stuff)`
+                skip_next = True
             else:
-                if toknum == std_tokenize.NAME:
-                    tokval = self._unasync_name(tokval)
-                elif toknum == std_tokenize.STRING:
-                    left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
-                    tokval = left_quote + self._unasync_name(name) + right_quote
-                if used_space is None:
-                    used_space = space
-                yield (used_space, tokval)
-                used_space = None
+                if token.name == "NAME":
+                    token = token._replace(src=self._unasync_name(token.src))
+                elif token.name == "STRING":
+                    left_quote, name, right_quote = (
+                        token.src[0],
+                        token.src[1:-1],
+                        token.src[-1],
+                    )
+                    token = token._replace(
+                        src=left_quote + self._unasync_name(name) + right_quote
+                    )
+
+                yield token
 
     def _unasync_name(self, name):
         if name in self.token_replacements:
@@ -122,31 +129,6 @@ def unasync_files(fpath_list, rules):
 Token = collections.namedtuple("Token", ["type", "string", "start", "end", "line"])
 
 
-def _tokenize(f):
-    last_end = (1, 0)
-    for tok in std_tokenize.tokenize(f.readline):
-        if tok.type == std_tokenize.ENCODING:
-            continue
-
-        if last_end[0] < tok.start[0]:
-            yield ("", std_tokenize.STRING, " \\\n")
-            last_end = (tok.start[0], 0)
-
-        space = ""
-        if tok.start > last_end:
-            assert tok.start[0] == last_end[0]
-            space = " " * (tok.start[1] - last_end[1])
-        yield (space, tok.type, tok.string)
-
-        last_end = tok.end
-        if tok.type in [std_tokenize.NEWLINE, std_tokenize.NL]:
-            last_end = (tok.end[0] + 1, 0)
-
-
-def _untokenize(tokens):
-    return "".join(space + tokval for space, tokval in tokens)
-
-
 _DEFAULT_RULE = Rule(fromdir="/_async/", todir="/_sync/")
 
 
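As an aside on the skip_next logic above, a hypothetical snippet (not in the repository) illustrating why skipping the token that follows "async"/"await" also removes the separating space: tokenize_rt emits inter-token whitespace as explicit UNIMPORTANT_WS tokens.

import tokenize_rt

# Print the token stream for a small await expression; the space between
# "await" and "stuff" appears as its own UNIMPORTANT_WS token.
for token in tokenize_rt.src_to_tokens("print(await stuff)\n"):
    print(token.name, repr(token.src))

# Expected output (roughly):
#   ...
#   NAME 'await'
#   UNIMPORTANT_WS ' '
#   NAME 'stuff'
#   ...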