code formatting

reynoldsnlp · Aug 20, 2021 · 52697bf
1 parent d3af84a
commit 52697bf
Show file tree

Hide file tree

Showing 20 changed files with 35 additions and 38 deletions.
diff --git a/dev/qa.sh b/dev/qa.sh
@@ -50,7 +50,7 @@ mypy src/udar
 
 
 echo "Running pytest..."
-pytest --cov=udar --cov-append --cov-report term-missing --doctest-modules
+python3.7 -m pytest --cov=udar --cov-append --cov-report term-missing --doctest-modules
 
 
 rm .coverage  # can conflict with tox

diff --git a/hfst_vislcg3_versions.txt b/hfst_vislcg3_versions.txt
@@ -1,3 +1,3 @@
 Versions with which tests passed for this commit:
-hfst-tokenize 0.1 (hfst 3.15.4)
-VISL CG-3 Disambiguator version 1.3.2.13891
+hfst-tokenize 0.1 (hfst 3.15.5)
+VISL CG-3 Disambiguator version 1.3.4.13891
diff --git a/scripts/post-commit.py b/scripts/post-commit.py
@@ -74,7 +74,7 @@ def tests():
                         ('1.2.3.1', '1.2.3.2'),
                         ('1.2.3.1b4', '1.2.3.1')]:
         print('1.2.3', t, bump_python_version(parse(t)), sep='\t')
-        assert bump_python_version(parse(t), beta=False) == expected, ('1.2.3', t, expected)
+        assert bump_python_version(parse(t), beta=False) == expected, ('1.2.3', t, expected)  # noqa: E501
     print('BETA')
     for t, expected in [('1.2.2.5', '1.2.3.0b0'),
                         ('1.2.2.5b4', '1.2.3.0b0'),
@@ -83,7 +83,7 @@ def tests():
                         ('1.2.3.1', '1.2.3.2b0'),
                         ('1.2.3.1b4', '1.2.3.1b5')]:
         print('1.2.3', t, bump_python_version(parse(t), beta=True), sep='\t')
-        assert bump_python_version(parse(t), beta=True) == expected, ('1.2.3', t, expected)
+        assert bump_python_version(parse(t), beta=True) == expected, ('1.2.3', t, expected)  # noqa: E501
 
 
 if __name__ == '__main__':
@@ -93,10 +93,10 @@ def tests():
         pypi_version = get_pypi_version(test=TEST)
         print(f'Current {"Test " if TEST else ""}PyPI version:',
               pypi_version, file=sys.stderr)
-        latest_tag = subprocess.check_output('git describe --tags'.split(), encoding='utf8').rstrip()
+        latest_tag = subprocess.check_output('git describe --tags'.split(), encoding='utf8').rstrip()  # noqa: E501
         print(f'Latest git tag: {latest_tag}', file=sys.stderr)
         new_version = bump_python_version(pypi_version, beta=BETA)
         print('Suggested new version:', new_version, file=sys.stderr)
-        version = input(f'Please type the version number (default: {new_version}): ')
+        version = input(f'Please type the version number (default: {new_version}): ')  # noqa: E501
         completed = subprocess.run(['git', 'tag', f'v{version}'])
         sys.exit(completed.returncode)
diff --git a/setup.cfg b/setup.cfg
@@ -64,11 +64,11 @@ test =
 
 [flake8]
 doctests = True
-ignore = N802,N806
+ignore = N802,N806,W503
 max-complexity = 10
 
 [tool:pytest]
 testpaths =
     test
 norecursedirs = *.egg-info .git .tox corp dev
-flake8-ignore = N802,N806
+flake8-ignore = N802,N806,W503
diff --git a/setup.py b/setup.py
@@ -5,8 +5,8 @@
 import setuptools_scm  # noqa: F401
 import toml  # noqa: F401
 
-site.ENABLE_USER_SITE = '--user' in sys.argv[1:]  # workaround for https://github.com/pypa/pip/issues/7953
-
+# workaround for https://github.com/pypa/pip/issues/7953
+site.ENABLE_USER_SITE = '--user' in sys.argv[1:]
 
 
 setuptools.setup(package_data={'udar': ['src/udar/resources/*']},

diff --git a/src/udar/__init__.py b/src/udar/__init__.py
@@ -16,4 +16,4 @@
 
 from .features import *  # noqa: F401, F403
 
-from .version import version as __version__
+from .version import version as __version__  # noqa: F401
diff --git a/src/udar/convenience.py b/src/udar/convenience.py
@@ -39,7 +39,8 @@ def stressed(in_str: str, disambiguate=False, **kwargs):
     return in_doc.stressed(**kwargs)
 
 
-def noun_distractors(noun: Union[str, Reading], stressed=True, L2_errors=False):
+def noun_distractors(noun: Union[str, Reading], stressed=True,
+                     L2_errors=False):
     """Given an input noun, return set of wordforms in its paradigm.
 
     The input noun can be in any case. Output paradigm is limited to the same
@@ -82,8 +83,8 @@ def diagnose_L2(in_str: str, tokenizer=None):
 
     Return dict of errors: {<Tag>: {set, of, exemplars, in, text}, ...}
 
-    >>> diag = diagnose_L2('Мы разговаривали в кафетерие с Таной')
-    >>> diag == {'Err/L2_ii': {'кафетерие'}, 'Err/L2_Pal': {'Таной'}}
+    >>> diag = diagnose_L2('Он видил нас на тролейбусе.')
+    >>> diag == {'Err/L2_Ikn': {'видил'}, 'Err/L2_NoGem': {'тролейбусе'}}
     True
     >>> tag_info('Err/L2_ii')
     'L2 error: Failure to change ending ие to ии in +Sg+Loc or +Sg+Dat, e.g. к Марие, о кафетерие, о знание'

diff --git a/src/udar/conversion/OC_conflicts.py b/src/udar/conversion/OC_conflicts.py
@@ -1,6 +1,5 @@
-OC_conflicts = {
-                # OC_tag: set of conflicting udar tags
-                'ADJF': {'N'},
+# OC_tag: set of conflicting udar tags
+OC_conflicts = {'ADJF': {'N'},
                 'ADJS': {'N'},
                 'ADVB': {'A', 'CS', 'Pcle'},
                 'CONJ': {'Pron', 'N', 'Interj', 'Adv'},

diff --git a/src/udar/conversion/OC_tags.py b/src/udar/conversion/OC_tags.py
@@ -1,6 +1,5 @@
 # Tags used in opencorpora.org
-tags = [
-        ('', 'POST', 'ЧР', 'часть речи'),
+tags = [('', 'POST', 'ЧР', 'часть речи'),
         ('POST', 'NOUN', 'СУЩ', 'имя существительное'),
         ('POST', 'ADJF', 'ПРИЛ', 'имя прилагательное (полное)'),
         ('POST', 'ADJS', 'КР_ПРИЛ', 'имя прилагательное (краткое)'),
@@ -60,4 +59,4 @@
         ('', 'Poss', 'притяж', 'притяжательное'),
         ('', 'V-ey', '*ею', 'форма на -ею'),
         ('', 'V-oy', '*ою', ''),
-       ]
+        ]
diff --git a/src/udar/conversion/UD_conflicts.py b/src/udar/conversion/UD_conflicts.py
@@ -1,6 +1,5 @@
-UD_conflicts = {
-                # UD_tag: set of conflicting udar tags
-                'ADJ': {'N'},
+# UD_tag: set of conflicting udar tags
+UD_conflicts = {'ADJ': {'N'},
                 'ADV': {'A', 'CS', 'Pcle'},
                 'CCONJ': {'Pron', 'N', 'Interj', 'Adv', 'CS'},
                 'SCONJ': {'Pron', 'N', 'Interj', 'Adv', 'CC'},

diff --git a/src/udar/features/feature_extractor.py b/src/udar/features/feature_extractor.py
@@ -2,7 +2,6 @@
 from collections import namedtuple
 from datetime import datetime
 import sys
-from typing import Any
 from typing import Dict
 from typing import List
 from typing import Optional
@@ -66,7 +65,7 @@ def new_extractor_from_subset(self, feat_names: List[str] = None,
     def __call__(self, docs: Union[List[Document], Document], feat_names=None,
                  category_names: List[str] = None, header=True,
                  return_named_tuples=True, tsv=False,
-                 **kwargs) -> Union[List[Tuple[Any, ...]], str]:
+                 **kwargs) -> Union[List[Tuple], str]:
         feat_names = self._get_cat_and_feat_names(feat_names=feat_names,
                                                   category_names=category_names)  # noqa: E501
         if return_named_tuples:
@@ -75,7 +74,7 @@ def __call__(self, docs: Union[List[Document], Document], feat_names=None,
             else:
                 tuple_constructor = namedtuple('Features', feat_names)  # type: ignore  # noqa: E501
         else:
-            tuple_constructor = tuple
+            tuple_constructor = tuple  # type: ignore
         output = []
         if header:
             output.append(feat_names)

diff --git a/src/udar/features/features.py b/src/udar/features/features.py
@@ -1,5 +1,4 @@
 import pickle
-from pkg_resources import resource_filename
 from typing import Dict
 from typing import Optional
 from typing import Union

diff --git a/src/udar/fsts.py b/src/udar/fsts.py
@@ -29,8 +29,10 @@
 def decompress_fsts(fst_dir=FST_DIR):
     os.makedirs(fst_dir, exist_ok=True)
     for fname in glob(f'{RSRC_DIR}/*.gz'):
-        target_fname = f'{FST_DIR}/{fname.split(os.path.sep)[-1][:-3]}'  # remove ".gz"
-        print(f'\tdecompressing {fname} to {target_fname} ...', file=sys.stderr)
+        # remove ".gz"
+        target_fname = f'{FST_DIR}/{fname.split(os.path.sep)[-1][:-3]}'
+        print(f'\tdecompressing {fname} to {target_fname} ...',
+              file=sys.stderr)
         with gzip.open(fname) as gzipped:
             with open(target_fname, 'wb') as unzipped:
                 unzipped.write(gzipped.read())

diff --git a/src/udar/sentence.py b/src/udar/sentence.py
@@ -489,7 +489,8 @@ def disambiguate(self, gram_path: Union[str, Path] = '',
         else:
             cmd = ['vislcg3', '-g', gram_path]
         try:
-            p = Popen(cmd, encoding='utf8', stdin=PIPE, stdout=PIPE, universal_newlines=True)
+            p = Popen(cmd, encoding='utf8', stdin=PIPE, stdout=PIPE,
+                      universal_newlines=True)
         except FileNotFoundError as e:
             raise FileNotFoundError('vislcg3 must be installed and be in your '
                                     'PATH variable to disambiguate a text.') from e  # noqa: E501

diff --git a/test/test_README.py b/test/test_README.py
@@ -40,7 +40,7 @@ def test_blocks():
             with stdoutIO() as s:
                 exec(code, globals())
             out = s.getvalue().strip()
-            assert out == expected_out, f'{code} => {out}' 
+            assert out == expected_out, f'{code} => {out}'
 
 
 def test_properties_documented_in_tables_actually_exist():

diff --git a/test/test_document.py b/test/test_document.py
@@ -1,5 +1,4 @@
 from itertools import chain
-from pkg_resources import resource_filename
 from sys import stderr
 
 import udar
@@ -183,4 +182,4 @@ def test_str2Sentences():
 
 def test_to_json():
     doc = udar.Document('Мы здесь.')
-    assert doc.to_json() == '''[[{"id": "", "text": "Мы", "readings": [[{"lemma": "мы", "tags": ["Pron", "Pers", "Pl1", "Nom"]}]], "removed_readings": [], "head": -1, "deprel": ""}, {"id": "", "text": "здесь", "readings": [[{"lemma": "здесь", "tags": ["Adv"]}]], "removed_readings": [], "head": -1, "deprel": ""}, {"id": "", "text": ".", "readings": [[{"lemma": ".", "tags": ["CLB"]}]], "removed_readings": [], "head": -1, "deprel": ""}]]'''
+    assert doc.to_json() == '''[[{"id": "", "text": "Мы", "readings": [[{"lemma": "мы", "tags": ["Pron", "Pers", "Pl1", "Nom"]}]], "removed_readings": [], "head": -1, "deprel": ""}, {"id": "", "text": "здесь", "readings": [[{"lemma": "здесь", "tags": ["Adv"]}]], "removed_readings": [], "head": -1, "deprel": ""}, {"id": "", "text": ".", "readings": [[{"lemma": ".", "tags": ["CLB"]}]], "removed_readings": [], "head": -1, "deprel": ""}]]'''  # noqa: E501
diff --git a/test/test_reading.py b/test/test_reading.py
@@ -78,4 +78,4 @@ def test_can_be_pickled():
 
 def test_to_json():
     r = udar.reading.Reading(*('за+Pr#нечего+Pron+Neg+Acc', '50.000000'))
-    assert r.to_json() == '''[{"lemma": "за", "tags": ["Pr"]}, {"lemma": "нечего", "tags": ["Pron", "Neg", "Acc"]}]''', repr(r)
+    assert r.to_json() == '''[{"lemma": "за", "tags": ["Pr"]}, {"lemma": "нечего", "tags": ["Pron", "Neg", "Acc"]}]''', repr(r)  # noqa: E501
diff --git a/test/test_sentence.py b/test/test_sentence.py
@@ -1,5 +1,4 @@
 from copy import deepcopy
-from pkg_resources import resource_filename
 from pprint import pprint
 from sys import stderr
 

diff --git a/test/test_subreading.py b/test/test_subreading.py
@@ -71,4 +71,4 @@ def test_can_be_pickled():
 
 def test_to_json():
     s = udar.reading.Subreading('слово+N+Neu+Inan+Pl+Ins')
-    assert s.to_json() == '''{"lemma": "слово", "tags": ["N", "Neu", "Inan", "Pl", "Ins"]}''', repr(s)
+    assert s.to_json() == '''{"lemma": "слово", "tags": ["N", "Neu", "Inan", "Pl", "Ins"]}''', repr(s)  # noqa: E501
diff --git a/test/test_tok.py b/test/test_tok.py
@@ -110,4 +110,4 @@ def test_transliterate():
 
 def test_to_json():
     t = udar.Token('объясняли', _analyzer=anl)
-    assert t.to_json() == '''{"id": "", "text": "объясняли", "readings": [[{"lemma": "объяснять", "tags": ["V", "Impf", "IV", "Pst", "MFN", "Pl"]}], [{"lemma": "объяснять", "tags": ["V", "Impf", "TV", "Pst", "MFN", "Pl"]}]], "removed_readings": [], "head": -1, "deprel": ""}''', repr(t)
+    assert t.to_json() == '''{"id": "", "text": "объясняли", "readings": [[{"lemma": "объяснять", "tags": ["V", "Impf", "IV", "Pst", "MFN", "Pl"]}], [{"lemma": "объяснять", "tags": ["V", "Impf", "TV", "Pst", "MFN", "Pl"]}]], "removed_readings": [], "head": -1, "deprel": ""}''', repr(t)  # noqa: E501
diff --git a/src/udar/__init__.py b/src/udar/__init__.py
@@ -16,4 +16,4 @@
 
 from .features import *  # noqa: F401, F403
 
-from .version import version as __version__
+from .version import version as __version__  # noqa: F401