Skip to content

Commit c415465

Browse files
committed
cleanup
1 parent 4b495c4 commit c415465

File tree

12 files changed: 37 additions (+37) and 31 deletions (-31)

tests/unit_tests/rule_based/test_infer_dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def test_negative_integers(self):
1515
self.assertEqual(dtyp, dtype.integer)
1616

1717
def test_negative_floats(self):
18-
data = pd.DataFrame([-random.randint(-10, 10) for _ in range(100)] + [0.1], columns=['test_col'])
18+
data = pd.DataFrame([float(-random.randint(-10, 10)) for _ in range(100)] + [0.1], columns=['test_col'])
1919
engine = RuleBasedEngine()
2020
dtyp, dist, ainfo, warn, info = engine.get_column_data_type(data['test_col'], data, 'test_col', 0.0)
2121
self.assertEqual(dtyp, dtype.float)
Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,12 @@
1+
import unittest
2+
3+
from type_infer.rule_based.helpers import tokenize_text
4+
5+
6+
class TestDates(unittest.TestCase):
7+
def test_get_tokens(self):
8+
sentences = ['hello, world!', ' !hello! world!!,..#', '#hello!world']
9+
for sent in sentences:
10+
assert list(tokenize_text(sent)) == ['hello', 'world']
11+
12+
assert list(tokenize_text("don't wouldn't")) == ['do', 'not', 'would', 'not']

tests/unit_tests/test_misc.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from pathlib import Path
44

55
import type_infer
6-
from type_infer.rule_based.helpers import tokenize_text
76

87

98
class TestDates(unittest.TestCase):
@@ -19,10 +18,3 @@ def test_versions_are_in_sync(self):
1918
package_init_version = type_infer.__version__
2019

2120
self.assertEqual(package_init_version, pyproject_version)
22-
23-
def test_get_tokens(self):
24-
sentences = ['hello, world!', ' !hello! world!!,..#', '#hello!world']
25-
for sent in sentences:
26-
assert list(tokenize_text(sent)) == ['hello', 'world']
27-
28-
assert list(tokenize_text("don't wouldn't")) == ['do', 'not', 'would', 'not']

type_infer/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,10 @@
33
from type_infer import api
44
from type_infer import helpers
55

6-
from type_infer.api import ENGINES
7-
86
__version__ = '0.0.18'
97

108

119
__all__ = [
1210
'__version__',
1311
'base', 'dtype', 'api', 'helpers',
14-
'ENGINES'
1512
]

type_infer/api.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,10 @@
11
from typing import Dict, Optional
22
import pandas as pd
33

4-
from type_infer.base import TypeInformation
4+
from type_infer.base import TypeInformation, ENGINES
55
from type_infer.rule_based.core import RuleBasedEngine
66

77

8-
class ENGINES:
9-
RULE_BASED = 'rule_based'
10-
11-
128
def infer_types(
139
data: pd.DataFrame,
1410
config: Optional[Dict] = None

type_infer/base.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,3 +33,7 @@ def __init__(self, stable = True):
3333
def infer(self, df) -> TypeInformation:
3434
"""Given a dataframe, infer the types of each column and return a TypeInformation object."""
3535
raise NotImplementedError
36+
37+
38+
class ENGINES:
39+
RULE_BASED = 'rule_based'

type_infer/bert/core.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
from type_infer.base import BaseEngine
2+
3+
4+
class BERType(BaseEngine):
5+
def __init__(self, stable=False):
6+
super().__init__(stable=stable)
7+
8+
def infer(self, df):
9+
raise NotImplementedError

type_infer/bert/infer.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

type_infer/dtype.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,4 +46,5 @@ class dtype:
4646
empty = "empty"
4747
invalid = "invalid"
4848

49-
# TODO: introduce "modifiers"?
49+
50+
# TODO: modifier class + system

type_infer/helpers.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
11
import os
2-
3-
import pandas as pd
42
import psutil
53
import random
64
import logging
@@ -9,12 +7,12 @@
97
from typing import Iterable
108

119
import numpy as np
10+
import pandas as pd
1211
from scipy.stats import norm
1312

1413

1514
def initialize_log():
1615
pid = os.getpid()
17-
1816
handler = colorlog.StreamHandler()
1917
handler.setFormatter(colorlog.ColoredFormatter())
2018

type_infer/rule_based/core.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,17 @@
11
import re
22
import imghdr
33
import sndhdr
4-
from collections import Counter
5-
from typing import List, Union
64
import multiprocessing as mp
5+
from typing import List, Union
6+
from collections import Counter
77

8-
import pandas as pd
98
import numpy as np
9+
import pandas as pd
1010

1111
from type_infer.dtype import dtype
1212
from type_infer.base import BaseEngine, TypeInformation
13-
from type_infer.helpers import log, seed, sample_data, get_nr_procs
13+
from type_infer.helpers import log, seed, sample_data, get_nr_procs, is_nan_numeric, cast_string_to_python_type
1414
from type_infer.rule_based.helpers import get_language_dist, analyze_sentences, get_identifier_description_mp
15-
from type_infer.helpers import is_nan_numeric, cast_string_to_python_type
1615

1716

1817
class RuleBasedEngine(BaseEngine):
@@ -284,7 +283,7 @@ def count_data_types_in_column(self, data):
284283
return dtype_counts
285284

286285

287-
def get_column_data_type(self, data: Union[np.ndarray, list], full_data: pd.DataFrame, col_name: str, pct_invalid: float):
286+
def get_column_data_type(self, data: Union[pd.Series, np.ndarray, list], full_data: pd.DataFrame, col_name: str, pct_invalid: float):
288287
"""
289288
Provided the column data, define its data type and data subtype.
290289

type_infer/rule_based/helpers.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,14 @@
11
import re
22
import nltk
33
import string
4+
from typing import Iterable
5+
from collections import Counter, defaultdict
46

57
import numpy as np
68
import scipy.stats as st
79
from langid.langid import LanguageIdentifier
810
from langid.langid import model as langid_model
911

10-
from typing import Iterable
11-
from collections import Counter, defaultdict
12-
1312
from type_infer.dtype import dtype
1413

1514

0 commit comments

Comments
 (0)