Skip to content

Commit

Permalink
first commit towards a unified API
Browse files Browse the repository at this point in the history
  • Loading branch information
Pedro Fluxa committed Dec 22, 2023
1 parent 2466b8e commit ae665ed
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 0 deletions.
Empty file added type_infer/column_modifiers.py
Empty file.
Empty file added type_infer/column_types.py
Empty file.
95 changes: 95 additions & 0 deletions type_infer/engines/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
""" base.py
Base implementation of engines to infer data type and other
useful information from tables.
"""
import pandas


class ColumnInfo:
    """ Simple container for important column information
        like data type, modifiers, length, number of
        invalid entries, etc.
    """
    def __init__(self, column_name: str):
        """ Initializer
            :param column_name (str)
                name of column
        """
        # base attributes; every field starts from a "not yet inferred"
        # sentinel so that callers can tell when nothing has been set.
        self.name_ = column_name
        self.length_ = -1
        self.data_type_ = 'unknown'
        self.data_type_info_ = {}
        # same sentinel spelling as `data_type_`, so one comparison
        # against 'unknown' works for both fields.
        self.modifier_ = 'unknown'
        self.modifier_info_ = {}

    def set_column_length(self, length: int) -> None:
        """ Set length of column.
            :param length (int)
                number of entries in the column.
        """
        self.length_ = length

    def get_column_length(self) -> int:
        """ Returns column length.
            :note
                if `set_column_length()` hasn't been called, then
                `get_column_length()` will return -1.
        """
        return self.length_

    def get_name(self) -> str:
        """ Return column name.
        """
        return self.name_

    def get_data_type(self) -> str:
        """ Returns column data type.
            :note
                'unknown' until a data type has been assigned.
        """
        return self.data_type_

    def get_data_type_info(self) -> dict:
        """ Returns copy of data type information.
            :note
                the copy is shallow: adding or removing keys on the
                returned dictionary does not affect this object.
        """
        return dict(self.data_type_info_)

    def get_modifier(self) -> str:
        """ Returns column modifier.
            :note
                'unknown' until a modifier has been assigned.
        """
        return self.modifier_

    def get_modifier_info(self) -> dict:
        """ Returns copy of modifier info.
            :note
                the copy is shallow: adding or removing keys on the
                returned dictionary does not affect this object.
        """
        return dict(self.modifier_info_)


class TypeInferenceEngine:
    """ Common base for engines that infer column types.

        Holds the engine name and the list of dataframes that
        have been attached for analysis.
    """
    def __init__(self, name: str):
        """ Initializer
            :param name (str)
                name of the engine.
        """
        # dataframes attached so far, in attachment order
        self.dfs_ = []
        self.name_ = name

    def attach_dataframe(self, df: pandas.DataFrame):
        """ Registers a dataframe to be analyzed by the engine.
            :param df (pandas.DataFrame)
                dataframe to be analyzed.
            :note
                the engine stores its own copy, so later changes
                to the caller's dataframe are not seen by the
                engine and vice versa.
        """
        # deep=True is the pandas default; spelled out to make the
        # isolation from the caller's object explicit.
        snapshot = df.copy(deep=True)
        self.dfs_.append(snapshot)

Empty file added type_infer/engines/bertype.py
Empty file.
Empty file.

0 comments on commit ae665ed

Please sign in to comment.