Skip to content

Commit

Permalink
Merge pull request #63 from mindsdb/staging
Browse files Browse the repository at this point in the history
Release 0.0.19
  • Loading branch information
paxcema authored Mar 12, 2024
2 parents 8d2047f + fef7cd9 commit 0909202
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 167 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ on:
jobs:
doc_build:
runs-on: ubuntu-latest
permissions:
contents: write

steps:
- name: checkout and set up
Expand Down
329 changes: 168 additions & 161 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "type_infer"
version = "0.0.18"
version = "0.0.19"
description = "Automated type inference for Machine Learning pipelines."
authors = ["MindsDB Inc. <[email protected]>"]
license = "GPL-3.0"
Expand All @@ -19,7 +19,7 @@ psutil = "^5.9.0"
toml = "^0.10.2"

# rule based deps, part of core
langid = "^1.1.6"
py3langid = ">=0.2.2,<0.3"
nltk = "^3"

[build-system]
Expand Down
8 changes: 8 additions & 0 deletions tests/unit_tests/test_helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

from type_infer import helpers
from type_infer.rule_based.helpers import get_language_dist


class TestCastStringToPythonType(unittest.TestCase):
Expand Down Expand Up @@ -32,3 +33,10 @@ def test_nan_is_numeric(self):
def test_inf_is_numeric(self):
    """Infinity, as a string or as a float, must be reported as numeric."""
    for candidate in ('inf', float('inf')):
        self.assertTrue(helpers.is_nan_numeric(candidate))


class TestLangid(unittest.TestCase):
    """Tests for the ``get_language_dist`` helper."""

    def test_get_language_dist(self):
        """One English and one Spanish sentence should each be counted once."""
        dist = get_language_dist(['This is clearly English', 'Y esto es claramente español'])
        # Assert membership and count separately so a failure reports which
        # language is missing or what count was produced, instead of the
        # opaque "False is not true" that a combined assertTrue would give.
        for lang in ('en', 'es'):
            self.assertIn(lang, dist)
            self.assertEqual(dist[lang], 1)
2 changes: 1 addition & 1 deletion type_infer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from type_infer import api
from type_infer import helpers

__version__ = '0.0.18'
__version__ = '0.0.19'


__all__ = [
Expand Down
6 changes: 3 additions & 3 deletions type_infer/rule_based/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

import numpy as np
import scipy.stats as st
from langid.langid import LanguageIdentifier
from langid.langid import model as langid_model
from py3langid.langid import LanguageIdentifier
from py3langid.langid import MODEL_FILE as LANGID_MODEL_FILE

from type_infer.dtype import dtype

Expand Down Expand Up @@ -109,7 +109,7 @@ def get_language_dist(data):
lang_dist = defaultdict(lambda: 0)
lang_dist['Unknown'] = 0
lang_probs_cache = dict()
identifier = LanguageIdentifier.from_modelstring(langid_model, norm_probs=True)
identifier = LanguageIdentifier.from_pickled_model(LANGID_MODEL_FILE, norm_probs=True)
for text in data:
text = str(text)
text = text.translate(str.maketrans('', '', string.punctuation))
Expand Down

0 comments on commit 0909202

Please sign in to comment.