From 34272d8a090ce2b14d62f2c20c75bd5e016f086a Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Mon, 3 Jul 2023 21:52:28 -0400
Subject: [PATCH] fix warnings reported in #39

---
 type_infer/helpers.py | 5 +++--
 type_infer/infer.py   | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/type_infer/helpers.py b/type_infer/helpers.py
index 93d81b4..3ae6bc1 100644
--- a/type_infer/helpers.py
+++ b/type_infer/helpers.py
@@ -94,7 +94,7 @@ def get_identifier_description(data: Iterable, column_name: str, data_dtype: dty
     unique_pct = nr_unique / len(data)
 
     spaces = [len(str(x).split(' ')) - 1 for x in data]
-    mean_spaces = np.mean(spaces)
+    mean_spaces = np.mean(spaces) if len(spaces) > 0 else 0.0  # np.mean([]) warns and returns nan
 
     # Detect hash
     all_same_length = all(len(str(data[0])) == len(str(x)) for x in data)
@@ -113,7 +113,8 @@ def get_identifier_description(data: Iterable, column_name: str, data_dtype: dty
             else:
                 randomness_per_index.append(S / np.log(N))
 
-        if np.mean(randomness_per_index) > 0.95:
+        mean_randomness = np.mean(randomness_per_index) if len(randomness_per_index) > 0 else 0  # guard empty input
+        if mean_randomness > 0.95:
             return 'Hash-like identifier'
 
     # Detect foreign key
diff --git a/type_infer/infer.py b/type_infer/infer.py
index 42912f6..8ffb57f 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -233,10 +233,12 @@ def get_column_data_type(data: Union[np.ndarray, list], full_data: pd.DataFrame,
         if all(isinstance(x, str) for x in data):
             can_be_tags = True
 
+        mean_lengths = np.mean(lengths) if len(lengths) > 0 else 0  # np.mean([]) warns and returns nan
+
         # If more than 30% of the samples contain more than 1 category and there's more than 6 and less than 30 of them and they are shared between the various cells # noqa
-        if (can_be_tags and np.mean(lengths) > 1.3 and
+        if (can_be_tags and mean_lengths > 1.3 and
                 6 <= len(unique_tokens) <= 30 and
-                len(unique_tokens) / np.mean(lengths) < (len(data) / 4)):
+                len(unique_tokens) / mean_lengths < (len(data) / 4)):
             curr_dtype = dtype.tags
 
     # Categorical based on unique values