From 64fec8ccf99b53dff3b76eb93e26dd33fe29990e Mon Sep 17 00:00:00 2001
From: george <george@cerebralab.com>
Date: Mon, 8 Nov 2021 13:15:56 -0500
Subject: [PATCH 1/3] fix: dropping columns with no info

---
 lightwood/helpers/text.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lightwood/helpers/text.py b/lightwood/helpers/text.py
index 992db70d5..e64dcde48 100644
--- a/lightwood/helpers/text.py
+++ b/lightwood/helpers/text.py
@@ -210,7 +210,12 @@ def get_identifier_description_mp(arg_tup):
 
 def get_identifier_description(data, column_name, data_dtype):
     data = list(data)
-    unquie_pct = len(set(data)) / len(data)
+    nr_unique = len(set(data))
+
+    if nr_unique == 1:
+        return 'No Information'
+
+    unquie_pct = nr_unique / len(data)
 
     spaces = [len(str(x).split(' ')) - 1 for x in data]
     mean_spaces = np.mean(spaces)

From fda35569d162e27dbe5039a879316720b09cff12 Mon Sep 17 00:00:00 2001
From: george <george@cerebralab.com>
Date: Wed, 10 Nov 2021 21:36:45 -0500
Subject: [PATCH 2/3] fix: spelling

---
 lightwood/helpers/text.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lightwood/helpers/text.py b/lightwood/helpers/text.py
index e64dcde48..13f7526f9 100644
--- a/lightwood/helpers/text.py
+++ b/lightwood/helpers/text.py
@@ -215,7 +215,7 @@ def get_identifier_description(data, column_name, data_dtype):
     if nr_unique == 1:
         return 'No Information'
 
-    unquie_pct = nr_unique / len(data)
+    unique_pct = nr_unique / len(data)
 
     spaces = [len(str(x).split(' ')) - 1 for x in data]
     mean_spaces = np.mean(spaces)
@@ -223,7 +223,7 @@ def get_identifier_description(data, column_name, data_dtype):
     # Detect auto incrementing index
     # -- some cases where I guess people do want to use this for learning, so ignoring this check for now...
     # if data_dtype == dtype.integer:
-    #    if get_pct_auto_increment(data) > 0.98 and unquie_pct > 0.99:
+    #    if get_pct_auto_increment(data) > 0.98 and unique_pct > 0.99:
     #        return 'Auto-incrementing identifier'
 
     # Detect hash
@@ -249,7 +249,7 @@ def get_identifier_description(data, column_name, data_dtype):
             return 'Foreign key'
 
     if _is_identifier_name(column_name) or data_dtype in (dtype.categorical, dtype.binary):
-        if unquie_pct > 0.98:
+        if unique_pct > 0.98:
             if is_uuid:
                 return 'UUID'
             else:
@@ -257,7 +257,7 @@ def get_identifier_description(data, column_name, data_dtype):
 
     # Everything is unique and it's too short to be rich text
     if data_dtype in (dtype.categorical, dtype.short_text, dtype.rich_text) and \
-            unquie_pct > 0.99999 and mean_spaces < 1:
+            unique_pct > 0.99999 and mean_spaces < 1:
         return 'Unknown identifier'
 
     return None

From 93eb330b557e5aae0845f04efdc48cc9d8082683 Mon Sep 17 00:00:00 2001
From: george <george@cerebralab.com>
Date: Wed, 10 Nov 2021 21:37:31 -0500
Subject: [PATCH 3/3] feat: type hinting

---
 lightwood/helpers/text.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lightwood/helpers/text.py b/lightwood/helpers/text.py
index 13f7526f9..f933f8582 100644
--- a/lightwood/helpers/text.py
+++ b/lightwood/helpers/text.py
@@ -13,6 +13,7 @@
 import json
 import re
 import hashlib
+from typing import Iterable
 import numpy as np
 import scipy.stats as st
 import langdetect
@@ -208,7 +209,7 @@ def get_identifier_description_mp(arg_tup):
     return get_identifier_description(data, column_name, data_dtype)
 
 
-def get_identifier_description(data, column_name, data_dtype):
+def get_identifier_description(data: Iterable, column_name: str, data_dtype: dtype):
     data = list(data)
     nr_unique = len(set(data))