From 5d99995a2c7a46ff3fe5e4317e68d4ce24f02872 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 16:09:31 +0000
Subject: [PATCH 01/16] added support for timestamps as time (seconds, ms, us,
 ns and days) since epoch (unix or Julian)

---
 tests/unit_tests/test_dates.py | 38 ++++++++++++++++++
 type_infer/dtype.py            |  2 +
 type_infer/infer.py            | 70 +++++++++++++++++++++++++++++-----
 3 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/tests/unit_tests/test_dates.py b/tests/unit_tests/test_dates.py
index 14959ab..8a06563 100644
--- a/tests/unit_tests/test_dates.py
+++ b/tests/unit_tests/test_dates.py
@@ -5,5 +5,43 @@
 
 
 class TestDates(unittest.TestCase):
+
     def test_0_type_check_dates(self):
+        """ Checks parsing of string containing a date to dtype 'date'.
+        """
         self.assertEqual(type_check_date('31/12/2010'), dtype.date)
+    
+    def test_1_type_check_datetime(self):
+        """ Checks parsing of string containing a date to dtype 'datetime'.
+        """
+        self.assertEqual(type_check_date('31/12/2010 23:15:41'), dtype.datetime)
+    
+    def test_2_type_check_timestamp_unix_seconds(self):
+        """ Checks parsing a number containing 1989-12-15T07:30:00 (as seconds
+            since Unix epoch) to dtype 'timestamp'.
+        """
+        self.assertEqual(type_check_date(629721000.0), dtype.timestamp)
+    
+    def test_3_type_check_timestamp_unix_miliseconds(self):
+        """ Checks parsing a number containing 1989-12-15T07:30:00 (as miliseconds
+            since Unix epoch) to dtype 'timestamp'.
+        """
+        self.assertEqual(type_check_date(629721000000.0), dtype.timestamp)
+
+    def test_4_type_check_timestamp_unix_microseconds(self):
+        """ Checks parsing a number containing 1989-12-15T07:30:00 (as microseconds
+            since Unix epoch) to dtype 'timestamp'.
+        """
+        self.assertEqual(type_check_date(629721000000000.0), dtype.timestamp)
+    
+    def test_5_type_check_timestamp_unix_nanoseconds(self):
+        """ Checks parsing a number containing 1989-12-15T07:30:00 (as nanoseconds
+            since Unix epoch) to dtype 'timestamp'.
+        """
+        self.assertEqual(type_check_date(629721000000000000.0), dtype.timestamp)
+    
+    def test_6_type_check_timestamp_julian_days(self):
+        """ Checks parsing a number containing 1989-12-15T07:30:00 (as days since
+            Julian calendar epoch) to dtype 'timestamp'.
+        """
+        self.assertEqual(type_check_date(2447875.81250), dtype.timestamp)
\ No newline at end of file
diff --git a/type_infer/dtype.py b/type_infer/dtype.py
index 9f05a2e..1f4f460 100644
--- a/type_infer/dtype.py
+++ b/type_infer/dtype.py
@@ -5,6 +5,7 @@ class dtype:
     - **Numerical**: Data that should be represented in the form of a number. Currently ``integer``, ``float``, and ``quantity`` are supported.
     - **Categorical**: Data that represents a class or label and is discrete. Currently ``binary``, ``categorical``, and ``tags`` are supported.
     - **Date/Time**: Time-series data that is temporal/sequential. Currently ``date``, and ``datetime`` are supported.
+    - **Timestamp**: Data that represents time in the form of the amount of nano/micro/milli-seconds, seconds after midnight 1970-01-01. Julian days are also supported.
     - **Text**: Data that can be considered as language information.  Currently ``short_text``, and ``rich_text`` are supported. Short text has a small vocabulary (~ 100 words) and is generally a limited number of characters. Rich text is anything with greater complexity.
     - **Complex**: Data types that require custom techniques. Currently ``audio``, ``video`` and ``image`` are available, but highly experimental.
     - **Array**: Data in the form of a sequence where order must be preserved. ``tsarray`` dtypes are for "normal" columns that will be transformed to arrays at a row-level because they will be treated as time series.
@@ -26,6 +27,7 @@ class dtype:
     # Dates and Times (time-series)
     date = "date"
     datetime = "datetime"
+    timestamp = "timestamp"
 
     # Text
     short_text = "short_text"
diff --git a/type_infer/infer.py b/type_infer/infer.py
index b54f1b0..226bed4 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -128,18 +128,68 @@ def type_check_sequence(element: object) -> str:
 
 
 def type_check_date(element: object) -> str:
+    """
+    Check if element corresponds to a date-like object.
+    """
+    # check if element represents a unix-timestamp
+    isTimestamp = False
+    # check if element represents a date (no hour/minute/seconds)
+    isDate = False
+    # check if element represents a datetime (has hour/minute/seconds)
+    isDatetime = False
+
+    # check if it makes sense to convert element to unix time-stamp by 
+    # evaluating if, when converted, the element represents a number
+    # that is compatible with a Unix timestamp (number of seconds since 1970-01-01T:00:00:00)
+    # note that we also check the number is not larger than the "epochalypse time",
+    # which is when the unix timestamp becomes larger than 2^32 - 1 seconds. We do
+    # this because timestamps outside this range are likely to be unreliable and hence
+    # rather treated as every-day numbers.
     try:
-        dt = pd.to_datetime(element)
-
-        # Not accurate 100% for a single datetime str, but should work in aggregate
-        if dt.hour == 0 and dt.minute == 0 and dt.second == 0 and len(str(element)) <= 16:
-            return dtype.date
+        unt = ''
+        for unt in ['ns', 'us', 'ms', 's']:
+            dt = pd.to_datetime(element, unit=unt, origin='unix')
+            if ((dt > pd.to_datetime('1970-01-01T:00:00:00', utc=True)) and \
+                (dt < pd.to_datetime('2038-01-19T03:14:08', utc=True))):
+                isTimestamp = True
+                break
+        # yes some kind of people still use Julian Days
+        dt = pd.to_datetime(element, unit='D', origin='julian')
+        if ((dt > pd.to_datetime('1970-01-01T:00:00:00', utc=True)) and \
+            (dt < pd.to_datetime('2038-01-19T03:14:08', utc=True))):
+            isTimestamp = True
+    except Exception as error:
+        pass 
+    # check if element represents a date-like object. 
+    # here we don't check for a validity range like with unix-timestamps
+    # because dates as string usually represent something more general than
+    # just the number of seconds since an epoch.
+    try:
+        dt = pd.to_datetime(element, errors='raise')
+        # round element day (drop hour/minute/second)
+        dtd = dt.to_period('D').to_timestamp()
+        # if rounded datetime equals the datetime itself, it means there was not
+        # hour/minute/second information to begin with. Mind the 'localize' to
+        # avoid time-zone BS to kick in.
+        if dtd == dt.tz_localize(None):
+            isDate = True
         else:
-            return dtype.datetime
-
-    except ValueError:
-        return None
-
+            isDatetime = True
+    except Exception as error:
+        pass
+    
+    # because of the explicit 'unit' argument when checking for timestamps,
+    # element cannot be timestamp AND date/datetime. Similarly, it cannot
+    # be both date and datetime.
+    rtype = None
+    if isTimestamp:
+        rtype = dtype.timestamp
+    if isDatetime:
+        rtype = dtype.datetime
+    if isDate:
+        rtype = dtype.date
+    
+    return rtype
 
 def count_data_types_in_column(data):
     dtype_counts = Counter()

From 3198216b4edba396259fca3284af36969fcebee5 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 19:34:44 +0000
Subject: [PATCH 02/16] minor refactor to comply with PyLint

---
 type_infer/infer.py | 75 +++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index 226bed4..d426f4a 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -132,65 +132,72 @@ def type_check_date(element: object) -> str:
     Check if element corresponds to a date-like object.
     """
     # check if element represents a unix-timestamp
-    isTimestamp = False
+    is_timestamp = False
     # check if element represents a date (no hour/minute/seconds)
-    isDate = False
+    is_date = False
     # check if element represents a datetime (has hour/minute/seconds)
-    isDatetime = False
+    is_datetime = False
 
-    # check if it makes sense to convert element to unix time-stamp by 
-    # evaluating if, when converted, the element represents a number
-    # that is compatible with a Unix timestamp (number of seconds since 1970-01-01T:00:00:00)
+    # check if it makes sense to convert element to unix time-stamp by
+    # evaluating if, when converted, the element represents a number that
+    # is compatible with a Unix timestamp (number of seconds since 1970-01-01T:00:00:00)
     # note that we also check the number is not larger than the "epochalypse time",
     # which is when the unix timestamp becomes larger than 2^32 - 1 seconds. We do
     # this because timestamps outside this range are likely to be unreliable and hence
     # rather treated as every-day numbers.
-    try:
-        unt = ''
-        for unt in ['ns', 'us', 'ms', 's']:
-            dt = pd.to_datetime(element, unit=unt, origin='unix')
-            if ((dt > pd.to_datetime('1970-01-01T:00:00:00', utc=True)) and \
-                (dt < pd.to_datetime('2038-01-19T03:14:08', utc=True))):
-                isTimestamp = True
-                break
-        # yes some kind of people still use Julian Days
-        dt = pd.to_datetime(element, unit='D', origin='julian')
-        if ((dt > pd.to_datetime('1970-01-01T:00:00:00', utc=True)) and \
-            (dt < pd.to_datetime('2038-01-19T03:14:08', utc=True))):
-            isTimestamp = True
-    except Exception as error:
-        pass 
-    # check if element represents a date-like object. 
+    min_dt = pd.to_datetime('1970-01-01T:00:00:00', utc=True)
+    max_dt = pd.to_datetime('2038-01-19T:03:14:08', utc=True)
+    valid_units = ['ns', 'us', 'ms', 's', 'D']
+    for unit in valid_units:
+        # Yes, some people still use Julian Days...
+        if unit == 'D':
+            try:
+                as_dt = pd.to_datetime(element, unit=unit, origin='julian', errors='raise')
+                if min_dt < as_dt < max_dt:
+                    is_timestamp = True
+                    break
+            except Exception:
+                pass
+        else:
+            try:
+                as_dt = pd.to_datetime(element, unit=unit, origin='unix', errors='raise')
+                if min_dt < as_dt < max_dt:
+                    is_timestamp = True
+                    break
+            except Exception:
+                pass
+    # check if element represents a date-like object.
     # here we don't check for a validity range like with unix-timestamps
     # because dates as string usually represent something more general than
     # just the number of seconds since an epoch.
     try:
-        dt = pd.to_datetime(element, errors='raise')
+        as_dt = pd.to_datetime(element, errors='raise')
         # round element day (drop hour/minute/second)
-        dtd = dt.to_period('D').to_timestamp()
+        dt_d = as_dt.to_period('D').to_timestamp()
         # if rounded datetime equals the datetime itself, it means there was not
         # hour/minute/second information to begin with. Mind the 'localize' to
         # avoid time-zone BS to kick in.
-        if dtd == dt.tz_localize(None):
-            isDate = True
+        if dt_d == as_dt.tz_localize(None):
+            is_date = True
         else:
-            isDatetime = True
-    except Exception as error:
+            is_datetime = True
+    except Exception:
         pass
-    
+
     # because of the explicit 'unit' argument when checking for timestamps,
     # element cannot be timestamp AND date/datetime. Similarly, it cannot
     # be both date and datetime.
     rtype = None
-    if isTimestamp:
+    if is_timestamp:
         rtype = dtype.timestamp
-    if isDatetime:
+    if is_datetime:
         rtype = dtype.datetime
-    if isDate:
+    if is_date:
         rtype = dtype.date
-    
+
     return rtype
 
+
 def count_data_types_in_column(data):
     dtype_counts = Counter()
 
@@ -441,7 +448,7 @@ def infer_types(
     population_size = len(data)
     log.info(f'Analyzing a sample of {sample_size}')
     log.info(
-        f'from a total population of {population_size}, this is equivalent to {round(sample_size*100/population_size, 1)}% of your data.') # noqa
+        f'from a total population of {population_size}, this is equivalent to {round(sample_size*100/population_size, 1)}% of your data.')  # noqa
 
     nr_procs = get_nr_procs(df=sample_df)
     pool_size = min(nr_procs, len(sample_df.columns.values))

From 1a45efe66562241cb22e352a1a7d5d433c6262ba Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 19:37:48 +0000
Subject: [PATCH 03/16] removed whitespaces

---
 tests/unit_tests/test_dates.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/unit_tests/test_dates.py b/tests/unit_tests/test_dates.py
index 8a06563..2b79b5b 100644
--- a/tests/unit_tests/test_dates.py
+++ b/tests/unit_tests/test_dates.py
@@ -10,18 +10,18 @@ def test_0_type_check_dates(self):
         """ Checks parsing of string containing a date to dtype 'date'.
         """
         self.assertEqual(type_check_date('31/12/2010'), dtype.date)
-    
+
     def test_1_type_check_datetime(self):
         """ Checks parsing of string containing a date to dtype 'datetime'.
         """
         self.assertEqual(type_check_date('31/12/2010 23:15:41'), dtype.datetime)
-    
+
     def test_2_type_check_timestamp_unix_seconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as seconds
             since Unix epoch) to dtype 'timestamp'.
         """
         self.assertEqual(type_check_date(629721000.0), dtype.timestamp)
-    
+
     def test_3_type_check_timestamp_unix_miliseconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as miliseconds
             since Unix epoch) to dtype 'timestamp'.
@@ -33,15 +33,15 @@ def test_4_type_check_timestamp_unix_microseconds(self):
             since Unix epoch) to dtype 'timestamp'.
         """
         self.assertEqual(type_check_date(629721000000000.0), dtype.timestamp)
-    
+
     def test_5_type_check_timestamp_unix_nanoseconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as nanoseconds
             since Unix epoch) to dtype 'timestamp'.
         """
         self.assertEqual(type_check_date(629721000000000000.0), dtype.timestamp)
-    
+
     def test_6_type_check_timestamp_julian_days(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as days since
             Julian calendar epoch) to dtype 'timestamp'.
         """
-        self.assertEqual(type_check_date(2447875.81250), dtype.timestamp)
\ No newline at end of file
+        self.assertEqual(type_check_date(2447875.81250), dtype.timestamp)

From 9c4223a574bb9506d9936d266eb2c070378368a4 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 19:41:47 +0000
Subject: [PATCH 04/16] typo in min_dt and max_dt

---
 type_infer/infer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index d426f4a..2998439 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -145,8 +145,8 @@ def type_check_date(element: object) -> str:
     # which is when the unix timestamp becomes larger than 2^32 - 1 seconds. We do
     # this because timestamps outside this range are likely to be unreliable and hence
     # rather treated as every-day numbers.
-    min_dt = pd.to_datetime('1970-01-01T:00:00:00', utc=True)
-    max_dt = pd.to_datetime('2038-01-19T:03:14:08', utc=True)
+    min_dt = pd.to_datetime('1970-01-01 00:00:00', utc=True)
+    max_dt = pd.to_datetime('2038-01-19 03:14:08', utc=True)
     valid_units = ['ns', 'us', 'ms', 's', 'D']
     for unit in valid_units:
         # Yes, some people still use Julian Days...

From 0b164440b541eac24b64280c4ad88fa9b64d8974 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 19:57:37 +0000
Subject: [PATCH 05/16] prioritize returning timestamps when found.

---
 type_infer/infer.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index 2998439..4cc879d 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -166,6 +166,9 @@ def type_check_date(element: object) -> str:
                     break
             except Exception:
                 pass
+    if is_timestamp:
+        return dtype.timestamp
+
     # check if element represents a date-like object.
     # here we don't check for a validity range like with unix-timestamps
     # because dates as string usually represent something more general than
@@ -184,19 +187,12 @@ def type_check_date(element: object) -> str:
     except Exception:
         pass
 
-    # because of the explicit 'unit' argument when checking for timestamps,
-    # element cannot be timestamp AND date/datetime. Similarly, it cannot
-    # be both date and datetime.
-    rtype = None
-    if is_timestamp:
-        rtype = dtype.timestamp
     if is_datetime:
-        rtype = dtype.datetime
+        return dtype.datetime
     if is_date:
-        rtype = dtype.date
-
-    return rtype
+        return dtype.date
 
+    return None
 
 def count_data_types_in_column(data):
     dtype_counts = Counter()

From f5e02d0bca38a59142263f87f9ab57f01941a989 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 20:05:18 +0000
Subject: [PATCH 06/16] forced flake8 checks...

---
 type_infer/infer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index 4cc879d..c3af676 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -194,6 +194,7 @@ def type_check_date(element: object) -> str:
 
     return None
 
+
 def count_data_types_in_column(data):
     dtype_counts = Counter()
 

From e2d236a88a8747bc487a334cc9044c405e66e81b Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Wed, 16 Aug 2023 20:22:25 +0000
Subject: [PATCH 07/16] datetime and timestamp are the same

---
 tests/unit_tests/test_dates.py | 20 ++++++++++----------
 type_infer/dtype.py            |  2 --
 type_infer/infer.py            | 28 +++++++++++-----------------
 3 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/tests/unit_tests/test_dates.py b/tests/unit_tests/test_dates.py
index 2b79b5b..6e8bfe7 100644
--- a/tests/unit_tests/test_dates.py
+++ b/tests/unit_tests/test_dates.py
@@ -18,30 +18,30 @@ def test_1_type_check_datetime(self):
 
     def test_2_type_check_timestamp_unix_seconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as seconds
-            since Unix epoch) to dtype 'timestamp'.
+            since Unix epoch) to dtype 'datetime'.
         """
-        self.assertEqual(type_check_date(629721000.0), dtype.timestamp)
+        self.assertEqual(type_check_date(629721000.0), dtype.datetime)
 
     def test_3_type_check_timestamp_unix_miliseconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as miliseconds
-            since Unix epoch) to dtype 'timestamp'.
+            since Unix epoch) to dtype 'datetime'.
         """
-        self.assertEqual(type_check_date(629721000000.0), dtype.timestamp)
+        self.assertEqual(type_check_date(629721000000.0), dtype.datetime)
 
     def test_4_type_check_timestamp_unix_microseconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as microseconds
-            since Unix epoch) to dtype 'timestamp'.
+            since Unix epoch) to dtype 'datetime'.
         """
-        self.assertEqual(type_check_date(629721000000000.0), dtype.timestamp)
+        self.assertEqual(type_check_date(629721000000000.0), dtype.datetime)
 
     def test_5_type_check_timestamp_unix_nanoseconds(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as nanoseconds
-            since Unix epoch) to dtype 'timestamp'.
+            since Unix epoch) to dtype 'datetime'.
         """
-        self.assertEqual(type_check_date(629721000000000000.0), dtype.timestamp)
+        self.assertEqual(type_check_date(629721000000000000.0), dtype.datetime)
 
     def test_6_type_check_timestamp_julian_days(self):
         """ Checks parsing a number containing 1989-12-15T07:30:00 (as days since
-            Julian calendar epoch) to dtype 'timestamp'.
+            Julian calendar epoch) to dtype 'datetime'.
         """
-        self.assertEqual(type_check_date(2447875.81250), dtype.timestamp)
+        self.assertEqual(type_check_date(2447875.81250), dtype.datetime)
diff --git a/type_infer/dtype.py b/type_infer/dtype.py
index 1f4f460..9f05a2e 100644
--- a/type_infer/dtype.py
+++ b/type_infer/dtype.py
@@ -5,7 +5,6 @@ class dtype:
     - **Numerical**: Data that should be represented in the form of a number. Currently ``integer``, ``float``, and ``quantity`` are supported.
     - **Categorical**: Data that represents a class or label and is discrete. Currently ``binary``, ``categorical``, and ``tags`` are supported.
     - **Date/Time**: Time-series data that is temporal/sequential. Currently ``date``, and ``datetime`` are supported.
-    - **Timestamp**: Data that represents time in the form of the amount of nano/micro/milli-seconds, seconds after midnight 1970-01-01. Julian days are also supported.
     - **Text**: Data that can be considered as language information.  Currently ``short_text``, and ``rich_text`` are supported. Short text has a small vocabulary (~ 100 words) and is generally a limited number of characters. Rich text is anything with greater complexity.
     - **Complex**: Data types that require custom techniques. Currently ``audio``, ``video`` and ``image`` are available, but highly experimental.
     - **Array**: Data in the form of a sequence where order must be preserved. ``tsarray`` dtypes are for "normal" columns that will be transformed to arrays at a row-level because they will be treated as time series.
@@ -27,7 +26,6 @@ class dtype:
     # Dates and Times (time-series)
     date = "date"
     datetime = "datetime"
-    timestamp = "timestamp"
 
     # Text
     short_text = "short_text"
diff --git a/type_infer/infer.py b/type_infer/infer.py
index c3af676..836f9e1 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -131,13 +131,10 @@ def type_check_date(element: object) -> str:
     """
     Check if element corresponds to a date-like object.
     """
-    # check if element represents a unix-timestamp
-    is_timestamp = False
     # check if element represents a date (no hour/minute/seconds)
     is_date = False
     # check if element represents a datetime (has hour/minute/seconds)
     is_datetime = False
-
     # check if it makes sense to convert element to unix time-stamp by
     # evaluating if, when converted, the element represents a number that
     # is compatible with a Unix timestamp (number of seconds since 1970-01-01T:00:00:00)
@@ -154,7 +151,7 @@ def type_check_date(element: object) -> str:
             try:
                 as_dt = pd.to_datetime(element, unit=unit, origin='julian', errors='raise')
                 if min_dt < as_dt < max_dt:
-                    is_timestamp = True
+                    is_datetime = True
                     break
             except Exception:
                 pass
@@ -162,35 +159,32 @@ def type_check_date(element: object) -> str:
             try:
                 as_dt = pd.to_datetime(element, unit=unit, origin='unix', errors='raise')
                 if min_dt < as_dt < max_dt:
-                    is_timestamp = True
+                    is_datetime = True
                     break
             except Exception:
                 pass
-    if is_timestamp:
-        return dtype.timestamp
-
     # check if element represents a date-like object.
     # here we don't check for a validity range like with unix-timestamps
     # because dates as string usually represent something more general than
     # just the number of seconds since an epoch.
     try:
         as_dt = pd.to_datetime(element, errors='raise')
+        is_datetime = True
+    except Exception:
+        pass
+    # finally, if element represents a datetime object, check if only
+    # the date part is contained (no time information)
+    if is_datetime:
         # round element day (drop hour/minute/second)
         dt_d = as_dt.to_period('D').to_timestamp()
         # if rounded datetime equals the datetime itself, it means there was not
         # hour/minute/second information to begin with. Mind the 'localize' to
         # avoid time-zone BS to kick in.
-        if dt_d == as_dt.tz_localize(None):
-            is_date = True
-        else:
-            is_datetime = True
-    except Exception:
-        pass
-
-    if is_datetime:
-        return dtype.datetime
+        is_date = dt_d == as_dt.tz_localize(None)
     if is_date:
         return dtype.date
+    if is_datetime:
+        return dtype.datetime
 
     return None
 

From 7ecbf7ffc86bb61d73da221fdca98c16a530a522 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Mon, 21 Aug 2023 15:52:07 +0000
Subject: [PATCH 08/16] minor change to de-duplicate code.

---
 type_infer/infer.py | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index 836f9e1..b89ce9a 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -144,25 +144,17 @@ def type_check_date(element: object) -> str:
     # rather treated as every-day numbers.
     min_dt = pd.to_datetime('1970-01-01 00:00:00', utc=True)
     max_dt = pd.to_datetime('2038-01-19 03:14:08', utc=True)
-    valid_units = ['ns', 'us', 'ms', 's', 'D']
-    for unit in valid_units:
-        # Yes, some people still use Julian Days...
-        if unit == 'D':
-            try:
-                as_dt = pd.to_datetime(element, unit=unit, origin='julian', errors='raise')
-                if min_dt < as_dt < max_dt:
-                    is_datetime = True
-                    break
-            except Exception:
-                pass
-        else:
-            try:
-                as_dt = pd.to_datetime(element, unit=unit, origin='unix', errors='raise')
-                if min_dt < as_dt < max_dt:
-                    is_datetime = True
-                    break
-            except Exception:
-                pass
+    valid_units = {'ns': 'unix', 'us': 'unix', 'ms': 'unix', 's': 'unix',
+    # Yes, some people still use Julian Days...
+                   'D': 'julian'}
+    for unit, origin in valid_units.items():
+        try:
+            as_dt = pd.to_datetime(element, unit=unit, origin=origin, errors='raise')
+            if min_dt < as_dt < max_dt:
+                is_datetime = True
+                break
+        except Exception:
+            pass
     # check if element represents a date-like object.
     # here we don't check for a validity range like with unix-timestamps
     # because dates as string usually represent something more general than

From 4f1fe29886d76d39a37db2917b9c81af14464103 Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Mon, 21 Aug 2023 15:52:39 +0000
Subject: [PATCH 09/16] fixed typo

---
 type_infer/infer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index b89ce9a..33dd73b 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -145,7 +145,6 @@ def type_check_date(element: object) -> str:
     min_dt = pd.to_datetime('1970-01-01 00:00:00', utc=True)
     max_dt = pd.to_datetime('2038-01-19 03:14:08', utc=True)
     valid_units = {'ns': 'unix', 'us': 'unix', 'ms': 'unix', 's': 'unix',
-    # Yes, some people still use Julian Days...
                    'D': 'julian'}
     for unit, origin in valid_units.items():
         try:

From d6ed8fa41913585846d09047358962970d14dddb Mon Sep 17 00:00:00 2001
From: Pedro Fluxa <pedro@mindsdb.com>
Date: Mon, 21 Aug 2023 15:53:11 +0000
Subject: [PATCH 10/16] conform to pylint and flake8

---
 type_infer/infer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/type_infer/infer.py b/type_infer/infer.py
index 33dd73b..dd329f3 100644
--- a/type_infer/infer.py
+++ b/type_infer/infer.py
@@ -148,7 +148,8 @@ def type_check_date(element: object) -> str:
                    'D': 'julian'}
     for unit, origin in valid_units.items():
         try:
-            as_dt = pd.to_datetime(element, unit=unit, origin=origin, errors='raise')
+            as_dt = pd.to_datetime(element, unit=unit, origin=origin,
+                                   errors='raise')
             if min_dt < as_dt < max_dt:
                 is_datetime = True
                 break

From 6290c8595fc6b8ab434243cdac10bac94cd562ff Mon Sep 17 00:00:00 2001
From: Tom Hudson <34073127+tomhuds@users.noreply.github.com>
Date: Tue, 26 Sep 2023 14:39:18 -0700
Subject: [PATCH 11/16] Delete .github/workflows/add_to_bugs_project.yml

---
 .github/workflows/add_to_bugs_project.yml | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 .github/workflows/add_to_bugs_project.yml

diff --git a/.github/workflows/add_to_bugs_project.yml b/.github/workflows/add_to_bugs_project.yml
deleted file mode 100644
index 1326053..0000000
--- a/.github/workflows/add_to_bugs_project.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: Add issue to bugs project
-
-on:
-  issues:
-    types:
-      - opened
-
-jobs:
-  add-to-project:
-    name: Add issue to bugs project
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/add-to-project@v0.4.0
-        with:
-          # You can target a repository in a different organization
-          # to the issue
-          project-url: https://github.com/orgs/mindsdb/projects/53
-          github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
-          labeled: bug

From 1f5d2713273090a617faacfe82c5d5cbbb2d9249 Mon Sep 17 00:00:00 2001
From: Tom Hudson <34073127+tomhuds@users.noreply.github.com>
Date: Tue, 26 Sep 2023 14:39:33 -0700
Subject: [PATCH 12/16] Delete .github/workflows/add_to_docs_project.yml

---
 .github/workflows/add_to_docs_project.yml | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 .github/workflows/add_to_docs_project.yml

diff --git a/.github/workflows/add_to_docs_project.yml b/.github/workflows/add_to_docs_project.yml
deleted file mode 100644
index ac34b2e..0000000
--- a/.github/workflows/add_to_docs_project.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: Add issue to docs project
-
-on:
-  issues:
-    types:
-      - opened
-
-jobs:
-  add-to-project:
-    name: Add issue to docs project
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/add-to-project@v0.4.0
-        with:
-          # You can target a repository in a different organization
-          # to the issue
-          project-url: https://github.com/orgs/mindsdb/projects/32
-          github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
-          labeled: documentation

From ab9c8dc73ae98b45698dd67c1a8a1afde85513d1 Mon Sep 17 00:00:00 2001
From: Tom Hudson <34073127+tomhuds@users.noreply.github.com>
Date: Tue, 26 Sep 2023 14:39:42 -0700
Subject: [PATCH 13/16] Delete .github/workflows/add_to_roadmap_project.yml

---
 .github/workflows/add_to_roadmap_project.yml | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 .github/workflows/add_to_roadmap_project.yml

diff --git a/.github/workflows/add_to_roadmap_project.yml b/.github/workflows/add_to_roadmap_project.yml
deleted file mode 100644
index 4aec947..0000000
--- a/.github/workflows/add_to_roadmap_project.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: Add issue to roadmap project
-
-on:
-  issues:
-    types:
-      - opened
-
-jobs:
-  add-to-project:
-    name: Add issue to roadmap project
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/add-to-project@v0.4.0
-        with:
-          # You can target a repository in a different organization
-          # to the issue
-          project-url: https://github.com/orgs/mindsdb/projects/54
-          github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
-          labeled: enhancement

From 20142b85018ed6c9ae5b02dab67da47112bc6e43 Mon Sep 17 00:00:00 2001
From: Tom Hudson <34073127+tomhuds@users.noreply.github.com>
Date: Tue, 26 Sep 2023 14:39:56 -0700
Subject: [PATCH 14/16] Add files via upload

---
 .github/workflows/add_to_pr_review.yml          | 16 ++++++++++++++++
 .github/workflows/add_to_roadmap_project_v2.yml | 14 ++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 .github/workflows/add_to_pr_review.yml
 create mode 100644 .github/workflows/add_to_roadmap_project_v2.yml

diff --git a/.github/workflows/add_to_pr_review.yml b/.github/workflows/add_to_pr_review.yml
new file mode 100644
index 0000000..384f2be
--- /dev/null
+++ b/.github/workflows/add_to_pr_review.yml
@@ -0,0 +1,16 @@
+name: Add Pull Requests to PR review project
+
+on:
+  pull_request:
+    types:
+      - opened
+
+jobs:
+  add-to-project:
+    name: Add pull request to project
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/add-to-project@v0.5.0
+        with:
+          project-url: https://github.com/orgs/mindsdb/projects/65
+          github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
diff --git a/.github/workflows/add_to_roadmap_project_v2.yml b/.github/workflows/add_to_roadmap_project_v2.yml
new file mode 100644
index 0000000..240c700
--- /dev/null
+++ b/.github/workflows/add_to_roadmap_project_v2.yml
@@ -0,0 +1,14 @@
+name: Add issue to roadmap project
+on:
+  issues:
+    types:
+      - opened
+jobs:
+  add-to-project:
+    name: Add issue to roadmap project
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/add-to-project@v0.4.0
+        with:
+          project-url: https://github.com/orgs/mindsdb/projects/53
+          github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
\ No newline at end of file

From 01cec97ffc0e97876402d2545218af458b8a5b6f Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Mon, 27 Nov 2023 13:54:35 +0900
Subject: [PATCH 15/16] version bump: 0.0.16

---
 pyproject.toml         | 2 +-
 type_infer/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3cb3478..61d3bf9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "type_infer"
-version = "0.0.15"
+version = "0.0.16"
 description = "Automated type inference for Machine Learning pipelines."
 authors = ["MindsDB Inc. <hello@mindsdb.com>"]
 license = "GPL-3.0"
diff --git a/type_infer/__init__.py b/type_infer/__init__.py
index e163350..2e45234 100644
--- a/type_infer/__init__.py
+++ b/type_infer/__init__.py
@@ -4,7 +4,7 @@
 from type_infer import helpers
 
 
-__version__ = '0.0.15'
+__version__ = '0.0.16'
 
 
 __all__ = ['base', 'dtype', 'infer', 'helpers', '__version__']

From 2a2ff0c5231c4843bb6c629b3d604dccbfa4b10e Mon Sep 17 00:00:00 2001
From: Patricio Cerda Mardini <patricio.mardini@mindsdb.com>
Date: Wed, 29 Nov 2023 19:10:39 +0900
Subject: [PATCH 16/16] update docs deps

---
 .github/workflows/docs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 226c7e3..cfbc836 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -22,7 +22,7 @@ jobs:
       run: |
         sudo apt install pandoc
         python -m pip install --upgrade pip
-        pip install install 'Sphinx==4.1.2' 'sphinx-autoapi==1.8.4' 'sphinx-autodoc-typehints==1.12.0' 'sphinx-code-include==1.1.1' 'sphinx-rtd-theme==0.5.2' 'sphinxcontrib-applehelp==1.0.2' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.0' 'sphinxcontrib-jsmath==1.0.1' 'sphinxcontrib-napoleon==0.7' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec
+        pip install 'Sphinx==6.2.1' 'sphinx-autoapi==3.0.0' 'sphinx-autodoc-typehints' 'sphinx-code-include' 'sphinx-rtd-theme' 'sphinxcontrib-applehelp' 'sphinxcontrib-devhelp' 'sphinxcontrib-htmlhelp' 'sphinxcontrib-jsmath' 'sphinxcontrib-napoleon' 'sphinxcontrib-qthelp' 'sphinxcontrib-serializinghtml' autoapi nbsphinx myst_parser pandoc jupyter matplotlib imblearn fsspec
         pip install --no-cache-dir -e .
     - name: Make the docs
       run: |