Skip to content

Commit

Permalink
handle null dates
Browse files: browse the repository at this point in the history
  • Loading branch information
lucasfcosta committed Dec 2, 2024
1 parent 7e267f9 commit c41b18e
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions apps/api/src/python/query/bigquery.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,19 +88,14 @@ def _briefer_make_bq_query():
if bq_type in ['DATE', 'DATETIME']:
df[col] = pd.to_datetime(df[col], errors='coerce')
elif bq_type == 'TIMESTAMP':
# Define fallback and maximum possible date
# This is useful because timestamps that are too small or large for pandas are converted to NaT
# handle out-of-bounds dates
fallback_date = datetime(1, 1, 1, 0, 0, 0, 0, tzinfo=pytz.utc)
max_date = datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc)
# Replace large values with the maximum possible date
df[col] = df[col].apply(lambda x: max_date if x > max_date else x)
# Replace small values with the fallback date
df[col] = df[col].apply(lambda x: fallback_date if x < fallback_date else x)
df[col] = df[col].apply(lambda x: max_date if not pd.isnull(x) and x > max_date else x)
df[col] = df[col].apply(lambda x: fallback_date if not pd.isnull(x) and x < fallback_date else x)
# make sure timezone is in utc and preserve timezone info
df[col] = df[col].apply(lambda x: x.astimezone(pytz.utc) if x.tzinfo else x)
df[col] = df[col].apply(lambda x: x.astimezone(pytz.utc) if not pd.isnull(x) and x.tzinfo else x)
elif bq_type == 'TIME':
df[col] = pd.to_datetime(df[col], errors='coerce').dt.time
elif bq_type == 'NUMERIC':
Expand Down

0 comments on commit c41b18e

Please sign in to comment.