From 8bde85d85470c197e791bfcfd65f884ab09b950d Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 19 Mar 2025 14:46:06 +0000 Subject: [PATCH 1/3] feat: updates time partitioning --- sqlalchemy_bigquery/base.py | 47 ++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 0204bc92..dd3c2aab 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -832,14 +832,11 @@ def _process_time_partitioning( function returns: "PARTITION BY TIMESTAMP_TRUNC(event_timestamp, DAY)". - Current inputs allowed by BQ and covered by this function include: + Current inputs allowed by BQ AND covered by this function include: * _PARTITIONDATE * DATETIME_TRUNC(<datetime_column>, DAY/HOUR/MONTH/YEAR) * TIMESTAMP_TRUNC(<timestamp_column>, DAY/HOUR/MONTH/YEAR) * DATE_TRUNC(<date_column>, MONTH/YEAR) - - Additional options allowed by BQ but not explicitly covered by this - function include: * DATE(_PARTITIONTIME) * DATE(<timestamp_column>) * DATE(<datetime_column>) @@ -847,12 +844,40 @@ def _process_time_partitioning( """ sqltypes = { - "_PARTITIONDATE": ("_PARTITIONDATE", None), - "TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), - "DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), - "DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}), + # column_type | truncation func OR default value | partitioning_period(s) + + "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period + "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period + "DATE": { + "no_period": (None, None), # date_column, no trunc_fn, no period + "period": ("DATE_TRUNC", {"MONTH", "YEAR"}), # date_column, trunc_fn, period(s) + }, + "DATETIME": { + "no_period": ("DATE", None), # datetime_column, trunc_fn, no period + "period": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # datetime_column, trunc_fn, period(s) + }, + "TIMESTAMP": { + "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period + "period": ("TIMESTAMP_TRUNC", {"DAY", 
"HOUR", "MONTH", "YEAR"}), # timestamp_column, trunc_fn, period(s) + }, } + def parse_sqltypes(coltype, partitioning_period): + """Returns the default value OR the truncation function to be used + and the allowed partitioning periods. + """ + + if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}: + return sqltypes[coltype] + + # by this point, value must be a nested dict + if partitioning_period is None: + # use "no_period" key + return sqltypes[coltype]["no_period"] + else: + # use "period" key + return sqltypes[coltype]["period"] + # Extract field (i.e or _PARTITIONDATE) # AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE", # "DATETIME", "_PARTITIONDATE") @@ -870,14 +895,14 @@ def _process_time_partitioning( # immediately overwritten by python-bigquery to a default of DAY. partitioning_period = time_partitioning.type_ - # Extract the truncation_function (i.e. DATE_TRUNC) + # Extract the default value or truncation_function (i.e. DATE_TRUNC()) # and the set of allowable partition_periods # that can be used in that function - trunc_fn, allowed_partitions = sqltypes[column_type] + trunc_fn, allowed_partitions = parse_sqltypes(column_type, time_partitioning) # Create output: # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period - if trunc_fn == "_PARTITIONDATE": + if trunc_fn is None or trunc_fn in {"_PARTITIONDATE"}: return f"PARTITION BY {field}" # Special Case: BigQuery will not accept DAY as partitioning_period for From d68cf1740fe692e1c484875a4989885b12a397e5 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 19 Mar 2025 14:50:12 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- sqlalchemy_bigquery/base.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 
deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index dd3c2aab..cdd9aeb3 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -845,20 +845,28 @@ def _process_time_partitioning( sqltypes = { # column_type | truncation func OR default value | partitioning_period(s) - - "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period - "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period + "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period + "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period "DATE": { - "no_period": (None, None), # date_column, no trunc_fn, no period - "period": ("DATE_TRUNC", {"MONTH", "YEAR"}), # date_column, trunc_fn, period(s) - }, + "no_period": (None, None), # date_column, no trunc_fn, no period + "period": ( + "DATE_TRUNC", + {"MONTH", "YEAR"}, + ), # date_column, trunc_fn, period(s) + }, "DATETIME": { - "no_period": ("DATE", None), # datetime_column, trunc_fn, no period - "period": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # datetime_column, trunc_fn, period(s) - }, + "no_period": ("DATE", None), # datetime_column, trunc_fn, no period + "period": ( + "DATETIME_TRUNC", + {"DAY", "HOUR", "MONTH", "YEAR"}, + ), # datetime_column, trunc_fn, period(s) + }, "TIMESTAMP": { - "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period - "period": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # timestamp_column, trunc_fn, period(s) + "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period + "period": ( + "TIMESTAMP_TRUNC", + {"DAY", "HOUR", "MONTH", "YEAR"}, + ), # timestamp_column, trunc_fn, period(s) }, } @@ -866,10 +874,10 @@ def parse_sqltypes(coltype, partitioning_period): """Returns the default value OR the truncation function to be used and the allowed partitioning periods. 
""" - + if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}: return sqltypes[coltype] - + # by this point, value must be a nested dict if partitioning_period is None: # use "no_period" key From 3954351c329ab9abaea842c251b4f46943822f5c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 20 Mar 2025 15:18:32 +0000 Subject: [PATCH 3/3] Updates function argument: partitioning_period --- sqlalchemy_bigquery/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index cdd9aeb3..472c9dd4 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -906,7 +906,7 @@ def parse_sqltypes(coltype, partitioning_period): # Extract the default value or truncation_function (i.e. DATE_TRUNC()) # and the set of allowable partition_periods # that can be used in that function - trunc_fn, allowed_partitions = parse_sqltypes(column_type, time_partitioning) + trunc_fn, allowed_partitions = parse_sqltypes(column_type, partitioning_period) # Create output: # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period