From 9cc17f7b9754add7668242ce011da678c7f7c155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Fri, 7 Feb 2025 09:04:42 +0000 Subject: [PATCH 1/4] init --- .../unreleased/Features-20250207-090315.yaml | 6 +++++ .../bigquery/macros/adapters/datetime.sql | 27 +++++++++++++++++++ .../macros/materializations/incremental.sql | 12 ++++++++- .../incremental_strategy/microbatch.sql | 8 ++++++ 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 dbt-bigquery/.changes/unreleased/Features-20250207-090315.yaml create mode 100644 dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql diff --git a/dbt-bigquery/.changes/unreleased/Features-20250207-090315.yaml b/dbt-bigquery/.changes/unreleased/Features-20250207-090315.yaml new file mode 100644 index 000000000..b983e38c8 --- /dev/null +++ b/dbt-bigquery/.changes/unreleased/Features-20250207-090315.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Leverage static partitioning in dbt-bigquery materialization strategy +time: 2025-02-07T09:03:15.963098Z +custom: + Author: borjav + Issue: "538" diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql new file mode 100644 index 000000000..754a005e1 --- /dev/null +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql @@ -0,0 +1,27 @@ +{% macro generate_dates_in_range(start_date_datetime, end_date_datetime, granularity) %} + {#-- Generate a list of datetimes between two dates #} + {% set total_offset_seconds = (end_date_datetime - start_date_datetime).total_seconds() %} + {% set total_offset_hours = (total_offset_seconds/3600)|int %} + {% set total_offset_days = (total_offset_hours/24)|int %} + + {% if granularity == "day" %} + {% set total_offset = total_offset_days %} + {% set fmt_out = '"%Y-%m-%d"' %} + {% elif granularity == "hour" %} + {% set total_offset = total_offset_hours %} + {% set fmt_out = '"%Y-%m-%d %H:%M:%S"' %} + 
{% endif %} + + {% set date_list = [] %} + {% for i in range(0, total_offset ) -%} + {% if granularity == "day" %} + {% set delta = modules.datetime.timedelta(days = i) %} + {% elif granularity == "hour" %} + {% set delta = modules.datetime.timedelta(hours = i) %} + {% endif %} + {% set this_date = start_date_datetime + delta %} + {% set _ = date_list.append(this_date.strftime(fmt_out)) %} + {% endfor -%} + {{ log("date_list: " ~ date_list,info=true) }} + {{ return(date_list) }} +{% endmacro %} diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql index 25a83b0c6..0e8bb02c3 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql @@ -69,6 +69,16 @@ {% endmacro %} + +{% macro set_partitions(strategy, granularity) %} + {#-- We override the partitions to force a static insert overwrite on microbatch, significantly more performant --#} + {% if strategy == "microbatch" %} + {{ return(bq_generate_static_partitions(config, granularity)) }} + {% else %} + {{ return(config.get('partitions', none)) }} + {% endif %} +{% endmacro %} + {% materialization incremental, adapter='bigquery', supported_languages=['sql', 'python'] -%} {%- set unique_key = config.get('unique_key') -%} @@ -84,7 +94,7 @@ {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%} - {%- set partitions = config.get('partitions', none) -%} + {%- set partitions = set_partitions(strategy, partition_by.granularity) -%} {%- set cluster_by = config.get('cluster_by', none) -%} {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql 
b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql index d4c4b7453..c28ebfaef 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql @@ -26,3 +26,11 @@ {{ return(build_sql) }} {% endmacro %} + +{% macro bq_generate_static_partitions(config, granularity) %} + {% set event_time_start = config.get("__dbt_internal_microbatch_event_time_start") %} + {% set event_time_end = config.get("__dbt_internal_microbatch_event_time_end") %} + {% set range_of_dates = generate_dates_in_range(event_time_start, event_time_end, granularity) %} + + {{ return(range_of_dates) }} +{% endmacro %} From c31a329618760a53c1fe8cc50f150f302257d492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Fri, 7 Feb 2025 14:45:51 +0000 Subject: [PATCH 2/4] Update incremental.sql --- .../include/bigquery/macros/materializations/incremental.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql index 0e8bb02c3..b762b6de9 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql @@ -70,7 +70,7 @@ {% endmacro %} -{% macro set_partitions(strategy, granularity) %} +{% macro set_partitions(config, strategy, granularity) %} {#-- We override the partitions to force a static insert overwrite on microbatch, significantly more performant --#} {% if strategy == "microbatch" %} {{ return(bq_generate_static_partitions(config, granularity)) }} @@ -94,7 +94,7 @@ {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%} - 
{%- set partitions = set_partitions(strategy, partition_by.granularity) -%} + {%- set partitions = set_partitions(config, strategy, partition_by.granularity) -%} {%- set cluster_by = config.get('cluster_by', none) -%} {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} From 242633113fae9c23bd6299f2a2c5d777d137be07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Sun, 9 Feb 2025 11:36:31 +0000 Subject: [PATCH 3/4] backwards compatible copy_partitions --- .../bigquery/macros/materializations/incremental.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql index b762b6de9..94a44b432 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql @@ -70,10 +70,10 @@ {% endmacro %} -{% macro set_partitions(config, strategy, granularity) %} +{% macro set_partitions(config, strategy, partition_by) %} {#-- We override the partitions to force a static insert overwrite on microbatch, significantly more performant --#} - {% if strategy == "microbatch" %} - {{ return(bq_generate_static_partitions(config, granularity)) }} + {% if strategy == "microbatch" and partition_by.copy_partitions is False%} + {{ return(bq_generate_static_partitions(config, partition_by.granularity)) }} {% else %} {{ return(config.get('partitions', none)) }} {% endif %} @@ -94,7 +94,7 @@ {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%} - {%- set partitions = set_partitions(config, strategy, partition_by.granularity) -%} + {%- set partitions = set_partitions(config, strategy, partition_by) -%} {%- set cluster_by = config.get('cluster_by', 
none) -%} {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} From 54c8df9e8c39479c7339b76e40568cbd1ea01884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Sun, 9 Feb 2025 17:22:05 +0000 Subject: [PATCH 4/4] fix --- .../src/dbt/include/bigquery/macros/adapters/datetime.sql | 1 - .../include/bigquery/macros/materializations/incremental.sql | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql index 754a005e1..ad2fe47db 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/adapters/datetime.sql @@ -22,6 +22,5 @@ {% set this_date = start_date_datetime + delta %} {% set _ = date_list.append(this_date.strftime(fmt_out)) %} {% endfor -%} - {{ log("date_list: " ~ date_list,info=true) }} {{ return(date_list) }} {% endmacro %} diff --git a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql index 94a44b432..ed1a93910 100644 --- a/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt-bigquery/src/dbt/include/bigquery/macros/materializations/incremental.sql @@ -72,7 +72,7 @@ {% macro set_partitions(config, strategy, partition_by) %} {#-- We override the partitions to force a static insert overwrite on microbatch, significantly more performant --#} - {% if strategy == "microbatch" and partition_by.copy_partitions is False%} + {% if strategy == "microbatch" and partition_by.copy_partitions is false %} {{ return(bq_generate_static_partitions(config, partition_by.granularity)) }} {% else %} {{ return(config.get('partitions', none)) }}