Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,18 @@ models:
compare_model: ref('other_table_name')
exclude_columns:
- third_column

# if the columns to be compared have different names, you can match them up like this
- name: model_name_different_names
tests:
- dbt_utils.equality:
compare_model: ref('other_table_name')
compare_columns:
- first_column
# This will compare `model_name_different_names.second_column_in_model`
# and `other_table_name.second_column_in_other_table`
- [second_column_in_model, second_column_in_other_table]
precision: 4
```

### expression_is_true ([source](macros/generic_tests/expression_is_true.sql))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
col_a,tbl_a_col_b,tbl_a_col_float
1,1,1.100005
1,2,1.200005
2,3,1.300005
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
tbl_b_col_b,col_a,tbl_b_col_float
1,1,1.100006
2,1,1.200007
3,2,1.300008
22 changes: 22 additions & 0 deletions integration_tests/models/generic_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,28 @@ seeds:
exclude_columns:
- col_c

- name: data_test_equality_different_column_names_a
data_tests:
- dbt_utils.equality:
compare_model: ref('data_test_equality_different_column_names_b')
compare_columns:
- col_a
- [tbl_a_col_b, tbl_b_col_b]
- dbt_utils.equality:
compare_model: ref('data_test_equality_different_column_names_b')
compare_columns:
- col_a
- [tbl_a_col_float, tbl_b_col_float]
precision: 4
- dbt_utils.equality:
compare_model: ref('data_test_equality_different_column_names_b')
compare_columns:
- col_a
- [tbl_a_col_float, tbl_b_col_float]
precision: 8
error_if: "<1" #sneaky way to ensure that the test is returning failing rows
warn_if: "<0"

- name: data_test_equality_floats_a
data_tests:
# test precision only
Expand Down
130 changes: 81 additions & 49 deletions macros/generic_tests/equality.sql
Original file line number Diff line number Diff line change
Expand Up @@ -63,42 +63,15 @@
{%- endif -%}

{% if compare_columns_set != compare_model_columns_set %}
{{ exceptions.raise_compiler_error(compare_model ~" has less columns than " ~ model ~ ", please ensure they have the same columns or use the `compare_columns` or `exclude_columns` arguments to subset them.") }}
{{ exceptions.raise_compiler_error(compare_model ~" has different columns than " ~ model ~ ", please ensure they have the same columns or use the `compare_columns` or `exclude_columns` arguments to subset them.") }}
{% endif %}


{% endif %}

{%- if not precision -%}
{%- if not compare_columns -%}
{#
You cannot get the columns in an ephemeral model (due to not existing in the information schema),
so if the user does not provide an explicit list of columns we must error in the case it is ephemeral
#}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set compare_columns = adapter.get_columns_in_relation(model)-%}

{%- if exclude_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set exclude_columns = exclude_columns | map("lower") | list %}

{# Filter out the excluded columns #}
{%- set include_columns = [] %}
{%- for column in compare_columns -%}
{%- if column.name | lower not in exclude_columns -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}

{%- set compare_columns = include_columns | map(attribute='quoted') %}
{%- else -%} {# Compare columns provided #}
{%- set compare_columns = compare_columns | map(attribute='quoted') %}
{%- endif -%}
{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}

{% else %} {# Precision required #}
{# If testing with precision, then find out which columns in the main input model are numeric #}
{%- set numeric_columns = {} -%}
{%- if precision -%}
{#-
If rounding is required, we need to get the types, so it cannot be ephemeral even if they provide column names
-#}
Expand All @@ -107,23 +80,82 @@

{% set columns_list = [] %}
{%- for col in columns -%}
{%- if (
(col.name|lower in compare_columns|map('lower') or not compare_columns) and
(col.name|lower not in exclude_columns|map('lower') or not exclude_columns)
) -%}
{# Databricks double type is not picked up by any number type checks in dbt #}
{%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
{# Cast is required due to postgres not having round for a double precision number #}
{%- do columns_list.append('round(cast(' ~ col.quoted ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.quoted) -%}
{%- else -%} {# Non-numeric type #}
{%- do columns_list.append(col.quoted) -%}
{%- endif -%}
{% endif %}
{# Databricks double type is not picked up by any number type checks in dbt #}
{%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
{#- Lower case the column name for easier case-insensitive comparison -#}
{%- do numeric_columns.update({col.name|lower: true}) -%}
{#- Also include the quoted version, since we may see it as well. -#}
{%- do numeric_columns.update({col.quoted|lower: true}) -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}

{% set compare_cols_csv = columns_list | join(', ') %}
{# If compare_columns is provided, sort any given arrays into lists of columns for each model #}
{%- if compare_columns -%}
{%- set compare_columns__model = [] %}
{%- set compare_columns__compare_model = [] %}

{%- for column in compare_columns -%}
{%- if column is string -%}
{# A simple string was given. Assume the same column name in both models. #}
{%- do compare_columns__model.append(column) -%}
{%- do compare_columns__compare_model.append(column) -%}
{%- elif column is iterable and column | length == 2 -%}
{%- do compare_columns__model.append(column[0]) -%}
{%- do compare_columns__compare_model.append(column[1]) -%}
{%- else -%}
{{ exceptions.raise_compiler_error("compare_columns must be a string or a list of 2 strings") }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{#
You cannot get the columns in an ephemeral model (due to not existing in the information schema),
so if the user does not provide an explicit list of columns we must error in the case it is ephemeral
#}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set model_columns = adapter.get_columns_in_relation(model)-%}

{% endif %}
{%- if exclude_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set exclude_columns = exclude_columns | map("lower") | list %}
{%- endif -%}

{# Filter out the excluded columns #}
{%- set include_columns = [] %}
{%- for column in model_columns -%}
{%- if (not exclude_columns) or (column.name | lower not in exclude_columns) -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}

{# Assume same column names in the comparison model, since no alternates were given using compare_columns. #}
{%- set compare_columns__model = include_columns | map(attribute='quoted') | list %}
{%- set compare_columns__compare_model = compare_columns__model %}
{%- endif -%}

{# Build comma-delimited lists of column names for each input model. Round numeric types as needed. #}
{%- set compare_columns_csv = [] -%}
{%- set numeric_column_indexes_in_first_model = [] -%}
{%- for this_model_compare_columns in [compare_columns__model, compare_columns__compare_model] -%}
{%- set columns_list = [] %}
{%- set is_first_model = loop.first -%}

{%- for this_compare_column in this_model_compare_columns -%}
{# NOTE: We assume any numeric columns in the first model are also numeric in the second model #}
{%- if (is_first_model and this_compare_column|lower in numeric_columns) or (loop.index0 in numeric_column_indexes_in_first_model) -%}
{# Cast is required due to postgres not having round for a double precision number #}
{%- do columns_list.append('round(cast(' ~ this_compare_column ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ this_compare_column) -%}

{%- if is_first_model -%}
{%- do numeric_column_indexes_in_first_model.append(loop.index0) -%}
{%- endif -%}
{%- else -%} {# Non-numeric type #}
{%- do columns_list.append(this_compare_column) -%}
{%- endif -%}
{%- endfor -%}

{%- do compare_columns_csv.append(columns_list | join(', ')) -%}
{%- endfor -%}

with a as (

Expand All @@ -139,17 +171,17 @@ b as (

a_minus_b as (

select {{compare_cols_csv}} from a
select {{compare_columns_csv[0]}} from a
{{ dbt.except() }}
select {{compare_cols_csv}} from b
select {{compare_columns_csv[1]}} from b

),

b_minus_a as (

select {{compare_cols_csv}} from b
select {{compare_columns_csv[1]}} from b
{{ dbt.except() }}
select {{compare_cols_csv}} from a
select {{compare_columns_csv[0]}} from a

),

Expand Down
Loading