From d0535980c9f072076e3c026087a7ba73650ff09b Mon Sep 17 00:00:00 2001
From: Damir Mustafic
Date: Mon, 26 May 2025 17:14:49 +0200
Subject: [PATCH 1/6] url decode encode

---
 macros/web/url_decode.sql | 272 ++++++++++++++++++++++++++++++++++++++
 macros/web/url_encode.sql |  58 ++++++++
 2 files changed, 330 insertions(+)
 create mode 100644 macros/web/url_decode.sql
 create mode 100644 macros/web/url_encode.sql

diff --git a/macros/web/url_decode.sql b/macros/web/url_decode.sql
new file mode 100644
index 00000000..4438b7ca
--- /dev/null
+++ b/macros/web/url_decode.sql
@@ -0,0 +1,272 @@
+{#
+    Decodes percent-encoded (URL-encoded) sequences in a string expression.
+
+    Args:
+        string_to_decode: SQL expression (e.g. a column name) holding the encoded text.
+
+    Dispatches to an adapter-specific implementation; default__url_decode is the fallback.
+#}
+{% macro url_decode(string_to_decode) -%}
+    {{ return(adapter.dispatch('url_decode', 'dbt_utils')(string_to_decode)) }}
+{% endmacro %}
+
+{#
+    Default implementation: builds one nested replace() call per row of the
+    table below, turning each listed percent-encoded sequence into chr(code point).
+
+    Rows are [label, encoded sequence, decimal code point]; the label is only
+    documentation and is never emitted. Multi-byte sequences are listed before
+    single-byte ones, so they are replaced first. '%25' is always decoded last,
+    whatever its row position, so a freshly decoded '%' is never decoded again.
+
+    NOTE(review): only the sequences listed here, spelled exactly as written, are
+    decoded; chr() support for code points above 255 depends on the warehouse.
+#}
+{% macro default__url_decode(string_to_decode) -%}
+
+    {%- set url_characters_encodings =
+        [
+            ['€', '%E2%82%AC', 8364],
+            ['‚', '%E2%80%9A', 8218],
+            ['„', '%E2%80%9E', 8222],
+            ['…', '%E2%80%A6', 8230],
+            ['†', '%E2%80%A0', 8224],
+            ['‡', '%E2%80%A1', 8225],
+            ['‰', '%E2%80%B0', 8240],
+            ['‹', '%E2%80%B9', 8249],
+            ['left single quote', '%E2%80%98', 8216],
+            ['right single quote', '%E2%80%99', 8217],
+            ['left double quote', '%E2%80%9C', 8220],
+            ['right double quote', '%E2%80%9D', 8221],
+            ['•', '%E2%80%A2', 8226],
+            ['–', '%E2%80%93', 8211],
+            ['—', '%E2%80%94', 8212],
+            ['™', '%E2%84%A2', 8482],
+            ['›', '%E2%80%BA', 8250],
+            ['ƒ', '%C6%92', 402],
+            ['Š', '%C5%A0', 352],
+            ['Œ', '%C5%92', 338],
+            ['ō', '%C5%8D', 333],
+            ['Ž', '%C5%BD', 381],
+            ['š', '%C5%A1', 353],
+            ['œ', '%C5%93', 339],
+            ['ž', '%C5%BE', 382],
+            ['Ÿ', '%C5%B8', 376],
+            ['ˆ', '%CB%86', 710],
+            ['˜', '%CB%9C', 732],
+            ['no-break space', '%C2%A0', 160],
+            ['¡', '%C2%A1', 161],
+            ['¢', '%C2%A2', 162],
+            ['£', '%C2%A3', 163],
+            ['¤', '%C2%A4', 164],
+            ['¥', '%C2%A5', 165],
+            ['¦', '%C2%A6', 166],
+            ['§', '%C2%A7', 167],
+            ['¨', '%C2%A8', 168],
+            ['©', '%C2%A9', 169],
+            ['ª', '%C2%AA', 170],
+            ['«', '%C2%AB', 171],
+            ['¬', '%C2%AC', 172],
+            ['soft hyphen', '%C2%AD', 173],
+            ['®', '%C2%AE', 174],
+            ['¯', '%C2%AF', 175],
+            ['°', '%C2%B0', 176],
+            ['±', '%C2%B1', 177],
+            ['²', '%C2%B2', 178],
+            ['³', '%C2%B3', 179],
+            ['´', '%C2%B4', 180],
+            ['µ', '%C2%B5', 181],
+            ['¶', '%C2%B6', 182],
+            ['·', '%C2%B7', 183],
+            ['¸', '%C2%B8', 184],
+            ['¹', '%C2%B9', 185],
+            ['º', '%C2%BA', 186],
+            ['»', '%C2%BB', 187],
+            ['¼', '%C2%BC', 188],
+            ['½', '%C2%BD', 189],
+            ['¾', '%C2%BE', 190],
+            ['¿', '%C2%BF', 191],
+            ['control U+0090', '%C2%90', 144],
+            ['À', '%C3%80', 192],
+            ['Á', '%C3%81', 193],
+            ['Â', '%C3%82', 194],
+            ['Ã', '%C3%83', 195],
+            ['Ä', '%C3%84', 196],
+            ['Å', '%C3%85', 197],
+            ['Æ', '%C3%86', 198],
+            ['Ç', '%C3%87', 199],
+            ['È', '%C3%88', 200],
+            ['É', '%C3%89', 201],
+            ['Ê', '%C3%8A', 202],
+            ['Ë', '%C3%8B', 203],
+            ['Ì', '%C3%8C', 204],
+            ['Í', '%C3%8D', 205],
+            ['Î', '%C3%8E', 206],
+            ['Ï', '%C3%8F', 207],
+            ['Ð', '%C3%90', 208],
+            ['Ñ', '%C3%91', 209],
+            ['Ò', '%C3%92', 210],
+            ['Ó', '%C3%93', 211],
+            ['Ô', '%C3%94', 212],
+            ['Õ', '%C3%95', 213],
+            ['Ö', '%C3%96', 214],
+            ['×', '%C3%97', 215],
+            ['Ø', '%C3%98', 216],
+            ['Ù', '%C3%99', 217],
+            ['Ú', '%C3%9A', 218],
+            ['Û', '%C3%9B', 219],
+            ['Ü', '%C3%9C', 220],
+            ['Ý', '%C3%9D', 221],
+            ['Þ', '%C3%9E', 222],
+            ['ß', '%C3%9F', 223],
+            ['à', '%C3%A0', 224],
+            ['á', '%C3%A1', 225],
+            ['â', '%C3%A2', 226],
+            ['ã', '%C3%A3', 227],
+            ['ä', '%C3%A4', 228],
+            ['å', '%C3%A5', 229],
+            ['æ', '%C3%A6', 230],
+            ['ç', '%C3%A7', 231],
+            ['è', '%C3%A8', 232],
+            ['é', '%C3%A9', 233],
+            ['ê', '%C3%AA', 234],
+            ['ë', '%C3%AB', 235],
+            ['ì', '%C3%AC', 236],
+            ['í', '%C3%AD', 237],
+            ['î', '%C3%AE', 238],
+            ['ï', '%C3%AF', 239],
+            ['ð', '%C3%B0', 240],
+            ['ñ', '%C3%B1', 241],
+            ['ò', '%C3%B2', 242],
+            ['ó', '%C3%B3', 243],
+            ['ô', '%C3%B4', 244],
+            ['õ', '%C3%B5', 245],
+            ['ö', '%C3%B6', 246],
+            ['÷', '%C3%B7', 247],
+            ['ø', '%C3%B8', 248],
+            ['ù', '%C3%B9', 249],
+            ['ú', '%C3%BA', 250],
+            ['û', '%C3%BB', 251],
+            ['ü', '%C3%BC', 252],
+            ['ý', '%C3%BD', 253],
+            ['þ', '%C3%BE', 254],
+            ['ÿ', '%C3%BF', 255],
+            ['*', '%2A', 42],
+            ['+', '%2B', 43],
+            [',', '%2C', 44],
+            ['-', '%2D', 45],
+            ['.', '%2E', 46],
+            ['/', '%2F', 47],
+            [':', '%3A', 58],
+            [';', '%3B', 59],
+            ['<', '%3C', 60],
+            ['=', '%3D', 61],
+            ['>', '%3E', 62],
+            ['?', '%3F', 63],
+            ['J', '%4A', 74],
+            ['K', '%4B', 75],
+            ['L', '%4C', 76],
+            ['M', '%4D', 77],
+            ['N', '%4E', 78],
+            ['O', '%4F', 79],
+            ['P', '%50', 80],
+            ['Q', '%51', 81],
+            ['R', '%52', 82],
+            ['S', '%53', 83],
+            ['T', '%54', 84],
+            ['U', '%55', 85],
+            ['V', '%56', 86],
+            ['W', '%57', 87],
+            ['X', '%58', 88],
+            ['Y', '%59', 89],
+            ['Z', '%5A', 90],
+            ['[', '%5B', 91],
+            ['backslash', '%5C', 92],
+            [']', '%5D', 93],
+            ['^', '%5E', 94],
+            ['_', '%5F', 95],
+            ['`', '%60', 96],
+            ['j', '%6A', 106],
+            ['k', '%6B', 107],
+            ['l', '%6C', 108],
+            ['m', '%6D', 109],
+            ['n', '%6E', 110],
+            ['o', '%6F', 111],
+            ['p', '%70', 112],
+            ['q', '%71', 113],
+            ['r', '%72', 114],
+            ['s', '%73', 115],
+            ['t', '%74', 116],
+            ['u', '%75', 117],
+            ['v', '%76', 118],
+            ['w', '%77', 119],
+            ['x', '%78', 120],
+            ['y', '%79', 121],
+            ['z', '%7A', 122],
+            ['{', '%7B', 123],
+            ['|', '%7C', 124],
+            ['}', '%7D', 125],
+            ['~', '%7E', 126],
+            ['delete', '%7F', 127],
+            ['byte 0x81', '%81', 129],
+            ['byte 0x8F', '%8F', 143],
+            ['byte 0x9D', '%9D', 157],
+            [' ', '%20', 32],
+            ['!', '%21', 33],
+            ['"', '%22', 34],
+            ['#', '%23', 35],
+            ['$', '%24', 36],
+            ['%', '%25', 37],
+            ['&', '%26', 38],
+            ['single quote', '%27', 39],
+            ['(', '%28', 40],
+            [')', '%29', 41],
+            ['0', '%30', 48],
+            ['1', '%31', 49],
+            ['2', '%32', 50],
+            ['3', '%33', 51],
+            ['4', '%34', 52],
+            ['5', '%35', 53],
+            ['6', '%36', 54],
+            ['7', '%37', 55],
+            ['8', '%38', 56],
+            ['9', '%39', 57],
+            ['@', '%40', 64],
+            ['A', '%41', 65],
+            ['B', '%42', 66],
+            ['C', '%43', 67],
+            ['D', '%44', 68],
+            ['E', '%45', 69],
+            ['F', '%46', 70],
+            ['G', '%47', 71],
+            ['H', '%48', 72],
+            ['I', '%49', 73],
+            ['a', '%61', 97],
+            ['b', '%62', 98],
+            ['c', '%63', 99],
+            ['d', '%64', 100],
+            ['e', '%65', 101],
+            ['f', '%66', 102],
+            ['g', '%67', 103],
+            ['h', '%68', 104],
+            ['i', '%69', 105]
+        ]
+    -%}
+
+    {%- set ns = namespace(returning_query = string_to_decode) -%}
+
+    {%- for character, encoding, decimal_code in url_characters_encodings -%}
+
+        {#- '%25' is skipped here and applied once after the loop (see below) -#}
+        {%- if encoding != '%25' -%}
+            {%- set ns.returning_query = "replace(" ~ ns.returning_query ~ ", '" ~ encoding ~ "', chr(" ~ decimal_code ~ "))" -%}
+        {%- endif -%}
+
+    {%- endfor -%}
+
+    {#- outermost replace(): the '%' it yields cannot be re-read as the start of another sequence -#}
+    {%- set ns.returning_query = "replace(" ~ ns.returning_query ~ ", '%25', chr(37))" -%}
+
+    {{- ns.returning_query -}}
+
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/web/url_encode.sql b/macros/web/url_encode.sql
new file mode 100644
index 00000000..d401a63e
--- /dev/null
+++ b/macros/web/url_encode.sql
@@ -0,0 +1,58 @@
+{#
+    Percent-encodes reserved URL characters in a string expression.
+
+    Args:
+        string_to_encode: SQL expression (e.g. a column name) holding the raw text.
+
+    Dispatches to an adapter-specific implementation; default__url_encode is the fallback.
+#}
+{% macro url_encode(string_to_encode) -%}
+    {{ return(adapter.dispatch('url_encode', 'dbt_utils')(string_to_encode)) }}
+{% endmacro %}
+
+{#
+    Default implementation: builds one nested replace() call per row, swapping
+    chr(code point) for its percent-encoded form.
+
+    Rows are [label, encoded sequence, decimal code point]; the label is only
+    documentation. '%' must stay the first row: every later replacement inserts
+    a '%', so encoding '%' after them would encode those again.
+#}
+{% macro default__url_encode(string_to_encode) -%}
+
+    {%- set url_characters_encodings =
+        [
+            ['%', '%25', 37],
+            [':', '%3A', 58],
+            ['/', '%2F', 47],
+            ['?', '%3F', 63],
+            ['#', '%23', 35],
+            ['[', '%5B', 91],
+            [']', '%5D', 93],
+            ['@', '%40', 64],
+            ['!', '%21', 33],
+            ['$', '%24', 36],
+            ['&', '%26', 38],
+            ['single quote', '%27', 39],
+            ['(', '%28', 40],
+            [')', '%29', 41],
+            ['*', '%2A', 42],
+            ['+', '%2B', 43],
+            [',', '%2C', 44],
+            [';', '%3B', 59],
+            ['=', '%3D', 61],
+            [' ', '%20', 32]
+        ]
+    -%}
+
+    {%- set ns = namespace(returning_query = string_to_encode) -%}
+
+    {%- for character, encoding, decimal_code in url_characters_encodings -%}
+
+        {%- set ns.returning_query = "replace(" ~ ns.returning_query ~ ", chr(" ~ decimal_code ~ "), '" ~ encoding ~ "')" -%}
+
+    {%- endfor -%}
+
+    {{- ns.returning_query -}}
+
+{% endmacro %}
\ No newline at end of file

From 735cc59f5581a7d622bdf084b1a0afba35b26a86 Mon Sep 17 00:00:00 2001
From: Damir Mustafic
Date: Mon, 26 May 2025 17:28:11 +0200
Subject: [PATCH 2/6] readme

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 378c5930..02b08a59 100644
--- a/README.md
+++ b/README.md
@@ -1297,6 +1297,26 @@ This macro extracts a page path from a column containing a url.
{{ dbt_utils.get_url_path(field='page_url') }} ``` +### url_encode ([source](macros/web/url_encode.sql)) + +This macro percent-encodes a string so that it can be safely used in a url. + +**Usage:** + +```sql +{{ dbt_utils.url_encode('field_to_encode') }} +``` + +### url_decode ([source](macros/web/url_decode.sql)) + +This macro decodes a url-encoded (percent-encoded) string back to plain text. + +**Usage:** + +```sql +{{ dbt_utils.url_decode('field_to_decode') }} +``` + ---- ## Cross-database macros From 503b3175c78788f744bf5f4bfda1e14e919efada Mon Sep 17 00:00:00 2001 From: Damir Mustafic Date: Sun, 8 Jun 2025 15:01:29 +0200 Subject: [PATCH 3/6] Integration tests --- .../data/web/data_url_encode_decode.csv | 2 ++ integration_tests/models/web/schema.yml | 6 ++++++ .../models/web/test_url_encode_decode.sql | 21 +++++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 integration_tests/data/web/data_url_encode_decode.csv create mode 100644 integration_tests/models/web/test_url_encode_decode.sql diff --git a/integration_tests/data/web/data_url_encode_decode.csv b/integration_tests/data/web/data_url_encode_decode.csv new file mode 100644 index 00000000..12240801 --- /dev/null +++ b/integration_tests/data/web/data_url_encode_decode.csv @@ -0,0 +1,2 @@ +encoded,decoded +https://www.getdbt.com?param=questionmark?space at@dollar$brackets()plus+star*percent%,https%3A%2F%2Fwww.getdbt.com%3Fparam%3Dquestionmark%3Fspace%20at%40dollar%24brackets%28%29plus%2Bstar%2Apercent%25 \ No newline at end of file diff --git a/integration_tests/models/web/schema.yml b/integration_tests/models/web/schema.yml index 7f02b317..328a4273 100644 --- a/integration_tests/models/web/schema.yml +++ b/integration_tests/models/web/schema.yml @@ -14,6 +14,12 @@ models: expected: expected - name: test_url_path + data_tests: + - assert_equal: + actual: actual + expected: expected + + - name: test_url_encode_decode data_tests: - assert_equal: actual: actual diff --git a/integration_tests/models/web/test_url_encode_decode.sql 
b/integration_tests/models/web/test_url_encode_decode.sql new file mode 100644 index 00000000..7f0aaf38 --- /dev/null +++ b/integration_tests/models/web/test_url_encode_decode.sql @@ -0,0 +1,21 @@ +with + +test_data as ( + + select * from {{ ref('data_url_encode_decode') }} + +) + +select + {{ url_encode('decoded') }} as actual, + encoded as expected + +from test_data + +union all + +select + {{ url_decode('encoded') }} as actual, + decoded as expected + +from test_data \ No newline at end of file From 02c9d9f01829f95da93d95aa5200e98051f40550 Mon Sep 17 00:00:00 2001 From: Damir Mustafic Date: Mon, 9 Jun 2025 22:46:07 +0200 Subject: [PATCH 4/6] Fixed integration test --- .../models/web/test_url_encode_decode.sql | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/integration_tests/models/web/test_url_encode_decode.sql b/integration_tests/models/web/test_url_encode_decode.sql index 7f0aaf38..e7d6dfd8 100644 --- a/integration_tests/models/web/test_url_encode_decode.sql +++ b/integration_tests/models/web/test_url_encode_decode.sql @@ -4,18 +4,24 @@ test_data as ( select * from {{ ref('data_url_encode_decode') }} -) +), + +final as ( + + select + {{ dbt_utils.url_encode('decoded') }} as actual, + encoded as expected -select - {{ url_encode('decoded') }} as actual, - encoded as expected + from test_data -from test_data + union all -union all + select + {{ dbt_utils.url_decode('encoded') }} as actual, + decoded as expected -select - {{ url_decode('encoded') }} as actual, - decoded as expected + from test_data + +) -from test_data \ No newline at end of file +select * from final \ No newline at end of file From 7b8952cd2d010491a6ac76f9faa24bf911766747 Mon Sep 17 00:00:00 2001 From: Damir Mustafic Date: Tue, 10 Jun 2025 19:12:44 +0200 Subject: [PATCH 5/6] Chars order; Fixed integration test; --- integration_tests/models/web/test_url_encode_decode.sql | 4 ++-- macros/web/url_decode.sql | 2 +- 2 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/integration_tests/models/web/test_url_encode_decode.sql b/integration_tests/models/web/test_url_encode_decode.sql index e7d6dfd8..031e9484 100644 --- a/integration_tests/models/web/test_url_encode_decode.sql +++ b/integration_tests/models/web/test_url_encode_decode.sql @@ -10,7 +10,7 @@ final as ( select {{ dbt_utils.url_encode('decoded') }} as actual, - encoded as expected + decoded as expected from test_data @@ -18,7 +18,7 @@ final as ( select {{ dbt_utils.url_decode('encoded') }} as actual, - decoded as expected + encoded as expected from test_data diff --git a/macros/web/url_decode.sql b/macros/web/url_decode.sql index 4438b7ca..fe1d5080 100644 --- a/macros/web/url_decode.sql +++ b/macros/web/url_decode.sql @@ -131,6 +131,7 @@ ['ý', '%C3%BD', 253], ['þ', '%C3%BE', 254], ['ÿ', '%C3%BF', 255], + ['%', '%25', 37], ['*', '%2A', 42], ['+', '%2B', 43], [',', '%2C', 44], @@ -196,7 +197,6 @@ ['"', '%22', 34], ['#', '%23', 35], ['$', '%24', 36], - ['%', '%25', 37], ['&', '%26', 38], ['single quote', '%27', 39], ['(', '%28', 40], From cee970a26c2b49235950e446b2acfd3b6549a4a8 Mon Sep 17 00:00:00 2001 From: Damir Mustafic Date: Tue, 10 Jun 2025 19:44:04 +0200 Subject: [PATCH 6/6] Integration test fix --- integration_tests/models/web/test_url_encode_decode.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/models/web/test_url_encode_decode.sql b/integration_tests/models/web/test_url_encode_decode.sql index 031e9484..dec6bb5c 100644 --- a/integration_tests/models/web/test_url_encode_decode.sql +++ b/integration_tests/models/web/test_url_encode_decode.sql @@ -9,7 +9,7 @@ test_data as ( final as ( select - {{ dbt_utils.url_encode('decoded') }} as actual, + {{ dbt_utils.url_encode('encoded') }} as actual, decoded as expected from test_data @@ -17,7 +17,7 @@ final as ( union all select - {{ dbt_utils.url_decode('encoded') }} as actual, + {{ dbt_utils.url_decode('decoded') }} as actual, encoded as expected from 
test_data