From efec969c2d8d06e01ccbe4b5cab5c2ba6830abda Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Thu, 5 Dec 2024 22:55:26 -0500 Subject: [PATCH 01/10] feat: add JSON support --- sqlalchemy_bigquery/__init__.py | 2 + sqlalchemy_bigquery/_json.py | 49 ++++++++++++++++++++ sqlalchemy_bigquery/_types.py | 3 ++ sqlalchemy_bigquery/base.py | 38 ++++++++++++++- tests/unit/test__json.py | 82 +++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 sqlalchemy_bigquery/_json.py create mode 100644 tests/unit/test__json.py diff --git a/sqlalchemy_bigquery/__init__.py b/sqlalchemy_bigquery/__init__.py index 1e506125..567015ee 100644 --- a/sqlalchemy_bigquery/__init__.py +++ b/sqlalchemy_bigquery/__init__.py @@ -37,6 +37,7 @@ FLOAT64, INT64, INTEGER, + JSON, NUMERIC, RECORD, STRING, @@ -74,6 +75,7 @@ "FLOAT64", "INT64", "INTEGER", + "JSON", "NUMERIC", "RECORD", "STRING", diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py new file mode 100644 index 00000000..26be4f0e --- /dev/null +++ b/sqlalchemy_bigquery/_json.py @@ -0,0 +1,49 @@ +from sqlalchemy.sql import sqltypes + + +class _FormatTypeMixin: + def _format_value(self, value): + raise NotImplementedError() + + def bind_processor(self, dialect): + super_proc = self.string_bind_processor(dialect) + + def process(value): + value = self._format_value(value) + if super_proc: + value = super_proc(value) + return value + + return process + + def literal_processor(self, dialect): + super_proc = self.string_literal_processor(dialect) + + def process(value): + value = self._format_value(value) + if super_proc: + value = super_proc(value) + return value + + return process + + +class JSON(sqltypes.JSON): + ... + + +class JSONIndexType(_FormatTypeMixin, sqltypes.JSON.JSONIndexType): + def _format_value(self, value): + if isinstance(value, int): + value = "$[%s]" % value + else: + value = '$.%s' % value + return value + + +class JSONIntIndexType(JSONIndexType): + __visit_name__ = "json_int_index" + + +class JSONStrIndexType(JSONIndexType): + __visit_name__ = "json_str_index" diff --git a/sqlalchemy_bigquery/_types.py b/sqlalchemy_bigquery/_types.py index 8399e978..6a268ce9 100644 --- a/sqlalchemy_bigquery/_types.py +++ b/sqlalchemy_bigquery/_types.py @@ -27,6 +27,7 @@ except ImportError: # pragma: NO COVER pass +from ._json import JSON from ._struct import STRUCT _type_map = { @@ -41,6 +42,7 @@ "FLOAT": sqlalchemy.types.Float, "INT64": sqlalchemy.types.Integer, "INTEGER": sqlalchemy.types.Integer, + "JSON": JSON, "NUMERIC": sqlalchemy.types.Numeric, "RECORD": STRUCT, "STRING": sqlalchemy.types.String, @@ -61,6 +63,7 @@ FLOAT = _type_map["FLOAT"] INT64 = _type_map["INT64"] INTEGER = _type_map["INTEGER"] +JSON = _type_map["JSON"] NUMERIC = _type_map["NUMERIC"] RECORD = _type_map["RECORD"] STRING = _type_map["STRING"] diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index c36ca1b1..f4801acd 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -49,7 +49,7 @@ DDLCompiler, IdentifierPreparer, ) -from sqlalchemy.sql.sqltypes import Integer, String, NullType, Numeric +from sqlalchemy.sql.sqltypes import Integer, JSON, String, NullType, Numeric from sqlalchemy.engine.default import DefaultDialect, DefaultExecutionContext from sqlalchemy.engine.base import Engine from sqlalchemy.sql.schema import Column @@ -59,7 +59,7 @@ import re from .parse_url import parse_url -from . import _helpers, _struct, _types +from . import _helpers, _json, _struct, _types import sqlalchemy_bigquery_vendored.sqlalchemy.postgresql.base as vendored_postgresql # Illegal characters is intended to be all characters that are not explicitly @@ -531,6 +531,17 @@ def visit_bindparam( if literal_binds or isinstance(type_, NullType): return param + # FIXME: Adapt to dialect-specific JSON element types + # This feels weird, but I cannot figure out how to get sqlalchemy + # to consult `colspecs` in this context, and it feels more correct + # vs skipping these types here. + + if isinstance(type_, JSON.JSONIntIndexType): + type_ = type_.adapt(_json.JSONIntIndexType) + + if isinstance(type_, JSON.JSONStrIndexType): + type_ = type_.adapt(_json.JSONStrIndexType) + if ( isinstance(type_, Numeric) and (type_.precision is None or type_.scale is None) @@ -571,6 +582,12 @@ def visit_getitem_binary(self, binary, operator_, **kw): right = self.process(binary.right, **kw) return f"{left}[OFFSET({right})]" + def visit_json_getitem_op_binary(self, binary, operator_, **kw): + return "JSON_QUERY(%s, %s)" % ( + self.process(binary.left, **kw), + self.process(binary.right, **kw), + ) + def _get_regexp_args(self, binary, kw): string = self.process(binary.left, **kw) pattern = self.process(binary.right, **kw) @@ -641,6 +658,15 @@ def visit_NUMERIC(self, type_, **kw): visit_DECIMAL = visit_NUMERIC + def visit_JSON(self, type_, **kw): + return "JSON" + + def visit_json_int_index(self, type_, **kw): + return "STRING" + + def visit_json_str_index(self, type_, **kw): + return "STRING" + class BigQueryDDLCompiler(DDLCompiler): option_datatype_mapping = { @@ -1076,6 +1102,10 @@ class BigQueryDialect(DefaultDialect): sqlalchemy.sql.sqltypes.TIMESTAMP: BQTimestamp, sqlalchemy.sql.sqltypes.ARRAY: BQArray, sqlalchemy.sql.sqltypes.Enum: sqlalchemy.sql.sqltypes.Enum, + sqlalchemy.sql.sqltypes.JSON: _json.JSON, + sqlalchemy.sql.sqltypes.JSON.JSONIndexType: _json.JSONIndexType, + sqlalchemy.sql.sqltypes.JSON.JSONIntIndexType: _json.JSONIntIndexType, + sqlalchemy.sql.sqltypes.JSON.JSONStrIndexType: _json.JSONStrIndexType, } def __init__( @@ -1086,6 +1116,8 @@ def __init__( credentials_info=None, credentials_base64=None, list_tables_page_size=1000, + json_serializer=None, + json_deserializer=None, *args, **kwargs, ): @@ -1098,6 +1130,8 @@ def __init__( self.identifier_preparer = self.preparer(self) self.dataset_id = None self.list_tables_page_size = list_tables_page_size + self._json_serializer = json_serializer + self._json_deserializer = json_deserializer @classmethod def dbapi(cls): diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py new file mode 100644 index 00000000..52b4e79d --- /dev/null +++ b/tests/unit/test__json.py @@ -0,0 +1,82 @@ +import json +import pytest + +import sqlalchemy + + +@pytest.fixture +def json_table(metadata): + from sqlalchemy_bigquery import JSON + return sqlalchemy.Table( + "json_table", + metadata, + sqlalchemy.Column("cart", JSON), + ) + + +@pytest.fixture +def json_column(json_table): + return json_table.c.cart + + +@pytest.fixture +def json_data(): + return { + "name": "Alice", + "items": [ + {"product": "book", "price": 10}, + {"product": "food", "price": 5} + ] + } + + +def test_roundtrip_json(faux_conn, json_table, json_data): + faux_conn.ex(f"create table json_table (cart JSON)") + faux_conn.ex(f"insert into json_table values ('{json.dumps(json_data)}')") + + row = list(faux_conn.execute(sqlalchemy.select(json_table)))[0] + assert row.cart == json_data + + +def test_json_insert_type_info(faux_conn, metadata, json_table, json_data): + actual = str( + json_table.insert() + .values( + cart=json_data + ) + .compile(faux_conn.engine) + ) + + assert actual == "INSERT INTO `json_table` (`cart`) VALUES (%(cart:JSON)s)" + + +@pytest.mark.parametrize( + "index_values,sql,literal_sql", + ( + ( + ["name"], + "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + "JSON_QUERY(`json_table`.`cart`, '$.name')", + ), + ), +) +def test_json_index(faux_conn, json_column, index_values, sql, literal_sql): + expr = json_column + + for value in index_values: + expr = expr[value] + + expected_sql = f"SELECT {sql} AS `anon_1` \nFROM `json_table`" + expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" + + actual_sql = sqlalchemy.select(expr).compile(faux_conn).string + actual_literal_sql = sqlalchemy.select(expr).compile(faux_conn, compile_kwargs={"literal_binds": True}).string + + assert expected_sql == actual_sql + assert expected_literal_sql == actual_literal_sql + +# TODO: AFAICT, JSON is not a supported query parameter type - enforce this + +# TODO: Test _json_serializer set from create_engine + +# TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON \ No newline at end of file From e95ec73e3301b490fd9e315ed3c18bde6e555c28 Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Fri, 6 Dec 2024 19:06:03 -0500 Subject: [PATCH 02/10] add json path --- sqlalchemy_bigquery/_json.py | 17 +++++++++++++++-- sqlalchemy_bigquery/base.py | 10 ++++++++++ tests/unit/test__json.py | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index 26be4f0e..d2af0ea3 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -37,13 +37,26 @@ def _format_value(self, value): if isinstance(value, int): value = "$[%s]" % value else: - value = '$.%s' % value + value = '$."%s"' % value return value - class JSONIntIndexType(JSONIndexType): __visit_name__ = "json_int_index" class JSONStrIndexType(JSONIndexType): __visit_name__ = "json_str_index" + + +class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): + def _format_value(self, value): + return "$%s" % ( + "".join( + [ + "[%s]" % elem if isinstance(elem, int) else '."%s"' % elem + for elem in value + ] + ) + ) + + diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index f4801acd..0ab9103d 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -588,6 +588,12 @@ def visit_json_getitem_op_binary(self, binary, operator_, **kw): self.process(binary.right, **kw), ) + def visit_json_path_getitem_op_binary(self, binary, operator, **kw): + return "JSON_QUERY(%s, %s)" % ( + self.process(binary.left, **kw), + self.process(binary.right, **kw), + ) + def _get_regexp_args(self, binary, kw): string = self.process(binary.left, **kw) pattern = self.process(binary.right, **kw) @@ -661,6 +667,9 @@ def visit_NUMERIC(self, type_, **kw): def visit_JSON(self, type_, **kw): return "JSON" + def visit_json_path(self, type_, **kw): + return "STRING" + def visit_json_int_index(self, type_, **kw): return "STRING" @@ -1104,6 +1113,7 @@ class BigQueryDialect(DefaultDialect): sqlalchemy.sql.sqltypes.Enum: sqlalchemy.sql.sqltypes.Enum, sqlalchemy.sql.sqltypes.JSON: _json.JSON, sqlalchemy.sql.sqltypes.JSON.JSONIndexType: _json.JSONIndexType, + sqlalchemy.sql.sqltypes.JSON.JSONPathType: _json.JSONPathType, sqlalchemy.sql.sqltypes.JSON.JSONIntIndexType: _json.JSONIntIndexType, sqlalchemy.sql.sqltypes.JSON.JSONStrIndexType: _json.JSONStrIndexType, } diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index 52b4e79d..e75cd16f 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -56,8 +56,13 @@ def test_json_insert_type_info(faux_conn, metadata, json_table, json_data): ( ["name"], "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", - "JSON_QUERY(`json_table`.`cart`, '$.name')", + "JSON_QUERY(`json_table`.`cart`, '$.\"name\"')", ), + # ( + # ["items", 0], + # "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + # "JSON_QUERY(`json_table`.`cart`, '$.items[0]')", + # ), ), ) def test_json_index(faux_conn, json_column, index_values, sql, literal_sql): @@ -75,6 +80,33 @@ def test_json_index(faux_conn, json_column, index_values, sql, literal_sql): assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql +@pytest.mark.parametrize( + "index_values,sql,literal_sql", + ( + ( + ["name"], + "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + "JSON_QUERY(`json_table`.`cart`, '$.\"name\"')", + ), + # ( + # ["items", 0], + # "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + # "JSON_QUERY(`json_table`.`cart`, '$.items[0]')", + # ), + ), +) +def test_json_path(faux_conn, json_column, index_values, sql, literal_sql): + expr = json_column[index_values] + + expected_sql = f"SELECT {sql} AS `anon_1` \nFROM `json_table`" + expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" + + actual_sql = sqlalchemy.select(expr).compile(faux_conn).string + actual_literal_sql = sqlalchemy.select(expr).compile(faux_conn, compile_kwargs={"literal_binds": True}).string + + assert expected_sql == actual_sql + assert expected_literal_sql == actual_literal_sql + # TODO: AFAICT, JSON is not a supported query parameter type - enforce this # TODO: Test _json_serializer set from create_engine From 3c985295cb9b2635d80e349241773fcb086b5e31 Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Fri, 6 Dec 2024 19:33:28 -0500 Subject: [PATCH 03/10] no indexing, test json_value --- sqlalchemy_bigquery/_json.py | 16 ---------- sqlalchemy_bigquery/base.py | 28 +---------------- tests/unit/test__json.py | 60 +++++++++++++++--------------------- 3 files changed, 25 insertions(+), 79 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index d2af0ea3..b04daf3a 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -32,22 +32,6 @@ class JSON(sqltypes.JSON): ... -class JSONIndexType(_FormatTypeMixin, sqltypes.JSON.JSONIndexType): - def _format_value(self, value): - if isinstance(value, int): - value = "$[%s]" % value - else: - value = '$."%s"' % value - return value - -class JSONIntIndexType(JSONIndexType): - __visit_name__ = "json_int_index" - - -class JSONStrIndexType(JSONIndexType): - __visit_name__ = "json_str_index" - - class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): def _format_value(self, value): return "$%s" % ( diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 0ab9103d..c5ca500f 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -49,7 +49,7 @@ DDLCompiler, IdentifierPreparer, ) -from sqlalchemy.sql.sqltypes import Integer, JSON, String, NullType, Numeric +from sqlalchemy.sql.sqltypes import Integer, String, NullType, Numeric from sqlalchemy.engine.default import DefaultDialect, DefaultExecutionContext from sqlalchemy.engine.base import Engine from sqlalchemy.sql.schema import Column @@ -531,17 +531,6 @@ def visit_bindparam( if literal_binds or isinstance(type_, NullType): return param - # FIXME: Adapt to dialect-specific JSON element types - # This feels weird, but I cannot figure out how to get sqlalchemy - # to consult `colspecs` in this context, and it feels more correct - # vs skipping these types here. - - if isinstance(type_, JSON.JSONIntIndexType): - type_ = type_.adapt(_json.JSONIntIndexType) - - if isinstance(type_, JSON.JSONStrIndexType): - type_ = type_.adapt(_json.JSONStrIndexType) - if ( isinstance(type_, Numeric) and (type_.precision is None or type_.scale is None) @@ -582,12 +571,6 @@ def visit_getitem_binary(self, binary, operator_, **kw): right = self.process(binary.right, **kw) return f"{left}[OFFSET({right})]" - def visit_json_getitem_op_binary(self, binary, operator_, **kw): - return "JSON_QUERY(%s, %s)" % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw), - ) - def visit_json_path_getitem_op_binary(self, binary, operator, **kw): return "JSON_QUERY(%s, %s)" % ( self.process(binary.left, **kw), @@ -670,12 +653,6 @@ def visit_JSON(self, type_, **kw): def visit_json_path(self, type_, **kw): return "STRING" - def visit_json_int_index(self, type_, **kw): - return "STRING" - - def visit_json_str_index(self, type_, **kw): - return "STRING" - class BigQueryDDLCompiler(DDLCompiler): option_datatype_mapping = { @@ -1112,10 +1089,7 @@ class BigQueryDialect(DefaultDialect): sqlalchemy.sql.sqltypes.ARRAY: BQArray, sqlalchemy.sql.sqltypes.Enum: sqlalchemy.sql.sqltypes.Enum, sqlalchemy.sql.sqltypes.JSON: _json.JSON, - sqlalchemy.sql.sqltypes.JSON.JSONIndexType: _json.JSONIndexType, sqlalchemy.sql.sqltypes.JSON.JSONPathType: _json.JSONPathType, - sqlalchemy.sql.sqltypes.JSON.JSONIntIndexType: _json.JSONIntIndexType, - sqlalchemy.sql.sqltypes.JSON.JSONStrIndexType: _json.JSONStrIndexType, } def __init__( diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index e75cd16f..dc17911d 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -30,7 +30,7 @@ def json_data(): } -def test_roundtrip_json(faux_conn, json_table, json_data): +def test_select_json(faux_conn, json_table, json_data): faux_conn.ex(f"create table json_table (cart JSON)") faux_conn.ex(f"insert into json_table values ('{json.dumps(json_data)}')") @@ -38,7 +38,7 @@ def test_roundtrip_json(faux_conn, json_table, json_data): assert row.cart == json_data -def test_json_insert_type_info(faux_conn, metadata, json_table, json_data): +def test_insert_json(faux_conn, metadata, json_table, json_data): actual = str( json_table.insert() .values( @@ -51,58 +51,46 @@ def test_json_insert_type_info(faux_conn, metadata, json_table, json_data): @pytest.mark.parametrize( - "index_values,sql,literal_sql", + "path,sql,literal_sql", ( ( ["name"], "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", "JSON_QUERY(`json_table`.`cart`, '$.\"name\"')", ), - # ( - # ["items", 0], - # "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", - # "JSON_QUERY(`json_table`.`cart`, '$.items[0]')", - # ), + ( + ["items", 0], + "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + "JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]')", + ), + ( + ["items", 0, "price"], + "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", + "JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0].\"price\"')", + ), ), ) -def test_json_index(faux_conn, json_column, index_values, sql, literal_sql): - expr = json_column - - for value in index_values: - expr = expr[value] +def test_json_query(faux_conn, json_column, path, sql, literal_sql): + expr = sqlalchemy.select(json_column[path]) expected_sql = f"SELECT {sql} AS `anon_1` \nFROM `json_table`" expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" - actual_sql = sqlalchemy.select(expr).compile(faux_conn).string - actual_literal_sql = sqlalchemy.select(expr).compile(faux_conn, compile_kwargs={"literal_binds": True}).string + actual_sql = expr.compile(faux_conn).string + actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql -@pytest.mark.parametrize( - "index_values,sql,literal_sql", - ( - ( - ["name"], - "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", - "JSON_QUERY(`json_table`.`cart`, '$.\"name\"')", - ), - # ( - # ["items", 0], - # "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", - # "JSON_QUERY(`json_table`.`cart`, '$.items[0]')", - # ), - ), -) -def test_json_path(faux_conn, json_column, index_values, sql, literal_sql): - expr = json_column[index_values] - expected_sql = f"SELECT {sql} AS `anon_1` \nFROM `json_table`" - expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" +def test_json_value(faux_conn, json_column, json_data): + expr = sqlalchemy.select(json_column[["items", 0]].label("first_item")).where(sqlalchemy.func.JSON_VALUE(json_column[["name"]]) == 'Alice') + + expected_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" + expected_literal_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" - actual_sql = sqlalchemy.select(expr).compile(faux_conn).string - actual_literal_sql = sqlalchemy.select(expr).compile(faux_conn, compile_kwargs={"literal_binds": True}).string + actual_sql = expr.compile(faux_conn).string + actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql From 209cda106c14c67bf032d6cb9a6eb178e7757aa9 Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Fri, 6 Dec 2024 23:59:06 -0500 Subject: [PATCH 04/10] no json query param --- sqlalchemy_bigquery/_json.py | 16 +++++++++++++++- sqlalchemy_bigquery/base.py | 7 +++++++ tests/unit/test__json.py | 24 +++++++++++++++++++++--- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index b04daf3a..5611ac43 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -1,3 +1,5 @@ +import json +import sqlalchemy from sqlalchemy.sql import sqltypes @@ -29,10 +31,22 @@ def process(value): class JSON(sqltypes.JSON): - ... + def bind_expression(self, bindvalue): + # JSON query parameters are STRINGs + return sqlalchemy.func.PARSE_JSON(bindvalue, type_=self) + + def literal_processor(self, dialect): + json_serializer = dialect._json_serializer or json.dumps + + def process(value): + value = json_serializer(value) + return f"'{value}'" + + return process class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): + # TODO: Handle lax, lax recursive def _format_value(self, value): return "$%s" % ( "".join( diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index c5ca500f..344683b7 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -547,6 +547,13 @@ def visit_bindparam( bq_type = self.dialect.type_compiler.process(type_) bq_type = self.__remove_type_parameter(bq_type) + if bq_type == 'JSON': + # FIXME: JSON is not a member of `SqlParameterScalarTypes` in the DBAPI + # For now, we hack around this by: + # - Rewriting the bindparam type to STRING + # - Applying a bind expression that converts the parameter back to JSON + bq_type = 'STRING' + assert_(param != "%s", f"Unexpected param: {param}") if bindparam.expanding: # pragma: NO COVER diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index dc17911d..5bd27468 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -47,7 +47,7 @@ def test_insert_json(faux_conn, metadata, json_table, json_data): .compile(faux_conn.engine) ) - assert actual == "INSERT INTO `json_table` (`cart`) VALUES (%(cart:JSON)s)" + assert actual == "INSERT INTO `json_table` (`cart`) VALUES (PARSE_JSON(%(cart:STRING)s))" @pytest.mark.parametrize( @@ -95,8 +95,26 @@ def test_json_value(faux_conn, json_column, json_data): assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql -# TODO: AFAICT, JSON is not a supported query parameter type - enforce this + +def test_json_literal(faux_conn): + from sqlalchemy_bigquery import JSON + expr = sqlalchemy.select(sqlalchemy.func.STRING(sqlalchemy.sql.expression.literal("purple", type_=JSON)).label("color")) + + expected_sql = "SELECT STRING(PARSE_JSON(%(param_1:STRING)s)) AS `color`" + expected_literal_sql = "SELECT STRING(PARSE_JSON('\"purple\"')) AS `color`" + + actual_sql = expr.compile(faux_conn).string + actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string + + assert expected_sql == actual_sql + assert expected_literal_sql == actual_literal_sql # TODO: Test _json_serializer set from create_engine -# TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON \ No newline at end of file +# TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON + +# TODO: Test differences between JSON and JSON-formatted STRING + +# TODO: Support lax + lax recursive + +# TODO: Provide some GenericFunction, or at least docs for how to use type_ \ No newline at end of file From 1c596f1d4590da45157e38aeacad6d168eda3d3e Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sat, 7 Dec 2024 15:46:11 -0500 Subject: [PATCH 05/10] test json serde set via engine --- tests/unit/test__json.py | 6 +++--- tests/unit/test_engine.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index 5bd27468..e5d43d05 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -109,11 +109,11 @@ def test_json_literal(faux_conn): assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql -# TODO: Test _json_serializer set from create_engine - # TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON -# TODO: Test differences between JSON and JSON-formatted STRING +# TODO: Test SQL NULL vs JSON null as described above + +# TODO: Test documented differences between JSON and JSON-formatted STRING # TODO: Support lax + lax recursive diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index 59481baa..0e537e4e 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -16,6 +16,8 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import json +from unittest import mock import pytest import sqlalchemy @@ -64,3 +66,31 @@ def test_arraysize_querystring_takes_precedence_over_default(faux_conn, metadata metadata.create_all(engine) assert conn.connection.test_data["arraysize"] == arraysize + + +def test_set_json_serde(faux_conn, metadata): + from sqlalchemy_bigquery import JSON + json_serializer = mock.Mock(side_effect=json.dumps) + json_deserializer = mock.Mock(side_effect=json.loads) + + engine = sqlalchemy.create_engine( + f"bigquery://myproject/mydataset", + json_serializer=json_serializer, + json_deserializer=json_deserializer + ) + + json_data = {"foo": "bar"} + json_table = sqlalchemy.Table("json_table", metadata, sqlalchemy.Column("json", JSON)) + + metadata.create_all(engine) + faux_conn.ex(f"insert into json_table values ('{json.dumps(json_data)}')") + + with engine.begin() as conn: + row = conn.execute(sqlalchemy.select(json_table.c.json)).first() + assert row == (json_data,) + assert json_deserializer.mock_calls == [mock.call(json.dumps(json_data))] + + expr = sqlalchemy.select(sqlalchemy.literal(json_data, type_=JSON)) + literal_sql = expr.compile(engine, compile_kwargs={"literal_binds": True}).string + assert literal_sql == f"SELECT PARSE_JSON('{json.dumps(json_data)}') AS `anon_1`" + assert json_serializer.mock_calls == [mock.call(json_data)] From 9c0c560f305c1e12beb65391596785570650938c Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sat, 7 Dec 2024 17:26:13 -0500 Subject: [PATCH 06/10] add casts --- sqlalchemy_bigquery/_json.py | 32 +++++++++++++++++++++++++++++++- tests/unit/test__json.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index 5611ac43..aa7a4fca 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -40,10 +40,40 @@ def literal_processor(self, dialect): def process(value): value = json_serializer(value) - return f"'{value}'" + return repr(value) return process + class Comparator(sqltypes.JSON.Comparator): + def _generate_converter(self, name, lax): + prefix = 'LAX_' if lax else '' + func_ = getattr(sqlalchemy.func, f"{prefix}{name}") + return func_ + + def as_boolean(self, lax=False): + func_ = self._generate_converter("BOOL", lax) + return func_(self.expr, type_=sqltypes.Boolean) + + def as_string(self, lax=False): + func_ = self._generate_converter("STRING", lax) + return func_(self.expr, type_=sqltypes.String) + + def as_integer(self, lax=False): + func_ = self._generate_converter("INT64", lax) + return func_(self.expr, type_=sqltypes.Integer) + + def as_float(self, lax=False): + func_ = self._generate_converter("FLOAT64", lax) + return func_(self.expr, type_=sqltypes.Float) + + def as_numeric(self, precision, scale, asdecimal=True): + # No converter available - technically we could cast, but even + # then we can't satisfy this interface because it is not possible + # to cast to parameterized types. + raise NotImplementedError() + + comparator_factory = Comparator + class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): # TODO: Handle lax, lax recursive diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index e5d43d05..b2895020 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -109,6 +109,36 @@ def test_json_literal(faux_conn): assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql + +@pytest.mark.parametrize("lax", (False, True)) +def test_json_casts(faux_conn, json_column, json_data, lax): + from sqlalchemy_bigquery import JSON + prefix = 'LAX_' if lax else '' # FIXME: Manually parameterize + + expr = sqlalchemy.select(1).where(json_column[["name"]].as_string(lax=lax) == 'Alice') + assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( + "SELECT 1 \n" + "FROM `json_table` \n" + f"WHERE {prefix}STRING(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" + ) + + expr = sqlalchemy.select(1).where(json_column[["items", 1, "price"]].as_integer(lax=lax) == 10) + assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( + "SELECT 1 \n" + "FROM `json_table` \n" + f"WHERE {prefix}INT64(JSON_QUERY(`json_table`.`cart`, '$.\"items\"[1].\"price\"')) = 10" + ) + + expr = sqlalchemy.select(sqlalchemy.literal(10.0, type_=JSON).as_float(lax=lax) == 10.0) + assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( + f"SELECT {prefix}FLOAT64(PARSE_JSON('10.0')) = 10.0 AS `anon_1`" + ) + + expr = sqlalchemy.select(sqlalchemy.literal(True, type_=JSON).as_boolean(lax=lax) == sqlalchemy.true()) + assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( + f"SELECT {prefix}BOOL(PARSE_JSON('true')) = true AS `anon_1`" + ) + # TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON # TODO: Test SQL NULL vs JSON null as described above From 90339924303c7c707276b97ef8cc1fa9e800672c Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sat, 7 Dec 2024 18:31:17 -0500 Subject: [PATCH 07/10] simplify literal processor --- sqlalchemy_bigquery/_json.py | 8 +++----- tests/unit/test__json.py | 7 ++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index aa7a4fca..dd2f3ff4 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -36,10 +36,10 @@ def bind_expression(self, bindvalue): return sqlalchemy.func.PARSE_JSON(bindvalue, type_=self) def literal_processor(self, dialect): - json_serializer = dialect._json_serializer or json.dumps + super_proc = self.bind_processor(dialect) def process(value): - value = json_serializer(value) + value = super_proc(value) return repr(value) return process @@ -67,9 +67,7 @@ def as_float(self, lax=False): return func_(self.expr, type_=sqltypes.Float) def as_numeric(self, precision, scale, asdecimal=True): - # No converter available - technically we could cast, but even - # then we can't satisfy this interface because it is not possible - # to cast to parameterized types. + # No converter available in BigQuery raise NotImplementedError() comparator_factory = Comparator diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index b2895020..440c9dae 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -139,12 +139,13 @@ def test_json_casts(faux_conn, json_column, json_data, lax): f"SELECT {prefix}BOOL(PARSE_JSON('true')) = true AS `anon_1`" ) -# TODO: Casting as described in https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON +def test_nulls(faux_conn, json_column, json_data): + pass # TODO: Test SQL NULL vs JSON null as described above -# TODO: Test documented differences between JSON and JSON-formatted STRING - # TODO: Support lax + lax recursive +# TODO: Test documented differences between JSON and JSON-formatted STRING + # TODO: Provide some GenericFunction, or at least docs for how to use type_ \ No newline at end of file From b485d20d7de41df1ad9ee779344169b6ef2dcb2a Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sat, 7 Dec 2024 21:51:57 -0500 Subject: [PATCH 08/10] add path modes, blacken --- sqlalchemy_bigquery/_json.py | 23 +++++++--- sqlalchemy_bigquery/base.py | 4 +- tests/unit/test__json.py | 82 +++++++++++++++++++++++------------- tests/unit/test_engine.py | 7 ++- 4 files changed, 75 insertions(+), 41 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index dd2f3ff4..b875bd69 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -1,4 +1,4 @@ -import json +from enum import StrEnum import sqlalchemy from sqlalchemy.sql import sqltypes @@ -46,7 +46,7 @@ def process(value): class Comparator(sqltypes.JSON.Comparator): def _generate_converter(self, name, lax): - prefix = 'LAX_' if lax else '' + prefix = "LAX_" if lax else "" func_ = getattr(sqlalchemy.func, f"{prefix}{name}") return func_ @@ -72,17 +72,26 @@ def as_numeric(self, precision, scale, asdecimal=True): comparator_factory = Comparator + class JSONPathMode(StrEnum): + LAX = "lax" + LAX_RECURSIVE = "lax recursive" + class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): - # TODO: Handle lax, lax recursive def _format_value(self, value): - return "$%s" % ( + if isinstance(value[0], JSON.JSONPathMode): + mode = value[0] + mode_prefix = mode.value + " " + value = value[1:] + else: + mode_prefix = "" + + return "%s$%s" % ( + mode_prefix, "".join( [ "[%s]" % elem if isinstance(elem, int) else '."%s"' % elem for elem in value ] - ) + ), ) - - diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 344683b7..91168d53 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -547,12 +547,12 @@ def visit_bindparam( bq_type = self.dialect.type_compiler.process(type_) bq_type = self.__remove_type_parameter(bq_type) - if bq_type == 'JSON': + if bq_type == "JSON": # FIXME: JSON is not a member of `SqlParameterScalarTypes` in the DBAPI # For now, we hack around this by: # - Rewriting the bindparam type to STRING # - Applying a bind expression that converts the parameter back to JSON - bq_type = 'STRING' + bq_type = "STRING" assert_(param != "%s", f"Unexpected param: {param}") diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index 440c9dae..a98f7080 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -7,6 +7,7 @@ @pytest.fixture def json_table(metadata): from sqlalchemy_bigquery import JSON + return sqlalchemy.Table( "json_table", metadata, @@ -23,10 +24,7 @@ def json_column(json_table): def json_data(): return { "name": "Alice", - "items": [ - {"product": "book", "price": 10}, - {"product": "food", "price": 5} - ] + "items": [{"product": "book", "price": 10}, {"product": "food", "price": 5}], } @@ -39,15 +37,12 @@ def test_select_json(faux_conn, json_table, json_data): def test_insert_json(faux_conn, metadata, json_table, json_data): - actual = str( - json_table.insert() - .values( - cart=json_data - ) - .compile(faux_conn.engine) - ) + actual = str(json_table.insert().values(cart=json_data).compile(faux_conn.engine)) - assert actual == "INSERT INTO `json_table` (`cart`) VALUES (PARSE_JSON(%(cart:STRING)s))" + assert ( + actual + == "INSERT INTO `json_table` (`cart`) VALUES (PARSE_JSON(%(cart:STRING)s))" + ) @pytest.mark.parametrize( @@ -66,7 +61,7 @@ def test_insert_json(faux_conn, metadata, json_table, json_data): ( ["items", 0, "price"], "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", - "JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0].\"price\"')", + 'JSON_QUERY(`json_table`.`cart`, \'$."items"[0]."price"\')', ), ), ) @@ -77,20 +72,26 @@ def test_json_query(faux_conn, json_column, path, sql, literal_sql): expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" actual_sql = expr.compile(faux_conn).string - actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string + actual_literal_sql = expr.compile( + faux_conn, compile_kwargs={"literal_binds": True} + ).string assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql def test_json_value(faux_conn, json_column, json_data): - expr = sqlalchemy.select(json_column[["items", 0]].label("first_item")).where(sqlalchemy.func.JSON_VALUE(json_column[["name"]]) == 'Alice') + expr = sqlalchemy.select(json_column[["items", 0]].label("first_item")).where( + sqlalchemy.func.JSON_VALUE(json_column[["name"]]) == "Alice" + ) expected_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" expected_literal_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" actual_sql = expr.compile(faux_conn).string - actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string + actual_literal_sql = expr.compile( + faux_conn, compile_kwargs={"literal_binds": True} + ).string assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql @@ -98,13 +99,20 @@ def test_json_value(faux_conn, json_column, json_data): def test_json_literal(faux_conn): from sqlalchemy_bigquery import JSON - expr = sqlalchemy.select(sqlalchemy.func.STRING(sqlalchemy.sql.expression.literal("purple", type_=JSON)).label("color")) + + expr = sqlalchemy.select( + sqlalchemy.func.STRING( + sqlalchemy.sql.expression.literal("purple", type_=JSON) + ).label("color") + ) expected_sql = "SELECT STRING(PARSE_JSON(%(param_1:STRING)s)) AS `color`" expected_literal_sql = "SELECT STRING(PARSE_JSON('\"purple\"')) AS `color`" actual_sql = expr.compile(faux_conn).string - actual_literal_sql = expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string + actual_literal_sql = expr.compile( + faux_conn, compile_kwargs={"literal_binds": True} + ).string assert expected_sql == actual_sql assert expected_literal_sql == actual_literal_sql @@ -113,39 +121,53 @@ def test_json_literal(faux_conn): @pytest.mark.parametrize("lax", (False, True)) def test_json_casts(faux_conn, json_column, json_data, lax): from sqlalchemy_bigquery import JSON - prefix = 'LAX_' if lax else '' # FIXME: Manually parameterize - expr = sqlalchemy.select(1).where(json_column[["name"]].as_string(lax=lax) == 'Alice') + prefix = "LAX_" if lax else "" # FIXME: Manually parameterize + + expr = sqlalchemy.select(1).where( + json_column[["name"]].as_string(lax=lax) == "Alice" + ) assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( "SELECT 1 \n" "FROM `json_table` \n" f"WHERE {prefix}STRING(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" ) - expr = sqlalchemy.select(1).where(json_column[["items", 1, "price"]].as_integer(lax=lax) == 10) + expr = sqlalchemy.select(1).where( + json_column[["items", 1, "price"]].as_integer(lax=lax) == 10 + ) assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( "SELECT 1 \n" "FROM `json_table` \n" - f"WHERE {prefix}INT64(JSON_QUERY(`json_table`.`cart`, '$.\"items\"[1].\"price\"')) = 10" + f'WHERE {prefix}INT64(JSON_QUERY(`json_table`.`cart`, \'$."items"[1]."price"\')) = 10' ) - expr = sqlalchemy.select(sqlalchemy.literal(10.0, type_=JSON).as_float(lax=lax) == 10.0) + expr = sqlalchemy.select( + sqlalchemy.literal(10.0, type_=JSON).as_float(lax=lax) == 10.0 + ) assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( f"SELECT {prefix}FLOAT64(PARSE_JSON('10.0')) = 10.0 AS `anon_1`" ) - expr = sqlalchemy.select(sqlalchemy.literal(True, type_=JSON).as_boolean(lax=lax) == sqlalchemy.true()) + expr = sqlalchemy.select( + sqlalchemy.literal(True, type_=JSON).as_boolean(lax=lax) == sqlalchemy.true() + ) assert expr.compile(faux_conn, compile_kwargs={"literal_binds": True}).string == ( f"SELECT {prefix}BOOL(PARSE_JSON('true')) = true AS `anon_1`" ) -def test_nulls(faux_conn, json_column, json_data): - pass -# TODO: Test SQL NULL vs JSON null as described above +def test_json_path_mode(faux_conn, json_column): + from sqlalchemy_bigquery import JSON -# TODO: Support lax + lax recursive + expr = sqlalchemy.select(json_column[[JSON.JSONPathMode.LAX, "items", "price"]]) -# TODO: Test documented differences between JSON and JSON-formatted STRING + expected_literal_sql = ( + f'SELECT JSON_QUERY(`json_table`.`cart`, \'lax $."items"."price"\') AS `anon_1` \n' + "FROM `json_table`" + ) + actual_literal_sql = expr.compile( + faux_conn, compile_kwargs={"literal_binds": True} + ).string -# TODO: Provide some GenericFunction, or at least docs for how to use type_ \ No newline at end of file + assert expected_literal_sql == actual_literal_sql diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index 0e537e4e..bab34174 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -70,17 +70,20 @@ def test_arraysize_querystring_takes_precedence_over_default(faux_conn, metadata def test_set_json_serde(faux_conn, metadata): from sqlalchemy_bigquery import JSON + json_serializer = mock.Mock(side_effect=json.dumps) json_deserializer = mock.Mock(side_effect=json.loads) engine = sqlalchemy.create_engine( f"bigquery://myproject/mydataset", json_serializer=json_serializer, - json_deserializer=json_deserializer + json_deserializer=json_deserializer, ) json_data = {"foo": "bar"} - json_table = sqlalchemy.Table("json_table", metadata, sqlalchemy.Column("json", JSON)) + json_table = sqlalchemy.Table( + "json_table", metadata, sqlalchemy.Column("json", JSON) + ) metadata.create_all(engine) faux_conn.ex(f"insert into json_table values ('{json.dumps(json_data)}')") From 8ec3556342f204f20ff110a487d934fd28be26d4 Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sat, 7 Dec 2024 22:06:02 -0500 Subject: [PATCH 09/10] no StrEnum in old python, lint --- sqlalchemy_bigquery/_json.py | 19 ++++++++++++++----- tests/unit/test__json.py | 8 ++++---- tests/unit/test_engine.py | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index b875bd69..70df8b62 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -1,4 +1,4 @@ -from enum import StrEnum +from enum import auto, Enum import sqlalchemy from sqlalchemy.sql import sqltypes @@ -72,16 +72,25 @@ def as_numeric(self, precision, scale, asdecimal=True): comparator_factory = Comparator - class JSONPathMode(StrEnum): - LAX = "lax" - LAX_RECURSIVE = "lax recursive" + class JSONPathMode(Enum): + LAX = auto() + LAX_RECURSIVE = auto() class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): + def _mode_prefix(self, mode): + if mode == JSON.JSONPathMode.LAX: + mode_prefix = "lax " + elif mode == JSON.JSONPathMode.LAX_RECURSIVE: + mode_prefix = "lax recursive" + else: + raise NotImplementedError(f"Unhandled JSONPathMode: {mode}") + return mode_prefix + def _format_value(self, value): if isinstance(value[0], JSON.JSONPathMode): mode = value[0] - mode_prefix = mode.value + " " + mode_prefix = self._mode_prefix(mode) value = value[1:] else: mode_prefix = "" diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index a98f7080..6c3cbc28 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -29,7 +29,7 @@ def json_data(): def test_select_json(faux_conn, json_table, json_data): - faux_conn.ex(f"create table json_table (cart JSON)") + faux_conn.ex("create table json_table (cart JSON)") faux_conn.ex(f"insert into json_table values ('{json.dumps(json_data)}')") row = list(faux_conn.execute(sqlalchemy.select(json_table)))[0] @@ -85,8 +85,8 @@ def test_json_value(faux_conn, json_column, json_data): sqlalchemy.func.JSON_VALUE(json_column[["name"]]) == "Alice" ) - expected_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" - expected_literal_sql = f"SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" + expected_sql = "SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" + expected_literal_sql = "SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" actual_sql = expr.compile(faux_conn).string actual_literal_sql = expr.compile( @@ -163,7 +163,7 @@ def test_json_path_mode(faux_conn, json_column): expr = sqlalchemy.select(json_column[[JSON.JSONPathMode.LAX, "items", "price"]]) expected_literal_sql = ( - f'SELECT JSON_QUERY(`json_table`.`cart`, \'lax $."items"."price"\') AS `anon_1` \n' + 'SELECT JSON_QUERY(`json_table`.`cart`, \'lax $."items"."price"\') AS `anon_1` \n' "FROM `json_table`" ) actual_literal_sql = expr.compile( diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index bab34174..f9e6eb3d 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -75,7 +75,7 @@ def test_set_json_serde(faux_conn, metadata): json_deserializer = mock.Mock(side_effect=json.loads) engine = sqlalchemy.create_engine( - f"bigquery://myproject/mydataset", + "bigquery://myproject/mydataset", json_serializer=json_serializer, json_deserializer=json_deserializer, ) From 25ab0c483effd28a80d09f653fc57fbf98df9d9a Mon Sep 17 00:00:00 2001 From: Robert Jensen <robert.cole.jensen@gmail.com> Date: Sun, 8 Dec 2024 11:31:04 -0500 Subject: [PATCH 10/10] cleanup tests --- sqlalchemy_bigquery/_json.py | 4 ++-- tests/unit/test__json.py | 46 ++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/sqlalchemy_bigquery/_json.py b/sqlalchemy_bigquery/_json.py index 70df8b62..ff800dc9 100644 --- a/sqlalchemy_bigquery/_json.py +++ b/sqlalchemy_bigquery/_json.py @@ -80,7 +80,7 @@ class JSONPathMode(Enum): class JSONPathType(_FormatTypeMixin, sqltypes.JSON.JSONPathType): def _mode_prefix(self, mode): if mode == JSON.JSONPathMode.LAX: - mode_prefix = "lax " + mode_prefix = "lax" elif mode == JSON.JSONPathMode.LAX_RECURSIVE: mode_prefix = "lax recursive" else: @@ -96,7 +96,7 @@ def _format_value(self, value): mode_prefix = "" return "%s$%s" % ( - mode_prefix, + mode_prefix + " " if mode_prefix else "", "".join( [ "[%s]" % elem if isinstance(elem, int) else '."%s"' % elem diff --git a/tests/unit/test__json.py b/tests/unit/test__json.py index 6c3cbc28..fcc3a9d7 100644 --- a/tests/unit/test__json.py +++ b/tests/unit/test__json.py @@ -46,29 +46,29 @@ def test_insert_json(faux_conn, metadata, json_table, json_data): @pytest.mark.parametrize( - "path,sql,literal_sql", + "path,literal_sql", ( ( ["name"], - "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", "JSON_QUERY(`json_table`.`cart`, '$.\"name\"')", ), ( ["items", 0], - "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", "JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]')", ), ( ["items", 0, "price"], - "JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s)", 'JSON_QUERY(`json_table`.`cart`, \'$."items"[0]."price"\')', ), ), ) -def test_json_query(faux_conn, json_column, path, sql, literal_sql): +def test_json_query(faux_conn, json_column, path, literal_sql): expr = sqlalchemy.select(json_column[path]) - expected_sql = f"SELECT {sql} AS `anon_1` \nFROM `json_table`" + expected_sql = ( + "SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `anon_1` \n" + "FROM `json_table`" + ) expected_literal_sql = f"SELECT {literal_sql} AS `anon_1` \nFROM `json_table`" actual_sql = expr.compile(faux_conn).string @@ -85,8 +85,16 @@ def test_json_value(faux_conn, json_column, json_data): sqlalchemy.func.JSON_VALUE(json_column[["name"]]) == "Alice" ) - expected_sql = "SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" - expected_literal_sql = "SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \nFROM `json_table` \nWHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" + expected_sql = ( + "SELECT JSON_QUERY(`json_table`.`cart`, %(cart_1:STRING)s) AS `first_item` \n" + "FROM `json_table` \n" + "WHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, %(cart_2:STRING)s)) = %(JSON_VALUE_1:STRING)s" + ) + expected_literal_sql = ( + "SELECT JSON_QUERY(`json_table`.`cart`, '$.\"items\"[0]') AS `first_item` \n" + "FROM `json_table` \n" + "WHERE JSON_VALUE(JSON_QUERY(`json_table`.`cart`, '$.\"name\"')) = 'Alice'" + ) actual_sql = expr.compile(faux_conn).string actual_literal_sql = expr.compile( @@ -118,12 +126,10 @@ def test_json_literal(faux_conn): assert expected_literal_sql == actual_literal_sql -@pytest.mark.parametrize("lax", (False, True)) -def test_json_casts(faux_conn, json_column, json_data, lax): +@pytest.mark.parametrize("lax,prefix", ((False, ""), (True, "LAX_"))) +def test_json_casts(faux_conn, json_column, json_data, lax, prefix): from sqlalchemy_bigquery import JSON - prefix = "LAX_" if lax else "" # FIXME: Manually parameterize - expr = sqlalchemy.select(1).where( json_column[["name"]].as_string(lax=lax) == "Alice" ) @@ -157,13 +163,23 @@ def test_json_casts(faux_conn, json_column, json_data, lax): ) -def test_json_path_mode(faux_conn, json_column): +@pytest.mark.parametrize( + "mode,prefix", ((None, ""), ("LAX", "lax "), ("LAX_RECURSIVE", "lax recursive ")) +) +def test_json_path_mode(faux_conn, json_column, mode, prefix): from sqlalchemy_bigquery import JSON - expr = sqlalchemy.select(json_column[[JSON.JSONPathMode.LAX, "items", "price"]]) + if mode == "LAX": + path = [JSON.JSONPathMode.LAX, "items", "price"] + elif mode == "LAX_RECURSIVE": + path = [JSON.JSONPathMode.LAX_RECURSIVE, "items", "price"] + else: + path = ["items", "price"] + + expr = sqlalchemy.select(json_column[path]) expected_literal_sql = ( - 'SELECT JSON_QUERY(`json_table`.`cart`, \'lax $."items"."price"\') AS `anon_1` \n' + f'SELECT JSON_QUERY(`json_table`.`cart`, \'{prefix}$."items"."price"\') AS `anon_1` \n' "FROM `json_table`" ) actual_literal_sql = expr.compile(