From 550353628d858314423b1156721ad2316d469f28 Mon Sep 17 00:00:00 2001 From: Ansgar Lampe Date: Tue, 17 Jun 2025 17:18:54 +0200 Subject: [PATCH 1/5] [DB-40533] Add support for new type VECTOR(<dim>,DOUBLE) Add encode/decode support for new VECTOR datatype. A result column of VECTOR(<dim>, DOUBLE) shows as type datatype.VECTOR_DOUBLE in the metadata description. It is returned as datatype.Vector with subtype datatype.Vector.DOUBLE which can be used as a list. For a parameter to be detected as VECTOR(<dim>,DOUBLE) it also needs to use the datatype.Vector datatype. Alternatively, binding a string is also possible - which currently includes binding a list of numbers which will then be converted to string. This commit also adds tests for the described behavior. --- pynuodb/datatype.py | 28 ++++++++++++++ pynuodb/encodedsession.py | 62 +++++++++++++++++++++++++++++++ pynuodb/protocol.py | 5 ++- tests/nuodb_types_test.py | 78 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 172 insertions(+), 1 deletion(-) diff --git a/pynuodb/datatype.py b/pynuodb/datatype.py index 510382e..d6c2eda 100644 --- a/pynuodb/datatype.py +++ b/pynuodb/datatype.py @@ -37,6 +37,8 @@ from datetime import datetime as Timestamp, date as Date, time as Time from datetime import timedelta as TimeDelta +from pynuodb import protocol + try: from typing import Tuple, Union # pylint: disable=unused-import except ImportError: @@ -149,10 +151,35 @@ def __cmp__(self, other): return -1 +class Vector(list): + """A specific type for SQL VECTOR(, DOUBLE) + to be able to detect the desired type when binding parameters. 
+ Apart from creating the value as a Vector with subtype + this can be used as a list.""" + DOUBLE = protocol.VECTOR_DOUBLE + + def __init__(self, subtype, *args, **kwargs): + if args: + if subtype != Vector.DOUBLE: + raise TypeError("Vector type only supported for subtype DOUBLE") + + self.subtype = subtype + + # forward the remaining arguments to the list __init__ + super(Vector, self).__init__(*args, **kwargs) + else: + raise TypeError("Vector needs to be initialized with a subtype like Vector.DOUBLE as" + " first argument") + + def getSubtype(self): + return self.subtype + + STRING = TypeObject(str) BINARY = TypeObject(str) NUMBER = TypeObject(int, decimal.Decimal) DATETIME = TypeObject(Timestamp, Date, Time) +VECTOR_DOUBLE = TypeObject(list) ROWID = TypeObject() NULL = TypeObject(None) @@ -179,6 +206,7 @@ def __cmp__(self, other): "timestamp without time zone": DATETIME, "timestamp with time zone": DATETIME, "time without time zone": DATETIME, + "vector double": VECTOR_DOUBLE, # Old types used by NuoDB <2.0.3 "binarystring": BINARY, "binaryvaryingstring": BINARY, diff --git a/pynuodb/encodedsession.py b/pynuodb/encodedsession.py index a271a52..033767d 100644 --- a/pynuodb/encodedsession.py +++ b/pynuodb/encodedsession.py @@ -775,6 +775,30 @@ def putScaledCount2(self, value): self.__output += data return self + def putVectorDouble(self, value): + # type + self.__output.append(protocol.VECTOR) + # subtype + self.__output.append(protocol.VECTOR_DOUBLE) + # length in bytes in count notation, i.e. 
first + # number of bytes needed for the length, then the + # encoded length + lengthStr = crypt.toByteString(len(value) * 8) + self.__output.append(len(lengthStr)) + self.__output += lengthStr + + # the actual vector: Each value as double in little endian encoding + for val in value: + self.__output += struct.pack(' EncodedSession """Call the supporting function based on the type of the value.""" @@ -806,6 +830,11 @@ def putValue(self, value): # pylint: disable=too-many-return-statements if isinstance(value, bool): return self.putBoolean(value) + # we don't want to autodetect lists as being VECTOR, so we + # only bind double if it is the explicit type + if isinstance(value, datatype.Vector): + return self.putVector(value) + # I find it pretty bogus that we pass str(value) here: why not value? return self.putString(str(value)) @@ -1035,6 +1064,36 @@ def getUUID(self): raise DataError('Not a UUID') + def getVector(self): + # type: () -> list[float] + """Read the next vector off the session. + + :rtype datatype.Vector + """ + if self._getTypeCode() == protocol.VECTOR: + subtype = crypt.fromByteString(self._takeBytes(1)) + if subtype == protocol.VECTOR_DOUBLE: + # VECTOR(, DOUBLE) + lengthBytes = crypt.fromByteString(self._takeBytes(1)) + length = crypt.fromByteString(self._takeBytes(lengthBytes)) + + if length % 8 != 0: + raise DataError("Invalid size for VECTOR DOUBLE data: %d" % (length)) + + dimension = length // 8 + + # VECTOR DOUBLE stores the data as little endian + vector = datatype.Vector(datatype.Vector.DOUBLE, + [struct.unpack(' decimal.Decimal """Read a scaled and signed decimal from the session. 
@@ -1109,6 +1168,9 @@ def getValue(self): if code == protocol.UUID: return self.getUUID() + + if code == protocol.VECTOR: + return self.getVector() if code == protocol.SCALEDCOUNT2: return self.getScaledCount2() diff --git a/pynuodb/protocol.py b/pynuodb/protocol.py index a0836bb..79e2c65 100644 --- a/pynuodb/protocol.py +++ b/pynuodb/protocol.py @@ -45,7 +45,7 @@ BLOBLEN4 = 193 CLOBLEN0 = 194 CLOBLEN4 = 198 -SCALEDCOUNT1 = 199 +VECTOR = 199 UUID = 200 SCALEDDATELEN0 = 200 SCALEDDATELEN1 = 201 @@ -66,6 +66,9 @@ DEBUGBARRIER = 240 SCALEDTIMESTAMPNOTZ = 241 +# subtypes of the VECTOR type +VECTOR_DOUBLE = 0 + # Protocol Messages FAILURE = 0 OPENDATABASE = 3 diff --git a/tests/nuodb_types_test.py b/tests/nuodb_types_test.py index ea71984..4f4838e 100644 --- a/tests/nuodb_types_test.py +++ b/tests/nuodb_types_test.py @@ -8,6 +8,8 @@ import decimal import datetime +from pynuodb import datatype + from . import nuodb_base @@ -125,3 +127,79 @@ def test_null_type(self): assert len(row) == 1 assert cursor.description[0][1] == null_type assert row[0] is None + + def test_vector_type(self): + con = self._connect() + cursor = con.cursor() + + cursor.execute("CREATE TEMPORARY TABLE tmp (" + " vec3 VECTOR(3, DOUBLE)," + " vec5 VECTOR(5, DOUBLE))") + + cursor.execute("INSERT INTO tmp VALUES (" + " '[1.1,2.2,33.33]'," + " '[-1,2,-3,4,-5]')") + + cursor.execute("SELECT * FROM tmp") + + # check metadata + [name, type, _, _, precision, scale, _] = cursor.description[0] + assert name == "VEC3" + assert type == datatype.VECTOR_DOUBLE + assert precision == 3 + assert scale == 0 + + [name, type, _, _, precision, scale, _] = cursor.description[1] + assert name == "VEC5" + assert type == datatype.VECTOR_DOUBLE + assert precision == 5 + assert scale == 0 + + # check content + row = cursor.fetchone() + assert len(row) == 2 + assert row[0] == [1.1, 2.2, 33.33] + assert row[1] == [-1, 2, -3, 4, -5] + assert cursor.fetchone() is None + + # check this is actually a Vector type, not just a list + 
assert isinstance(row[0], datatype.Vector) + assert row[0].getSubtype() == datatype.Vector.DOUBLE + assert isinstance(row[1], datatype.Vector) + assert row[1].getSubtype() == datatype.Vector.DOUBLE + + # check prepared parameters + parameters = [datatype.Vector(datatype.Vector.DOUBLE, [11.11, -2.2, 3333.333]), + datatype.Vector(datatype.Vector.DOUBLE, [-1.23, 2.345, -0.34, 4, -5678.9])] + cursor.execute("TRUNCATE TABLE tmp") + cursor.execute("INSERT INTO tmp VALUES (?, ?)", parameters) + + cursor.execute("SELECT * FROM tmp") + + # check content + row = cursor.fetchone() + assert len(row) == 2 + assert row[0] == parameters[0] + assert row[1] == parameters[1] + assert cursor.fetchone() is None + + # check that the inserted values are interpreted correctly by the database + cursor.execute("SELECT CAST(vec3 AS STRING) || ' - ' || CAST(vec5 AS STRING) AS strRep" + " FROM tmp") + + row = cursor.fetchone() + assert len(row) == 1 + assert row[0] == "[11.11,-2.2,3333.333] - [-1.23,2.345,-0.34,4,-5678.9]" + assert cursor.fetchone() is None + + # currently binding a list also works - this is done via implicit string + # conversion of the passed argument in default bind case + parameters = [[11.11, -2.2, 3333.333]] + cursor.execute("SELECT VEC3 = ? 
FROM tmp", parameters) + + # check content + row = cursor.fetchone() + assert len(row) == 1 + assert row[0] is True + assert cursor.fetchone() is None + From 236287fd20c0ea9daee1fda8f05dbacfbeb8f210 Mon Sep 17 00:00:00 2001 From: Ansgar Lampe Date: Tue, 17 Jun 2025 17:44:50 +0200 Subject: [PATCH 2/5] Fix pylint issues - remove whitespace line - add documentation --- pynuodb/datatype.py | 2 ++ pynuodb/encodedsession.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pynuodb/datatype.py b/pynuodb/datatype.py index d6c2eda..3770c9b 100644 --- a/pynuodb/datatype.py +++ b/pynuodb/datatype.py @@ -172,6 +172,8 @@ def __init__(self, subtype, *args, **kwargs): " first argument") def getSubtype(self): + # type: () -> int + """Returns the subtype of vector this instance holds data for""" return self.subtype diff --git a/pynuodb/encodedsession.py b/pynuodb/encodedsession.py index 033767d..6931be6 100644 --- a/pynuodb/encodedsession.py +++ b/pynuodb/encodedsession.py @@ -776,7 +776,11 @@ def putScaledCount2(self, value): return self def putVectorDouble(self, value): - # type + # type: (datatype.Vector) -> EncodedSession + """Append a Vector with subtype Vector.DOUBLE to the message. + + :type value: datatype.Vector + """ self.__output.append(protocol.VECTOR) # subtype self.__output.append(protocol.VECTOR_DOUBLE) @@ -794,6 +798,11 @@ def putVectorDouble(self, value): return self def putVector(self, value): + # type: (datatype.Vector) -> EncodedSession + """Append a Vector type to the message. + + :type value: datatype.Vector + """ if value.getSubtype() == datatype.Vector.DOUBLE: return self.putVectorDouble(value) @@ -1065,7 +1074,7 @@ def getUUID(self): raise DataError('Not a UUID') def getVector(self): - # type: () -> list[float] + # type: () -> datatype.Vector """Read the next vector off the session. 
:rtype datatype.Vector @@ -1168,7 +1177,7 @@ def getValue(self): if code == protocol.UUID: return self.getUUID() - + if code == protocol.VECTOR: return self.getVector() From d48573e9a24f519881733100cde98690d0bf9fdf Mon Sep 17 00:00:00 2001 From: Ansgar Lampe Date: Mon, 23 Jun 2025 13:54:08 +0200 Subject: [PATCH 3/5] Don't execute VECTOR tests against versions pre 8.0 --- tests/nuodb_types_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/nuodb_types_test.py b/tests/nuodb_types_test.py index 4f4838e..2779991 100644 --- a/tests/nuodb_types_test.py +++ b/tests/nuodb_types_test.py @@ -131,6 +131,14 @@ def test_null_type(self): def test_vector_type(self): con = self._connect() cursor = con.cursor() + + # only activate this tests if tested against version 8 or above + cursor.execute("select cast(substring_index(release_ver, '.', 1) as int)" + " from system.nodes limit 1") + row = cursor.fetchone() + database_major_version = row[0] + if database_major_version < 8: + return cursor.execute("CREATE TEMPORARY TABLE tmp (" " vec3 VECTOR(3, DOUBLE)," From cc38dad89fd84b78d2f59940dc357d58605e1d94 Mon Sep 17 00:00:00 2001 From: Ansgar Lampe Date: Mon, 30 Jun 2025 14:13:23 +0200 Subject: [PATCH 4/5] Address review comments - correct documentation for VECTOR type - check system version using GETEFFECTIVEPLATFORMVERSION() - add system version information to separate field in test class --- pynuodb/datatype.py | 2 +- tests/conftest.py | 13 ++++++++++++- tests/nuodb_base.py | 6 ++++-- tests/nuodb_types_test.py | 8 ++------ 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pynuodb/datatype.py b/pynuodb/datatype.py index 3770c9b..c56afd9 100644 --- a/pynuodb/datatype.py +++ b/pynuodb/datatype.py @@ -152,7 +152,7 @@ def __cmp__(self, other): class Vector(list): - """A specific type for SQL VECTOR(, DOUBLE) + """A specific type for SQL VECTOR(, ) to be able to detect the desired type when binding parameters. 
Apart from creating the value as a Vector with subtype this can be used as a list.""" diff --git a/tests/conftest.py b/tests/conftest.py index 98da5dd..86fc17b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -299,10 +299,21 @@ def database(ap, db, te): 'user': db[1], 'password': db[2], 'options': {'schema': 'test'}} # type: DATABASE_FIXTURE + system_information = {'effective_version': 0} + try: while True: try: conn = pynuodb.connect(**connect_args) + cursor = conn.cursor() + try: + cursor.execute("select GETEFFECTIVEPLATFORMVERSION() from system.dual") + row = cursor.fetchone() + effective_version = row[0] + system_information['effective_version'] = effective_version + finally: + cursor.close() + break except pynuodb.session.SessionException: pass @@ -315,4 +326,4 @@ def database(ap, db, te): _log.info("Database %s is available", db[0]) - return connect_args + return {'connect_args': connect_args, 'system_information': system_information} diff --git a/tests/nuodb_base.py b/tests/nuodb_base.py index 0c4e997..cd43846 100644 --- a/tests/nuodb_base.py +++ b/tests/nuodb_base.py @@ -25,6 +25,7 @@ class NuoBase(object): driver = pynuodb # type: Any connect_args = () + system_information = () host = None lower_func = 'lower' # For stored procedure test @@ -32,10 +33,11 @@ class NuoBase(object): @pytest.fixture(autouse=True) def _setup(self, database): # Preserve the options we'll need to create a connection to the DB - self.connect_args = database + self.connect_args = database['connect_args'] + self.system_information = database['system_information'] # Verify the database is up and has a running TE - dbname = database['database'] + dbname = database['connect_args']['database'] (ret, out) = nuocmd(['--show-json', 'get', 'processes', '--db-name', dbname], logout=False) assert ret == 0, "DB not running: %s" % (out) diff --git a/tests/nuodb_types_test.py b/tests/nuodb_types_test.py index 2779991..76da838 100644 --- a/tests/nuodb_types_test.py +++ 
b/tests/nuodb_types_test.py @@ -131,13 +131,9 @@ def test_null_type(self): def test_vector_type(self): con = self._connect() cursor = con.cursor() - + # only activate this tests if tested against version 8 or above - cursor.execute("select cast(substring_index(release_ver, '.', 1) as int)" - " from system.nodes limit 1") - row = cursor.fetchone() - database_major_version = row[0] - if database_major_version < 8: + if self.system_information['effective_version'] < 1835008: return cursor.execute("CREATE TEMPORARY TABLE tmp (" From cae9e9ac2771e4f5d44f3e8d2884b0ed3441f408 Mon Sep 17 00:00:00 2001 From: Ansgar Lampe Date: Tue, 1 Jul 2025 09:24:17 +0200 Subject: [PATCH 5/5] Add Vector to datatype.__all__ and changed some minor things --- pynuodb/datatype.py | 4 ++-- tests/conftest.py | 3 +-- tests/nuodb_base.py | 2 +- tests/nuodb_types_test.py | 18 +++++++++--------- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pynuodb/datatype.py b/pynuodb/datatype.py index 79282f7..e69a0ac 100644 --- a/pynuodb/datatype.py +++ b/pynuodb/datatype.py @@ -27,8 +27,8 @@ __all__ = ['Date', 'Time', 'Timestamp', 'DateFromTicks', 'TimeFromTicks', 'TimestampFromTicks', 'DateToTicks', 'TimeToTicks', - 'TimestampToTicks', 'Binary', 'STRING', 'BINARY', 'NUMBER', - 'DATETIME', 'ROWID', 'TypeObjectFromNuodb'] + 'TimestampToTicks', 'Binary', 'Vector', 'STRING', 'BINARY', 'NUMBER', + 'DATETIME', 'ROWID', 'VECTOR_DOUBLE', 'TypeObjectFromNuodb'] import sys import decimal diff --git a/tests/conftest.py b/tests/conftest.py index 86fc17b..fb7cd90 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -309,8 +309,7 @@ def database(ap, db, te): try: cursor.execute("select GETEFFECTIVEPLATFORMVERSION() from system.dual") row = cursor.fetchone() - effective_version = row[0] - system_information['effective_version'] = effective_version + system_information['effective_version'] = row[0] finally: cursor.close() diff --git a/tests/nuodb_base.py b/tests/nuodb_base.py index cd43846..a3959dd 
100644 --- a/tests/nuodb_base.py +++ b/tests/nuodb_base.py @@ -37,7 +37,7 @@ def _setup(self, database): self.system_information = database['system_information'] # Verify the database is up and has a running TE - dbname = database['connect_args']['database'] + dbname = self.connect_args['database'] (ret, out) = nuocmd(['--show-json', 'get', 'processes', '--db-name', dbname], logout=False) assert ret == 0, "DB not running: %s" % (out) diff --git a/tests/nuodb_types_test.py b/tests/nuodb_types_test.py index 5cd1336..927a688 100644 --- a/tests/nuodb_types_test.py +++ b/tests/nuodb_types_test.py @@ -8,7 +8,7 @@ import decimal import datetime -from pynuodb import datatype +import pynuodb from . import nuodb_base from .mock_tzs import localize @@ -151,13 +151,13 @@ def test_vector_type(self): # check metadata [name, type, _, _, precision, scale, _] = cursor.description[0] assert name == "VEC3" - assert type == datatype.VECTOR_DOUBLE + assert type == pynuodb.VECTOR_DOUBLE assert precision == 3 assert scale == 0 [name, type, _, _, precision, scale, _] = cursor.description[1] assert name == "VEC5" - assert type == datatype.VECTOR_DOUBLE + assert type == pynuodb.VECTOR_DOUBLE assert precision == 5 assert scale == 0 @@ -169,14 +169,14 @@ def test_vector_type(self): assert cursor.fetchone() is None # check this is actually a Vector type, not just a list - assert isinstance(row[0], datatype.Vector) - assert row[0].getSubtype() == datatype.Vector.DOUBLE - assert isinstance(row[1], datatype.Vector) - assert row[1].getSubtype() == datatype.Vector.DOUBLE + assert isinstance(row[0], pynuodb.Vector) + assert row[0].getSubtype() == pynuodb.Vector.DOUBLE + assert isinstance(row[1], pynuodb.Vector) + assert row[1].getSubtype() == pynuodb.Vector.DOUBLE # check prepared parameters - parameters = [datatype.Vector(datatype.Vector.DOUBLE, [11.11, -2.2, 3333.333]), - datatype.Vector(datatype.Vector.DOUBLE, [-1.23, 2.345, -0.34, 4, -5678.9])] + parameters = 
[pynuodb.Vector(pynuodb.Vector.DOUBLE, [11.11, -2.2, 3333.333]), + pynuodb.Vector(pynuodb.Vector.DOUBLE, [-1.23, 2.345, -0.34, 4, -5678.9])] cursor.execute("TRUNCATE TABLE tmp") cursor.execute("INSERT INTO tmp VALUES (?, ?)", parameters)