diff --git a/c-questdb-client b/c-questdb-client index f97c4cb..14fc19f 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit f97c4cb164ef357560a92c8681438df5f7452bb6 +Subproject commit 14fc19f382ada9d58ce18884e7f42b10bbb36784 diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index 6e6aef5..6e5b156 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -144,7 +144,7 @@ stages: inputs: {pathtoPublish: 'wheelhouse'} - job: macos_x64 - pool: {vmImage: 'macOS-12'} + pool: {vmImage: 'macOS-13'} timeoutInMinutes: 90 steps: - task: UsePythonVersion@0 diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index cc642c0..b75b6f5 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -345,11 +345,11 @@ class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(bytes(buffer))``. """ - def __str__(self) -> str: - """Return the constructed buffer as a string. Use for debugging.""" + def __bytes__(self) -> bytes: + """Return the constructed buffer as bytes. Use for debugging.""" def row( self, @@ -941,20 +941,20 @@ class Sender: def __enter__(self) -> Sender: """Call :func:`Sender.establish` at the start of a ``with`` block.""" - def __str__(self) -> str: + def __len__(self) -> int: """ - Inspect the contents of the internal buffer. - - The ``str`` value returned represents the unsent data. + Number of bytes of unsent data in the internal buffer. - Also see :func:`Sender.__len__`. + Equivalent (but cheaper) to ``len(bytes(sender))``. """ - def __len__(self) -> int: + def __bytes__(self) -> bytes: """ - Number of bytes of unsent data in the internal buffer. + Inspect the contents of the internal buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + The ``bytes`` value returned represents the unsent data. + + Also see :func:`Sender.__len__`. """ def transaction(self, table_name: str) -> SenderTransaction: diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 51f263d..3621368 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -78,6 +78,7 @@ from enum import Enum from typing import List, Tuple, Dict, Union, Any, Optional, Callable, \ Iterable import pathlib +from cpython.bytes cimport PyBytes_FromStringAndSize import sys import os @@ -825,18 +826,17 @@ cdef class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(bytes(buffer))``. """ return line_sender_buffer_size(self._impl) - def __str__(self) -> str: - """Return the constructed buffer as a string. Use for debugging.""" - return self._to_str() + def __bytes__(self) -> bytes: + """Return the constructed buffer as bytes. Use for debugging.""" + return self._to_bytes() - cdef inline object _to_str(self): - cdef size_t size = 0 - cdef const char* utf8 = line_sender_buffer_peek(self._impl, &size) - return PyUnicode_FromStringAndSize(utf8, size) + cdef inline object _to_bytes(self): + cdef line_sender_buffer_view view = line_sender_buffer_peek(self._impl) + return PyBytes_FromStringAndSize( view.buf, view.len) cdef inline void_int _set_marker(self) except -1: cdef line_sender_error* err = NULL @@ -2281,21 +2281,21 @@ cdef class Sender: self.establish() return self - def __str__(self) -> str: + def __bytes__(self) -> bytes: """ Inspect the contents of the internal buffer. - The ``str`` value returned represents the unsent data. + The ``bytes`` value returned represents the unsent data. Also see :func:`Sender.__len__`. """ - return str(self._buffer) + return bytes(self._buffer) def __len__(self) -> int: """ Number of bytes of unsent data in the internal buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(bytes(sender))``. """ return len(self._buffer) diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index 50490ab..d5759fa 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -22,7 +22,7 @@ ## ################################################################################ -from libc.stdint cimport int64_t, uint16_t, uint64_t +from libc.stdint cimport int64_t, uint16_t, uint64_t, uint8_t cdef extern from "questdb/ingress/line_sender.h": cdef struct line_sender_error: @@ -102,6 +102,10 @@ cdef extern from "questdb/ingress/line_sender.h": size_t len const char* buf + cdef struct line_sender_buffer_view: + size_t len + const uint8_t* buf + bint line_sender_column_name_init( line_sender_column_name* name, size_t len, @@ -171,9 +175,8 @@ cdef extern from "questdb/ingress/line_sender.h": const line_sender_buffer* buffer ) noexcept nogil - const char* line_sender_buffer_peek( - const line_sender_buffer* buffer, - size_t* len_out + line_sender_buffer_view line_sender_buffer_peek( + const line_sender_buffer* buffer ) noexcept nogil bint line_sender_buffer_table( diff --git a/test/test.py b/test/test.py index a5ee870..03a2212 100755 --- a/test/test.py +++ b/test/test.py @@ -76,7 +76,7 @@ def test_basic(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) self.assertEqual(len(buf), 25) - self.assertEqual(str(buf), 'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(bytes(buf), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_table(self): buf = qi.Buffer() @@ -92,7 +92,7 @@ def test_bad_table(self): def test_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), 'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(bytes(buf), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_symbol_column_name(self): buf = qi.Buffer() @@ -121,38 +121,38 @@ def test_column(self): 'col7': two_h_after_epoch, 'col8': None}, at=qi.ServerTimestamp) exp = ( - 'tbl1 col1=t,col2=f,col3=-1i,col4=0.5,' - 'col5="val",col6=12345t,col7=7200000000t\n') - self.assertEqual(str(buf), exp) + b'tbl1 col1=t,col2=f,col3=-1i,col4=0.5,' + b'col5="val",col6=12345t,col7=7200000000t\n') + self.assertEqual(bytes(buf), exp) def test_none_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': None}, at=qi.ServerTimestamp) - exp = 'tbl1,sym1=val1\n' - self.assertEqual(str(buf), exp) + exp = b'tbl1,sym1=val1\n' + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. buf.row('tbl1', symbols={'sym1': None, 'sym2': None}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) def test_none_column(self): buf = qi.Buffer() buf.row('tbl1', columns={'col1': 1}, at=qi.ServerTimestamp) - exp = 'tbl1 col1=1i\n' - self.assertEqual(str(buf), exp) + exp = b'tbl1 col1=1i\n' + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. buf.row('tbl1', columns={'col1': None, 'col2': None}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) def test_no_symbol_or_col_args(self): buf = qi.Buffer() buf.row('table_name', at=qi.ServerTimestamp) - self.assertEqual(str(buf), '') + self.assertEqual(bytes(buf), b'') def test_unicode(self): buf = qi.Buffer() @@ -171,15 +171,15 @@ def test_unicode(self): 'questdb2': '嚜꓂', # UCS-2, 3 bytes for UTF-8. 'questdb3': '💩🦞'}, at=qi.ServerTimestamp) # UCS-4, 4 bytes for UTF-8. - self.assertEqual(str(buf), - f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + + self.assertEqual(bytes(buf), + (f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + 'tbl1,Questo\\ è\\ il\\ nome\\ di\\ una\\ colonna=' + 'Це\\ символьне\\ значення ' + - 'questdb1="",questdb2="嚜꓂",questdb3="💩🦞"\n') + 'questdb1="",questdb2="嚜꓂",questdb3="💩🦞"\n').encode('utf-8')) buf.clear() buf.row('tbl1', symbols={'questdb1': 'q❤️p'}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), 'tbl1,questdb1=q❤️p\n') + self.assertEqual(bytes(buf), 'tbl1,questdb1=q❤️p\n'.encode('utf-8')) # A bad char in Python. with self.assertRaisesRegex( @@ -191,30 +191,30 @@ def test_unicode(self): # Ensure we can continue using the buffer after an error. buf.row('tbl1', symbols={'questdb1': 'another line of input'}, at=qi.ServerTimestamp) self.assertEqual( - str(buf), - 'tbl1,questdb1=q❤️p\n' + + bytes(buf), + ('tbl1,questdb1=q❤️p\n' + # Note: No partially written failed line here. - 'tbl1,questdb1=another\\ line\\ of\\ input\n') + 'tbl1,questdb1=another\\ line\\ of\\ input\n').encode('utf-8')) def test_float(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 1.2345678901234567}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 num=1.2345678901234567\n') + self.assertEqual(bytes(buf), f'tbl1 num=1.2345678901234567\n'.encode('utf-8')) def test_int_range(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 0}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 num=0i\n') + self.assertEqual(bytes(buf), f'tbl1 num=0i\n'.encode('utf-8')) buf.clear() # 32-bit int range. buf.row('tbl1', columns={'min': -2 ** 31, 'max': 2 ** 31 - 1}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 min=-2147483648i,max=2147483647i\n') + self.assertEqual(bytes(buf), f'tbl1 min=-2147483648i,max=2147483647i\n'.encode('utf-8')) buf.clear() # 64-bit int range. buf.row('tbl1', columns={'min': -2 ** 63, 'max': 2 ** 63 - 1}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n') + self.assertEqual(bytes(buf), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n'.encode('utf-8')) buf.clear() # Overflow. @@ -356,9 +356,9 @@ def test_flush_1(self): server.accept() with self.assertRaisesRegex(qi.IngressError, 'Column names'): sender.row('tbl1', symbols={'...bad name..': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(str(sender), '') + self.assertEqual(bytes(sender), b'') sender.flush() - self.assertEqual(str(sender), '') + self.assertEqual(bytes(sender), b'') msgs = server.recv() self.assertEqual(msgs, []) @@ -423,7 +423,7 @@ def test_two_rows_explicit_buffer(self): exp = ( 'line_sender_buffer_example2,id=Hola price="111222233333i",qty=3.5 111222233333\n' 'line_sender_example,id=Adios price="111222233343i",qty=2.5 111222233343\n') - self.assertEqual(str(buffer), exp) + self.assertEqual(bytes(buffer), exp.encode('utf-8')) sender.flush(buffer) msgs = server.recv() bexp = [msg.encode('utf-8') for msg in exp.rstrip().split('\n')] @@ -432,9 +432,8 @@ def test_two_rows_explicit_buffer(self): def test_independent_buffer(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) - exp = 'tbl1,sym1=val1\n' - bexp = exp[:-1].encode('utf-8') - self.assertEqual(str(buf), exp) + exp = b'tbl1,sym1=val1\n' + self.assertEqual(bytes(buf), exp) with Server() as server1, Server() as server2: with self.builder('tcp', 'localhost', server1.port) as sender1, \ @@ -443,21 +442,21 @@ def test_independent_buffer(self): server2.accept() sender1.flush(buf, clear=False) - self.assertEqual(str(buf), exp) + self.assertEqual(bytes(buf), exp) sender2.flush(buf, clear=False) - self.assertEqual(str(buf), exp) + self.assertEqual(bytes(buf), exp) msgs1 = server1.recv() msgs2 = server2.recv() - self.assertEqual(msgs1, [bexp]) - self.assertEqual(msgs2, [bexp]) + self.assertEqual(msgs1, [exp[:-1]]) + self.assertEqual(msgs2, [exp[:-1]]) sender1.flush(buf) - self.assertEqual(server1.recv(), [bexp]) + self.assertEqual(server1.recv(), [exp[:-1]]) # The buffer is now auto-cleared. - self.assertEqual(str(buf), '') + self.assertEqual(bytes(buf), b'') def test_auto_flush_settings_defaults(self): for protocol in ('tcp', 'tcps', 'http', 'https'): @@ -560,7 +559,7 @@ def test_dont_flush_on_exception(self): with self.builder('tcp', 'localhost', server.port) as sender: server.accept() sender.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(str(sender), 'tbl1,sym1=val1\n') + self.assertEqual(bytes(sender), b'tbl1,sym1=val1\n') raise RuntimeError('Test exception') msgs = server.recv() self.assertEqual(msgs, []) diff --git a/test/test_dataframe.py b/test/test_dataframe.py index cbd082e..baa5b97 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -35,7 +35,7 @@ def _dataframe(*args, **kwargs): buf = qi.Buffer() buf.dataframe(*args, **kwargs) - return str(buf) + return bytes(buf) DF1 = pd.DataFrame({ 'A': [1.0, 2.0, 3.0], @@ -165,17 +165,17 @@ def test_bad_at(self): def test_empty_dataframe(self): buf = _dataframe(pd.DataFrame(), table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_zero_row_dataframe(self): buf = _dataframe(pd.DataFrame(columns=['A', 'B']), table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_zero_column_dataframe(self): df = pd.DataFrame(index=[0, 1, 2]) self.assertEqual(len(df), 3) buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_basic(self): buf = _dataframe( @@ -185,9 +185,9 @@ def test_basic(self): at=-1) self.assertEqual( buf, - 't1,A=a1,B=b1,C=b1,D=a1 E=1.0,F=1i 1520640000000000000\n' + - 't2,A=a2,D=a2 E=2.0,F=2i 1520726400000000000\n' + - 't1,A=a3,B=b3,C=b3,D=a3 E=3.0,F=3i 1520812800000000000\n') + b't1,A=a1,B=b1,C=b1,D=a1 E=1.0,F=1i 1520640000000000000\n' + + b't2,A=a2,D=a2 E=2.0,F=2i 1520726400000000000\n' + + b't1,A=a3,B=b3,C=b3,D=a3 E=3.0,F=3i 1520812800000000000\n') def test_named_dataframe(self): df = pd.DataFrame({ @@ -197,23 +197,23 @@ def test_named_dataframe(self): buf = _dataframe(df, at=qi.ServerTimestamp) self.assertEqual( buf, - 'table_name a=1i,b="a"\n' + - 'table_name a=2i,b="b"\n' + - 'table_name a=3i,b="c"\n') + b'table_name a=1i,b="a"\n' + + b'table_name a=2i,b="b"\n' + + b'table_name a=3i,b="c"\n') buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n') buf = _dataframe(df, table_name_col='b', at=qi.ServerTimestamp) self.assertEqual( buf, - 'a a=1i\n' + - 'b a=2i\n' + - 'c a=3i\n') + b'a a=1i\n' + + b'b a=2i\n' + + b'c a=3i\n') df.index.name = 42 # bad type, not str with self.assertRaisesRegex(qi.IngressError, @@ -244,9 +244,9 @@ def test_at_good(self): buf = _dataframe(df, table_name='tbl1', at=ts) self.assertEqual( buf, - 'tbl1 a=1i,b="a" 1520640000000000000\n' + - 'tbl1 a=2i,b="b" 1520640000000000000\n' + - 'tbl1 a=3i,b="c" 1520640000000000000\n') + b'tbl1 a=1i,b="a" 1520640000000000000\n' + + b'tbl1 a=2i,b="b" 1520640000000000000\n' + + b'tbl1 a=3i,b="c" 1520640000000000000\n') @unittest.skipIf(BROKEN_TIMEZONES, 'requires accurate timezones') def test_at_neg(self): @@ -281,9 +281,9 @@ def test_at_ts_0(self): buf = _dataframe(df, table_name='tbl1', at=ts) self.assertEqual( buf, - 'tbl1 a=1i,b="a" 0\n' + - 'tbl1 a=2i,b="b" 0\n' + - 'tbl1 a=3i,b="c" 0\n') + b'tbl1 a=1i,b="a" 0\n' + + b'tbl1 a=2i,b="b" 0\n' + + b'tbl1 a=3i,b="c" 0\n') def test_single_at_col(self): df = pd.DataFrame({'timestamp': pd.to_datetime(['2023-01-01'])}) @@ -306,11 +306,11 @@ def test_u8_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=255i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=255i\n') def test_i8_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -321,12 +321,12 @@ def test_i8_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-128i\n' + - 'tbl1 a=127i\n' + - 'tbl1 a=0i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-128i\n' + + b'tbl1 a=127i\n' + + b'tbl1 a=0i\n') def test_u16_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -337,11 +337,11 @@ def test_u16_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=65535i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=65535i\n') def test_i16_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -352,12 +352,12 @@ def test_i16_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-32768i\n' + - 'tbl1 a=32767i\n' + - 'tbl1 a=0i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-32768i\n' + + b'tbl1 a=32767i\n' + + b'tbl1 a=0i\n') def test_u32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -368,11 +368,11 @@ def test_u32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=4294967295i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=4294967295i\n') def test_i32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -384,12 +384,12 @@ def test_i32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-2147483648i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=2147483647i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-2147483648i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=2147483647i\n') def test_u64_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -400,20 +400,20 @@ def test_u64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=9223372036854775807i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=9223372036854775807i\n') buf = qi.Buffer() buf.dataframe(pd.DataFrame({'b': [.5, 1.0, 1.5]}), table_name='tbl2', at=qi.ServerTimestamp) exp1 = ( - 'tbl2 b=0.5\n' + - 'tbl2 b=1.0\n' + - 'tbl2 b=1.5\n') + b'tbl2 b=0.5\n' + + b'tbl2 b=1.0\n' + + b'tbl2 b=1.5\n') self.assertEqual( - str(buf), + bytes(buf), exp1) df2 = pd.DataFrame({'a': pd.Series([ 1, 2, 3, @@ -426,7 +426,7 @@ def test_u64_numpy_col(self): buf.dataframe(df2, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( - str(buf), + bytes(buf), exp1) # No partial write of `df2`. def test_i64_numpy_col(self): @@ -439,12 +439,12 @@ def test_i64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-9223372036854775808i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=9223372036854775807i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-9223372036854775808i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=9223372036854775807i\n') def test_f32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -458,14 +458,14 @@ def test_f32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0\n' + - 'tbl1 a=2.0\n' + - 'tbl1 a=3.0\n' + - 'tbl1 a=0.0\n' + - 'tbl1 a=Infinity\n' + - 'tbl1 a=-Infinity\n' + - 'tbl1 a=NaN\n' + - 'tbl1 a=3.4028234663852886e38\n') + b'tbl1 a=1.0\n' + + b'tbl1 a=2.0\n' + + b'tbl1 a=3.0\n' + + b'tbl1 a=0.0\n' + + b'tbl1 a=Infinity\n' + + b'tbl1 a=-Infinity\n' + + b'tbl1 a=NaN\n' + + b'tbl1 a=3.4028234663852886e38\n') def test_f64_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -479,14 +479,14 @@ def test_f64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0\n' + - 'tbl1 a=2.0\n' + - 'tbl1 a=3.0\n' + - 'tbl1 a=0.0\n' + - 'tbl1 a=Infinity\n' + - 'tbl1 a=-Infinity\n' + - 'tbl1 a=NaN\n' + - 'tbl1 a=1.7976931348623157e308\n') + b'tbl1 a=1.0\n' + + b'tbl1 a=2.0\n' + + b'tbl1 a=3.0\n' + + b'tbl1 a=0.0\n' + + b'tbl1 a=Infinity\n' + + b'tbl1 a=-Infinity\n' + + b'tbl1 a=NaN\n' + + b'tbl1 a=1.7976931348623157e308\n') def test_u8_arrow_col(self): df = pd.DataFrame({ @@ -500,12 +500,12 @@ def test_u8_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=255i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=255i,b="f"\n') def test_i8_arrow_col(self): df = pd.DataFrame({ @@ -520,13 +520,13 @@ def test_i8_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-128i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=127i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-128i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=127i,b="g"\n') def test_u16_arrow_col(self): df = pd.DataFrame({ @@ -540,12 +540,12 @@ def test_u16_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + + ('tbl1 a=1i,b="a"\n' + 'tbl1 a=2i,b="b"\n' + 'tbl1 a=3i,b="c"\n' + 'tbl1 a=0i,b="d"\n' + 'tbl1 b="e"\n' + - 'tbl1 a=65535i,b="f"\n') + 'tbl1 a=65535i,b="f"\n').encode('utf-8')) def test_i16_arrow_col(self): df = pd.DataFrame({ @@ -560,13 +560,13 @@ def test_i16_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-32768i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=32767i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-32768i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=32767i,b="g"\n') def test_u32_arrow_col(self): df = pd.DataFrame({ @@ -580,12 +580,12 @@ def test_u32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=4294967295i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=4294967295i,b="f"\n') def test_i32_arrow_col(self): df = pd.DataFrame({ @@ -600,13 +600,13 @@ def test_i32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-2147483648i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=2147483647i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-2147483648i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=2147483647i,b="g"\n') def test_u64_arrow_col(self): df = pd.DataFrame({ @@ -620,12 +620,12 @@ def test_u64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=9223372036854775807i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=9223372036854775807i,b="f"\n') df2 = pd.DataFrame({'a': pd.Series([ 1, 2, 3, @@ -650,13 +650,13 @@ def test_i64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-9223372036854775808i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=9223372036854775807i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-9223372036854775808i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=9223372036854775807i,b="g"\n') def test_f32_arrow_col(self): df = pd.DataFrame({ @@ -673,15 +673,15 @@ def test_f32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0,b="a"\n' + - 'tbl1 a=2.0,b="b"\n' + - 'tbl1 a=3.0,b="c"\n' + - 'tbl1 a=0.0,b="d"\n' + - 'tbl1 a=Infinity,b="e"\n' + - 'tbl1 a=-Infinity,b="f"\n' + - 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. - 'tbl1 a=3.4028234663852886e38,b="h"\n' + - 'tbl1 b="i"\n') + b'tbl1 a=1.0,b="a"\n' + + b'tbl1 a=2.0,b="b"\n' + + b'tbl1 a=3.0,b="c"\n' + + b'tbl1 a=0.0,b="d"\n' + + b'tbl1 a=Infinity,b="e"\n' + + b'tbl1 a=-Infinity,b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. + b'tbl1 a=3.4028234663852886e38,b="h"\n' + + b'tbl1 b="i"\n') def test_f64_arrow_col(self): df = pd.DataFrame({ @@ -698,15 +698,15 @@ def test_f64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0,b="a"\n' + - 'tbl1 a=2.0,b="b"\n' + - 'tbl1 a=3.0,b="c"\n' + - 'tbl1 a=0.0,b="d"\n' + - 'tbl1 a=Infinity,b="e"\n' + - 'tbl1 a=-Infinity,b="f"\n' + - 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. - 'tbl1 a=1.7976931348623157e308,b="h"\n' + - 'tbl1 b="i"\n') + b'tbl1 a=1.0,b="a"\n' + + b'tbl1 a=2.0,b="b"\n' + + b'tbl1 a=3.0,b="c"\n' + + b'tbl1 a=0.0,b="d"\n' + + b'tbl1 a=Infinity,b="e"\n' + + b'tbl1 a=-Infinity,b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. + b'tbl1 a=1.7976931348623157e308,b="h"\n' + + b'tbl1 b="i"\n') def test_bool_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -716,12 +716,12 @@ def test_bool_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') def test_bool_arrow_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -733,18 +733,18 @@ def test_bool_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=t\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n') df2 = pd.DataFrame({'a': pd.Series([ True, False, False, @@ -763,12 +763,12 @@ def test_bool_obj_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') df2 = pd.DataFrame({'a': pd.Series([ True, False, 'false'], @@ -803,15 +803,15 @@ def test_datetime64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1546300800000000t,b="a"\n' + - 'tbl1 a=1546300801000000t,b="b"\n' + - 'tbl1 a=1546300802000000t,b="c"\n' + - 'tbl1 a=1546300803000000t,b="d"\n' + - 'tbl1 a=1546300804000000t,b="e"\n' + - 'tbl1 a=1546300805000000t,b="f"\n' + - 'tbl1 b="g"\n' + - 'tbl1 b="h"\n' + - 'tbl1 b="i"\n') + b'tbl1 a=1546300800000000t,b="a"\n' + + b'tbl1 a=1546300801000000t,b="b"\n' + + b'tbl1 a=1546300802000000t,b="c"\n' + + b'tbl1 a=1546300803000000t,b="d"\n' + + b'tbl1 a=1546300804000000t,b="e"\n' + + b'tbl1 a=1546300805000000t,b="f"\n' + + b'tbl1 b="g"\n' + + b'tbl1 b="h"\n' + + b'tbl1 b="i"\n') df = pd.DataFrame({'a': pd.Series([ pd.Timestamp('1970-01-01 00:00:00'), @@ -820,9 +820,9 @@ def test_datetime64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=0t\n' + - 'tbl1 a=1000000t\n' + - 'tbl1 a=2000000t\n') + b'tbl1 a=0t\n' + + b'tbl1 a=1000000t\n' + + b'tbl1 a=2000000t\n') def test_datetime64_tz_arrow_col(self): df = pd.DataFrame({ @@ -842,10 +842,10 @@ def test_datetime64_tz_arrow_col(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. - 'tbl1,b=sym1 a=1546318800000000t\n' + - 'tbl1,b=sym2 a=1546318801000000t\n' + - 'tbl1,b=sym3\n' + - 'tbl1,b=sym4 a=1546318803000000t\n') + b'tbl1,b=sym1 a=1546318800000000t\n' + + b'tbl1,b=sym2 a=1546318801000000t\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 a=1546318803000000t\n') # Not epoch 0. df = pd.DataFrame({ @@ -864,9 +864,9 @@ def test_datetime64_tz_arrow_col(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. - 'tbl1,b=sym1 a=18000000000t\n' + - 'tbl1,b=sym2 a=18001000000t\n' + - 'tbl1,b=sym3 a=18002000000t\n') + b'tbl1,b=sym1 a=18000000000t\n' + + b'tbl1,b=sym2 a=18001000000t\n' + + b'tbl1,b=sym3 a=18002000000t\n') # Actual epoch 0. df = pd.DataFrame({ @@ -884,9 +884,9 @@ def test_datetime64_tz_arrow_col(self): buf = _dataframe(df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,b=sym1 a=0t\n' + - 'tbl1,b=sym2 a=1000000t\n' + - 'tbl1,b=sym3 a=2000000t\n') + b'tbl1,b=sym1 a=0t\n' + + b'tbl1,b=sym2 a=1000000t\n' + + b'tbl1,b=sym3 a=2000000t\n') df2 = pd.DataFrame({ 'a': [ @@ -900,8 +900,8 @@ def test_datetime64_tz_arrow_col(self): # Mostly, here assert that negative timestamps are allowed. self.assertIn( buf, - ['tbl1,b=sym1 a=-2208970800000000t\n', - 'tbl1,b=sym1 a=-2208971040000000t\n']) + [b'tbl1,b=sym1 a=-2208970800000000t\n', + b'tbl1,b=sym1 a=-2208971040000000t\n']) def test_datetime64_numpy_at(self): df = pd.DataFrame({ @@ -920,15 +920,15 @@ def test_datetime64_numpy_at(self): buf = _dataframe(df, table_name='tbl1', at='a') self.assertEqual( buf, - 'tbl1 b=1i 1546300800000000000\n' + - 'tbl1 b=2i 1546300801000000000\n' + - 'tbl1 b=3i 1546300802000000000\n' + - 'tbl1 b=4i 1546300803000000000\n' + - 'tbl1 b=5i 1546300804000000000\n' + - 'tbl1 b=6i 1546300805000000000\n' + - 'tbl1 b=7i\n' + - 'tbl1 b=8i\n' + - 'tbl1 b=9i\n') + b'tbl1 b=1i 1546300800000000000\n' + + b'tbl1 b=2i 1546300801000000000\n' + + b'tbl1 b=3i 1546300802000000000\n' + + b'tbl1 b=4i 1546300803000000000\n' + + b'tbl1 b=5i 1546300804000000000\n' + + b'tbl1 b=6i 1546300805000000000\n' + + b'tbl1 b=7i\n' + + b'tbl1 b=8i\n' + + b'tbl1 b=9i\n') df = pd.DataFrame({ 'a': pd.Series([ @@ -940,9 +940,9 @@ def test_datetime64_numpy_at(self): buf = _dataframe(df, table_name='tbl1', at='a') self.assertEqual( buf, - 'tbl1 b=1i 0\n' + - 'tbl1 b=2i 1000000000\n' + - 'tbl1 b=3i 2000000000\n') + b'tbl1 b=1i 0\n' + + b'tbl1 b=2i 1000000000\n' + + b'tbl1 b=3i 2000000000\n') def test_datetime64_tz_arrow_at(self): df = pd.DataFrame({ @@ -962,10 +962,10 @@ def test_datetime64_tz_arrow_at(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. - 'tbl1,b=sym1 1546318800000000000\n' + - 'tbl1,b=sym2 1546318801000000000\n' + - 'tbl1,b=sym3\n' + - 'tbl1,b=sym4 1546318803000000000\n') + b'tbl1,b=sym1 1546318800000000000\n' + + b'tbl1,b=sym2 1546318801000000000\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 1546318803000000000\n') df2 = pd.DataFrame({ 'a': [ @@ -991,11 +991,11 @@ def _test_pyobjstr_table(self, dtype): buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) self.assertEqual( buf, - 'a b=1i\n' + + ('a b=1i\n' + ('b' * 127) + ' b=2i\n' + 'q❤️p b=3i\n' + '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n') + '💩🦞 b=5i\n').encode("utf-8")) with self.assertRaisesRegex( qi.IngressError, "Too long"): @@ -1063,8 +1063,8 @@ def test_obj_string_table(self): '.': pd.Series(['x', 42], dtype='string'), 'z': [1, 2]}), table_name_col='.', at=qi.ServerTimestamp), - 'x z=1i\n' + - '42 z=2i\n') + b'x z=1i\n' + + b'42 z=2i\n') def _test_pyobjstr_numpy_symbol(self, dtype): df = pd.DataFrame({'a': pd.Series([ @@ -1080,14 +1080,14 @@ def _test_pyobjstr_numpy_symbol(self, dtype): buf = _dataframe(df, table_name='tbl1', symbols=True, at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,a=a\n' + + ('tbl1,a=a\n' + 'tbl1,a=q❤️p\n' + 'tbl1,a=' + ('❤️' * 1200) + '\n' + 'tbl1,a=Questo\\ è\\ un\\ qualcosa\n' + 'tbl1,a=щось\n' + 'tbl1,a=\n' + 'tbl1,a=嚜꓂\n' + - 'tbl1,a=💩🦞\n') + 'tbl1,a=💩🦞\n').encode("utf-8")) for null_obj in (None, float('nan'), pd.NA): self.assertEqual( @@ -1096,8 +1096,8 @@ def _test_pyobjstr_numpy_symbol(self, dtype): 'x': pd.Series(['a', null_obj], dtype=dtype), 'y': [1, 2]}), table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - 'tbl1,x=a y=1i\n' + - 'tbl1 y=2i\n') + b'tbl1,x=a y=1i\n' + + b'tbl1 y=2i\n') def test_obj_str_numpy_symbol(self): self._test_pyobjstr_numpy_symbol('object') @@ -1119,8 +1119,8 @@ def test_obj_string_numpy_symbol(self): 'x': pd.Series(['x', 42], dtype='string'), 'y': [1, 2]}), table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - 'tbl1,x=x y=1i\n' + - 'tbl1,x=42 y=2i\n') + b'tbl1,x=x y=1i\n' + + b'tbl1,x=42 y=2i\n') def test_str_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -1136,14 +1136,14 @@ def test_str_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a="a"\n' + + ('tbl1 a="a"\n' + 'tbl1 a="q❤️p"\n' + 'tbl1 a="' + ('❤️' * 1200) + '"\n' + 'tbl1 a="Questo è un qualcosa"\n' + 'tbl1 a="щось"\n' + 'tbl1 a=""\n' + 'tbl1 a="嚜꓂"\n' + - 'tbl1 a="💩🦞"\n') + 'tbl1 a="💩🦞"\n').encode("utf-8")) def test_str_arrow_table(self): df = pd.DataFrame({ @@ -1158,11 +1158,11 @@ def test_str_arrow_table(self): buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) self.assertEqual( buf, - 'a b=1i\n' + + ('a b=1i\n' + ('b' * 127) + ' b=2i\n' + 'q❤️p b=3i\n' + '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n') + '💩🦞 b=5i\n').encode("utf-8")) with self.assertRaisesRegex( qi.IngressError, "Too long"): @@ -1210,7 +1210,7 @@ def test_str_arrow_symbol(self): buf = _dataframe(df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,a=a b=1i\n' + + ('tbl1,a=a b=1i\n' + 'tbl1,a=q❤️p b=2i\n' + 'tbl1,a=' + ('❤️' * 1200) + ' b=3i\n' + 'tbl1,a=Questo\\ è\\ un\\ qualcosa b=4i\n' + @@ -1218,7 +1218,7 @@ def test_str_arrow_symbol(self): 'tbl1,a= b=6i\n' + 'tbl1 b=7i\n' + 'tbl1,a=嚜꓂ b=8i\n' + - 'tbl1,a=💩🦞 b=9i\n') + 'tbl1,a=💩🦞 b=9i\n').encode('utf-8')) def test_str_arrow_col(self): df = pd.DataFrame({ @@ -1237,7 +1237,7 @@ def test_str_arrow_col(self): buf = _dataframe(df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a="a",b=1i\n' + + ('tbl1 a="a",b=1i\n' + 'tbl1 a="q❤️p",b=2i\n' + 'tbl1 a="' + ('❤️' * 1200) + '",b=3i\n' + 'tbl1 a="Questo è un qualcosa",b=4i\n' + @@ -1245,7 +1245,7 @@ def test_str_arrow_col(self): 'tbl1 a="",b=6i\n' + 'tbl1 b=7i\n' + 'tbl1 a="嚜꓂",b=8i\n' + - 'tbl1 a="💩🦞",b=9i\n') + 'tbl1 a="💩🦞",b=9i\n').encode('utf-8')) def test_pyobj_int_col(self): int64_min = -2**63 @@ -1260,7 +1260,7 @@ def test_pyobj_int_col(self): int64_max], dtype='object'), 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), table_name='tbl1', at = qi.ServerTimestamp), - 'tbl1 a=1i,b=1i\n' + + ('tbl1 a=1i,b=1i\n' + 'tbl1 a=2i,b=2i\n' + 'tbl1 a=3i,b=3i\n' + 'tbl1 b=4i\n' + @@ -1269,7 +1269,7 @@ def test_pyobj_int_col(self): 'tbl1 a=7i,b=7i\n' + 'tbl1 a=0i,b=8i\n' + 'tbl1 a=' + str(int64_min) + 'i,b=9i\n' + - 'tbl1 a=' + str(int64_max) + 'i,b=10i\n') + 'tbl1 a=' + str(int64_max) + 'i,b=10i\n').encode('utf-8')) with self.assertRaisesRegex( qi.IngressError, "1 \\('STRING'\\): .*type int, got.*str\\."): @@ -1298,13 +1298,13 @@ def test_pyobj_float_col(self): dtype='object'), 'b': [1, 2, 3, 4, 5, 6, 7]}), table_name='tbl1', at = qi.ServerTimestamp), - 'tbl1 a=1.0,b=1i\n' + - 'tbl1 a=2.0,b=2i\n' + - 'tbl1 a=3.0,b=3i\n' + - 'tbl1 b=4i\n' + - 'tbl1 a=NaN,b=5i\n' + - 'tbl1 b=6i\n' + - 'tbl1 a=7.0,b=7i\n') + b'tbl1 a=1.0,b=1i\n' + + b'tbl1 a=2.0,b=2i\n' + + b'tbl1 a=3.0,b=3i\n' + + b'tbl1 b=4i\n' + + b'tbl1 a=NaN,b=5i\n' + + b'tbl1 b=6i\n' + + b'tbl1 a=7.0,b=7i\n') with self.assertRaisesRegex( qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): @@ -1335,7 +1335,7 @@ def _test_cat_table(self, count): exp = ''.join( f'{s} b={i}i\n' for i, s in enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1369,7 +1369,7 @@ def _test_cat_symbol(self, count): exp = ''.join( f'tbl1,a={s} b={i}i\n' for i, s in enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1378,7 +1378,7 @@ def _test_cat_symbol(self, count): exp2 = exp.replace('tbl1,a=s2 b=2i\n', 'tbl1 b=2i\n') buf2 = _dataframe(df2, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2) + self.assertEqual(buf2, exp2.encode("utf-8")) def test_cat_i8_symbol(self): self._test_cat_symbol(30) @@ -1404,7 +1404,7 @@ def _test_cat_str(self, count): exp = ''.join( f'tbl1 a="{s}",b={i}i\n' for i, s in enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1413,7 +1413,7 @@ def _test_cat_str(self, count): exp2 = exp.replace('tbl1 a="s2",b=2i\n', 'tbl1 b=2i\n') buf2 = _dataframe(df2, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2) + self.assertEqual(buf2, exp2.encode("utf-8")) def test_cat_i8_str(self): self._test_cat_str(30) @@ -1435,9 +1435,9 @@ def test_all_nulls_pyobj_col(self): buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 b=1i\n' + - 'tbl1 b=2i\n' + - 'tbl1 b=3i\n') + b'tbl1 b=1i\n' + + b'tbl1 b=2i\n' + + b'tbl1 b=3i\n') def test_strided_numpy_column(self): two_d = np.array([ @@ -1472,7 +1472,7 @@ def test_serializing_in_chunks(self): exp = ''.join( f'tbl1 a={i}i,b={i}i\n' for i in range(index * 10, (index + 1) * 10)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) def test_arrow_chunked_array(self): # We build a table with chunked arrow arrays as columns. @@ -1495,15 +1495,15 @@ def test_arrow_chunked_array(self): df = arr_tab.to_pandas() buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) exp = ( - 'tbl1 a=1i,b=10i\n' + - 'tbl1 a=2i,b=20i\n' + - 'tbl1 a=3i,b=30i\n' + - 'tbl1 a=4i,b=40i\n' + - 'tbl1 a=5i,b=50i\n' + - 'tbl1 a=6i,b=60i\n' + - 'tbl1 a=7i,b=70i\n' + - 'tbl1 a=8i,b=80i\n' + - 'tbl1 a=9i,b=90i\n') + b'tbl1 a=1i,b=10i\n' + + b'tbl1 a=2i,b=20i\n' + + b'tbl1 a=3i,b=30i\n' + + b'tbl1 a=4i,b=40i\n' + + b'tbl1 a=5i,b=50i\n' + + b'tbl1 a=6i,b=60i\n' + + b'tbl1 a=7i,b=70i\n' + + b'tbl1 a=8i,b=80i\n' + + b'tbl1 a=9i,b=90i\n') self.assertEqual(buf, exp) if not hasattr(pd, 'ArrowDtype'): @@ -1568,18 +1568,18 @@ def df_eq(exp_df, deser_df, exp_dtypes): df_eq(df, fp2fp_df, exp_dtypes) exp = ( - 'tbl1,s=a a=1i,b=10i,c=0.5\n' + - 'tbl1,s=b a=2i,b=20i,c=NaN\n' + - 'tbl1,s=a a=3i,b=30i,c=2.5\n' + - 'tbl1,s=c a=4i,c=3.5\n' + - 'tbl1,s=a a=5i,b=50i,c=NaN\n') + b'tbl1,s=a a=1i,b=10i,c=0.5\n' + + b'tbl1,s=b a=2i,b=20i,c=NaN\n' + + b'tbl1,s=a a=3i,b=30i,c=2.5\n' + + b'tbl1,s=c a=4i,c=3.5\n' + + b'tbl1,s=a a=5i,b=50i,c=NaN\n') fallback_exp = ( - 'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + - 'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + - 'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + - 'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + - 'tbl1 s="a",a=5i,b=50.0,c=NaN\n') + b'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + + b'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + + b'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + + b'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + + b'tbl1 s="a",a=5i,b=50.0,c=NaN\n') self.assertEqual(_dataframe(df, table_name='tbl1', at=qi.ServerTimestamp), exp) self.assertEqual(_dataframe(pa2pa_df, table_name='tbl1', at=qi.ServerTimestamp), exp)