diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 71c133f173f157..85bc2b827df8fb 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -40,3 +40,46 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) { return PyBytes_Join(sep, iterable); } + + +// --- PyBytesWriter API ----------------------------------------------------- + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( + Py_ssize_t size); +PyAPI_FUNC(void) PyBytesWriter_Discard( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithPointer( + PyBytesWriter *writer, + void *buf); + +PyAPI_FUNC(void*) PyBytesWriter_GetData( + PyBytesWriter *writer); +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize( + PyBytesWriter *writer); + +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + const void *bytes, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Format( + PyBytesWriter *writer, + const char *format, + ...); + +PyAPI_FUNC(int) PyBytesWriter_Resize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Grow( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(void*) PyBytesWriter_GrowAndUpdatePointer( + PyBytesWriter *writer, + Py_ssize_t size, + void *buf); diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..f4fe10eb5ad952 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -142,6 +142,10 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, const void *bytes, Py_ssize_t size); +// Export for '_testcapi' shared extension. 
+PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray( + Py_ssize_t size); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 4828dfd948f70a..8ae8115d973473 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -26,6 +26,7 @@ extern "C" { # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 # define Py_unicode_writers_MAXFREELIST 1 +# define Py_bytes_writers_MAXFREELIST 1 # define Py_pycfunctionobject_MAXFREELIST 16 # define Py_pycmethodobject_MAXFREELIST 16 # define Py_pymethodobjects_MAXFREELIST 20 @@ -59,6 +60,7 @@ struct _Py_freelists { struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; struct _Py_freelist unicode_writers; + struct _Py_freelist bytes_writers; struct _Py_freelist pycfunctionobject; struct _Py_freelist pycmethodobject; struct _Py_freelist pymethodobjects; diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ed6c435316708e..971d6031972b8a 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -135,7 +135,7 @@ extern int _PyLong_FormatWriter( int alternate); extern char* _PyLong_FormatBytesWriter( - _PyBytesWriter *writer, + PyBytesWriter *writer, char *str, PyObject *obj, int base, diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 5b61c73381542d..be82a2985eb4cd 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -291,5 +291,94 @@ def test_join(self): bytes_join(b'', NULL) +class BytesWriterTest(unittest.TestCase): + result_type = bytes + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string, 0) + + def test_create(self): + # Test PyBytesWriter_Create() + writer = self.create_writer() + self.assertEqual(writer.get_size(), 0) + self.assertEqual(writer.finish(), self.result_type(b'')) + + 
writer = self.create_writer(3, b'abc') + self.assertEqual(writer.get_size(), 3) + self.assertEqual(writer.finish(), self.result_type(b'abc')) + + writer = self.create_writer(10, b'abc') + self.assertEqual(writer.get_size(), 10) + self.assertEqual(writer.finish_with_size(3), self.result_type(b'abc')) + + def test_write_bytes(self): + # Test PyBytesWriter_WriteBytes() + writer = self.create_writer() + writer.write_bytes(b'Hello World!', -1) + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) + + writer = self.create_writer() + writer.write_bytes(b'Hello ', -1) + writer.write_bytes(b'World! ', 6) + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) + + def test_resize(self): + # Test PyBytesWriter_Resize() + writer = self.create_writer() + writer.resize(len(b'number=123456'), b'number=123456') + writer.resize(len(b'number=123456'), b'') + self.assertEqual(writer.get_size(), len(b'number=123456')) + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(0, b'') + writer.resize(len(b'number=123456'), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number='), b'') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + writer = self.create_writer() + writer.resize(len(b'number'), b'number') + writer.resize(len(b'number='), b'=') + writer.resize(len(b'number=123'), b'123') + writer.resize(len(b'number=123456'), b'456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) + + def test_format_i(self): + # Test PyBytesWriter_Format() + writer = 
self.create_writer() + writer.format_i(b'x=%i', 123456) + self.assertEqual(writer.finish(), self.result_type(b'x=123456')) + + writer = self.create_writer() + writer.format_i(b'x=%i, ', 123) + writer.format_i(b'y=%i', 456) + self.assertEqual(writer.finish(), self.result_type(b'x=123, y=456')) + + def test_example_abc(self): + self.assertEqual(_testcapi.byteswriter_abc(), b'abc') + + def test_example_resize(self): + self.assertEqual(_testcapi.byteswriter_resize(), b'Hello World') + + def test_example_highlevel(self): + self.assertEqual(_testcapi.byteswriter_highlevel(), b'Hello World!') + + +class ByteArrayWriterTest(BytesWriterTest): + result_type = bytearray + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string, 1) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 9e85e0de42cd8d..815cac652707b5 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -668,9 +668,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) self->bzs_avail_in_real = 0; self->input_buffer = NULL; self->input_buffer_size = 0; - self->unused_data = PyBytes_FromStringAndSize(NULL, 0); - if (self->unused_data == NULL) - goto error; + self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); if (catch_bz2_error(bzerror)) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 7cf3f152eeecc6..7478ae20e83703 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -201,52 +201,45 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data, const char *errors) /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/ { - Py_ssize_t size; - Py_ssize_t newsize; - PyObject *v; - - size = PyBytes_GET_SIZE(data); + Py_ssize_t size = PyBytes_GET_SIZE(data); if (size > PY_SSIZE_T_MAX / 4) { PyErr_SetString(PyExc_OverflowError, "string is too large to encode"); return NULL; } - newsize = 4*size; - v = 
PyBytes_FromStringAndSize(NULL, newsize); + Py_ssize_t alloc_size = 4*size; - if (v == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(alloc_size); + if (writer == NULL) { return NULL; } - else { - Py_ssize_t i; - char c; - char *p = PyBytes_AS_STRING(v); - - for (i = 0; i < size; i++) { - /* There's at least enough room for a hex escape */ - assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); - c = PyBytes_AS_STRING(data)[i]; - if (c == '\'' || c == '\\') - *p++ = '\\', *p++ = c; - else if (c == '\t') - *p++ = '\\', *p++ = 't'; - else if (c == '\n') - *p++ = '\\', *p++ = 'n'; - else if (c == '\r') - *p++ = '\\', *p++ = 'r'; - else if (c < ' ' || c >= 0x7f) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = Py_hexdigits[(c & 0xf0) >> 4]; - *p++ = Py_hexdigits[c & 0xf]; - } - else - *p++ = c; - } - *p = '\0'; - if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { - return NULL; + char *p = PyBytesWriter_GetData(writer); + + for (Py_ssize_t i = 0; i < size; i++) { + /* There's at least enough room for a hex escape */ + assert(alloc_size - (p - (char*)PyBytesWriter_GetData(writer)) >= 4); + char c = PyBytes_AS_STRING(data)[i]; + if (c == '\'' || c == '\\') + *p++ = '\\', *p++ = c; + else if (c == '\t') + *p++ = '\\', *p++ = 't'; + else if (c == '\n') + *p++ = '\\', *p++ = 'n'; + else if (c == '\r') + *p++ = '\\', *p++ = 'r'; + else if (c < ' ' || c >= 0x7f) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = Py_hexdigits[(c & 0xf0) >> 4]; + *p++ = Py_hexdigits[c & 0xf]; } + else + *p++ = c; + } + + PyObject *v = PyBytesWriter_FinishWithPointer(writer, p); + if (v == NULL) { + return NULL; } return codec_tuple(v, size); diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index cc65cbd98d71dc..99b321d46e2830 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -401,10 +401,7 @@ _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, return PyBytes_FromStringAndSize(val.dptr, val.dsize); } if (default_value == NULL) { - default_value = 
PyBytes_FromStringAndSize(NULL, 0); - if (default_value == NULL) { - return NULL; - } + default_value = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); val.dptr = NULL; val.dsize = 0; } diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 756a8b70931baa..e5c39b710c9477 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -806,15 +806,15 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) /*[clinic end generated code: output=ef9320c23280efad input=816a6537cea3d1db]*/ { EVP_MD_CTX *temp_ctx; - PyObject *retval = PyBytes_FromStringAndSize(NULL, length); - if (retval == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; } temp_ctx = EVP_MD_CTX_new(); if (temp_ctx == NULL) { - Py_DECREF(retval); + PyBytesWriter_Discard(writer); PyErr_NoMemory(); return NULL; } @@ -823,17 +823,17 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) goto error; } if (!EVP_DigestFinalXOF(temp_ctx, - (unsigned char*)PyBytes_AS_STRING(retval), + (unsigned char*)PyBytesWriter_GetData(writer), length)) { goto error; } EVP_MD_CTX_free(temp_ctx); - return retval; + return PyBytesWriter_Finish(writer); error: - Py_DECREF(retval); + PyBytesWriter_Discard(writer); EVP_MD_CTX_free(temp_ctx); notify_ssl_error_occurred(); return NULL; @@ -1414,8 +1414,6 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, long maxmem, long dklen) /*[clinic end generated code: output=14849e2aa2b7b46c input=48a7d63bf3f75c42]*/ { - PyObject *key_obj = NULL; - char *key; int retval; unsigned long n, r, p; @@ -1486,27 +1484,27 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, return NULL; } - key_obj = PyBytes_FromStringAndSize(NULL, dklen); - if (key_obj == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(dklen); + if (writer == NULL) { return NULL; } - key = PyBytes_AS_STRING(key_obj); + unsigned char *key = PyBytesWriter_GetData(writer); Py_BEGIN_ALLOW_THREADS retval = 
EVP_PBE_scrypt( (const char*)password->buf, (size_t)password->len, (const unsigned char *)salt->buf, (size_t)salt->len, n, r, p, maxmem, - (unsigned char *)key, (size_t)dklen + key, (size_t)dklen ); Py_END_ALLOW_THREADS if (!retval) { - Py_CLEAR(key_obj); + PyBytesWriter_Discard(writer); notify_ssl_error_occurred(); return NULL; } - return key_obj; + return PyBytesWriter_Finish(writer); } #endif /* PY_OPENSSL_HAS_SCRYPT */ diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index c05cc8a4e4cb49..a302dba84a785b 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -1259,10 +1259,7 @@ _lzma_LZMADecompressor_impl(PyTypeObject *type, int format, self->needs_input = 1; self->input_buffer = NULL; self->input_buffer_size = 0; - Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0)); - if (self->unused_data == NULL) { - goto error; - } + Py_XSETREF(self->unused_data, Py_GetConstant(Py_CONSTANT_EMPTY_BYTES)); switch (format) { case FORMAT_AUTO: @@ -1441,7 +1438,7 @@ _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter) { lzma_ret lzret; uint32_t encoded_size; - PyObject *result = NULL; + PyBytesWriter *writer = NULL; _lzma_state *state = get_lzma_state(module); assert(state != NULL); @@ -1449,20 +1446,20 @@ _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter) if (catch_lzma_error(state, lzret)) goto error; - result = PyBytes_FromStringAndSize(NULL, encoded_size); - if (result == NULL) + writer = PyBytesWriter_Create(encoded_size); + if (writer == NULL) { goto error; + } - lzret = lzma_properties_encode( - &filter, (uint8_t *)PyBytes_AS_STRING(result)); + lzret = lzma_properties_encode(&filter, PyBytesWriter_GetData(writer)); if (catch_lzma_error(state, lzret)) { goto error; } - return result; + return PyBytesWriter_Finish(writer); error: - Py_XDECREF(result); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..a1ca9875458b7a 100644 
--- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2615,31 +2615,26 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj) static PyObject * raw_unicode_escape(PyObject *obj) { - char *p; - Py_ssize_t i, size; - const void *data; - int kind; - _PyBytesWriter writer; + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + const void *data = PyUnicode_DATA(obj); + int kind = PyUnicode_KIND(obj); - _PyBytesWriter_Init(&writer); - - size = PyUnicode_GET_LENGTH(obj); - data = PyUnicode_DATA(obj); - kind = PyUnicode_KIND(obj); - - p = _PyBytesWriter_Alloc(&writer, size); - if (p == NULL) - goto error; - writer.overallocate = 1; + Py_ssize_t alloc = size; + PyBytesWriter *writer = PyBytesWriter_Create(alloc); + if (writer == NULL) { + return NULL; + } + char *p = PyBytesWriter_GetData(writer); - for (i=0; i < size; i++) { + for (Py_ssize_t i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 10-1); - if (p == NULL) + p = PyBytesWriter_GrowAndUpdatePointer(writer, 10-1, p); + if (p == NULL) { goto error; + } *p++ = '\\'; *p++ = 'U'; @@ -2658,9 +2653,10 @@ raw_unicode_escape(PyObject *obj) ch == 0x1a) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 6-1); - if (p == NULL) + p = PyBytesWriter_GrowAndUpdatePointer(writer, 6-1, p); + if (p == NULL) { goto error; + } *p++ = '\\'; *p++ = 'u'; @@ -2674,10 +2670,10 @@ raw_unicode_escape(PyObject *obj) *p++ = (char) ch; } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_FinishWithPointer(writer, p); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/_struct.c b/Modules/_struct.c index f04805d9d6d1d7..8e84a8a949ad0a 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2272,7 +2272,6 @@ strings."); static PyObject * s_pack(PyObject *self, PyObject 
*const *args, Py_ssize_t nargs) { - char *buf; PyStructObject *soself; _structmodulestate *state = get_struct_state_structinst(self); @@ -2288,21 +2287,19 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } /* Allocate a new string */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - buf = _PyBytesWriter_Alloc(&writer, soself->s_size); - if (buf == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter *writer = PyBytesWriter_Create(soself->s_size); + if (writer == NULL) { return NULL; } + char *buf = PyBytesWriter_GetData(writer); /* Call the guts */ if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } - return _PyBytesWriter_Finish(&writer, buf + soself->s_size); + return PyBytesWriter_FinishWithSize(writer, soself->s_size); } PyDoc_STRVAR(s_pack_into__doc__, diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 33903de14ba68d..388e65456c3a8b 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -1,6 +1,11 @@ +// Use pycore_bytesobject.h +#define PYTESTCAPI_NEED_INTERNAL_API + #include "parts.h" #include "util.h" +#include "pycore_bytesobject.h" // _PyBytesWriter_CreateByteArray() + /* Test _PyBytes_Resize() */ static PyObject * @@ -51,9 +56,308 @@ bytes_join(PyObject *Py_UNUSED(module), PyObject *args) } +// --- PyBytesWriter type --------------------------------------------------- + +typedef struct { + PyObject_HEAD + PyBytesWriter *writer; +} WriterObject; + + +static PyObject * +writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); + if (!self) { + return NULL; + } + self->writer = NULL; + return (PyObject*)self; +} + + +static int +writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)self_raw; + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + + if (kwargs && 
PyDict_GET_SIZE(kwargs)) { + PyErr_Format(PyExc_TypeError, + "PyBytesWriter() takes exactly no keyword arguments"); + return -1; + } + + Py_ssize_t alloc; + char *str; + Py_ssize_t str_size; + int use_bytearray; + if (!PyArg_ParseTuple(args, "ny#i", + &alloc, &str, &str_size, &use_bytearray)) { + return -1; + } + + if (use_bytearray) { + self->writer = _PyBytesWriter_CreateByteArray(alloc); + } + else { + self->writer = PyBytesWriter_Create(alloc); + } + if (self->writer == NULL) { + return -1; + } + + if (str_size) { + char *buf = PyBytesWriter_GetData(self->writer); + memcpy(buf, str, str_size); + } + + return 0; +} + + +static void +writer_dealloc(PyObject *self_raw) +{ + WriterObject *self = (WriterObject *)self_raw; + PyTypeObject *tp = Py_TYPE(self); + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + tp->tp_free(self); + Py_DECREF(tp); +} + + +static inline int +writer_check(WriterObject *self) +{ + if (self->writer == NULL) { + PyErr_SetString(PyExc_ValueError, "operation on finished writer"); + return -1; + } + return 0; +} + + +static PyObject* +writer_write_bytes(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *bytes; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &bytes, &size)) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(self->writer, bytes, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_format_i(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *format; + int value; + if (!PyArg_ParseTuple(args, "yi", &format, &value)) { + return NULL; + } + + if (PyBytesWriter_Format(self->writer, format, value) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_resize(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) 
< 0) { + return NULL; + } + + Py_ssize_t size; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, + "ny#", + &size, &str, &str_size)) { + return NULL; + } + assert(size >= str_size); + + Py_ssize_t pos = PyBytesWriter_GetSize(self->writer); + if (PyBytesWriter_Resize(self->writer, size) < 0) { + return NULL; + } + + char *buf = PyBytesWriter_GetData(self->writer); + memcpy(buf + pos, str, str_size); + + Py_RETURN_NONE; +} + + +static PyObject* +writer_get_size(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t alloc = PyBytesWriter_GetSize(self->writer); + return PyLong_FromSsize_t(alloc); +} + + +static PyObject* +writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + PyObject *str = PyBytesWriter_Finish(self->writer); + self->writer = NULL; + return str; +} + + +static PyObject* +writer_finish_with_size(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "n", &size)) { + return NULL; + } + + PyObject *str = PyBytesWriter_FinishWithSize(self->writer, size); + self->writer = NULL; + return str; +} + + +static PyMethodDef writer_methods[] = { + {"write_bytes", _PyCFunction_CAST(writer_write_bytes), METH_VARARGS}, + {"format_i", _PyCFunction_CAST(writer_format_i), METH_VARARGS}, + {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, + {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, + {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, + {"finish_with_size", _PyCFunction_CAST(writer_finish_with_size), METH_VARARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_new, writer_new}, + {Py_tp_init, writer_init}, + {Py_tp_dealloc, 
writer_dealloc}, + {Py_tp_methods, writer_methods}, + {0, 0}, /* sentinel */ +}; + +static PyType_Spec Writer_spec = { + .name = "_testcapi.PyBytesWriter", + .basicsize = sizeof(WriterObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = Writer_Type_slots, +}; + + +static PyObject * +byteswriter_abc(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(3); + if (writer == NULL) { + return NULL; + } + + char *str = PyBytesWriter_GetData(writer); + memcpy(str, "abc", 3); + + return PyBytesWriter_Finish(writer); +} + + +static PyObject * +byteswriter_resize(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + // Allocate 10 bytes + PyBytesWriter *writer = PyBytesWriter_Create(10); + if (writer == NULL) { + return NULL; + } + char *buf = PyBytesWriter_GetData(writer); + + // Write some bytes + memcpy(buf, "Hello ", strlen("Hello ")); + buf += strlen("Hello "); + + // Allocate 10 more bytes + buf = PyBytesWriter_GrowAndUpdatePointer(writer, 10, buf); + if (buf == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + // Write more bytes + memcpy(buf, "World", strlen("World")); + buf += strlen("World"); + + // Truncate to the exact size and create a bytes object + return PyBytesWriter_FinishWithPointer(writer, buf); +} + + +static PyObject * +byteswriter_highlevel(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(0); + if (writer == NULL) { + goto error; + } + if (PyBytesWriter_WriteBytes(writer, "Hello", -1) < 0) { + goto error; + } + if (PyBytesWriter_Format(writer, " %s!", "World") < 0) { + goto error; + } + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, + {"byteswriter_abc", byteswriter_abc, METH_NOARGS}, + {"byteswriter_resize", byteswriter_resize, METH_NOARGS}, + 
{"byteswriter_highlevel", byteswriter_highlevel, METH_NOARGS}, {NULL}, }; @@ -64,5 +368,15 @@ _PyTestCapi_Init_Bytes(PyObject *m) return -1; } + PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); + if (writer_type == NULL) { + return -1; + } + if (PyModule_AddType(m, writer_type) < 0) { + Py_DECREF(writer_type); + return -1; + } + Py_DECREF(writer_type); + return 0; } diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..04dc37602a7572 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -205,11 +205,9 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/ { const unsigned char *ascii_data; - unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; Py_ssize_t ascii_len, bin_len; binascii_state *state; @@ -223,9 +221,11 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) ascii_len--; /* Allocate the buffer */ - if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { return NULL; - bin_data = (unsigned char *)PyBytes_AS_STRING(rv); + } + unsigned char *bin_data = PyBytesWriter_GetData(writer); for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) { /* XXX is it really best to add NULs if there's no more data */ @@ -248,8 +248,7 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) return NULL; } PyErr_SetString(state->Error, "Illegal char"); - Py_DECREF(rv); - return NULL; + goto error; } this_ch = (this_ch - ' ') & 077; } @@ -280,11 +279,14 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) return NULL; } PyErr_SetString(state->Error, "Trailing garbage"); - Py_DECREF(rv); - return NULL; + goto error; } } - return rv; + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; } /*[clinic input] @@ -302,16 +304,13 @@ static PyObject * 
binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; binascii_state *state; - Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; + Py_ssize_t bin_len; - _PyBytesWriter_Init(&writer); bin_data = data->buf; bin_len = data->len; if ( bin_len > 45 ) { @@ -325,10 +324,12 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } /* We're lazy and allocate to much (fixed up later) */ - out_len = 2 + (bin_len + 2) / 3 * 4; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + Py_ssize_t out_len = 2 + (bin_len + 2) / 3 * 4; + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_data = PyBytesWriter_GetData(writer); /* Store the length */ if (backtick && !bin_len) @@ -356,7 +357,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } /*[clinic input] @@ -387,12 +388,11 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len); - if (bin_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { return NULL; - unsigned char *bin_data_start = bin_data; + } + unsigned char *bin_data = PyBytesWriter_GetData(writer); if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { state = get_binascii_state(module); @@ -488,12 +488,14 @@ binascii_a2b_base64_impl(PyObject *module, 
Py_buffer *data, int strict_mode) state = get_binascii_state(module); if (state == NULL) { /* error already set, from get_binascii_state */ + assert(PyErr_Occurred()); } else if (quad_pos == 1) { /* ** There is exactly one extra valid, non-padding, base64 character. ** This is an invalid length, as there is no possible input that ** could encoded into such a base64 string. */ + unsigned char *bin_data_start = PyBytesWriter_GetData(writer); PyErr_Format(state->Error, "Invalid base64-encoded string: " "number of data characters (%zd) cannot be 1 more " @@ -502,13 +504,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) } else { PyErr_SetString(state->Error, "Incorrect padding"); } - error_end: - _PyBytesWriter_Dealloc(&writer); - return NULL; + goto error_end; } done: - return _PyBytesWriter_Finish(&writer, bin_data); + return PyBytesWriter_FinishWithPointer(writer, bin_data); + +error_end: + PyBytesWriter_Discard(writer); + return NULL; } @@ -527,18 +531,15 @@ static PyObject * binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; + Py_ssize_t bin_len; binascii_state *state; bin_data = data->buf; bin_len = data->len; - _PyBytesWriter_Init(&writer); assert(bin_len >= 0); @@ -554,12 +555,15 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /* We're lazy and allocate too much (fixed up later). "+2" leaves room for up to two pad characters. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). 
*/ - out_len = bin_len*2 + 2; - if (newline) + Py_ssize_t out_len = bin_len*2 + 2; + if (newline) { out_len++; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + } + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_data = PyBytesWriter_GetData(writer); for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -584,7 +588,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } @@ -886,8 +890,6 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) { const char* argbuf; Py_ssize_t arglen; - PyObject *retval; - char* retbuf; Py_ssize_t i, j; binascii_state *state; @@ -909,10 +911,11 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) return NULL; } - retval = PyBytes_FromStringAndSize(NULL, (arglen/2)); - if (!retval) + PyBytesWriter *writer = PyBytesWriter_Create(arglen/2); + if (writer == NULL) { return NULL; - retbuf = PyBytes_AS_STRING(retval); + } + char *retbuf = PyBytesWriter_GetData(writer); for (i=j=0; i < arglen; i += 2) { unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; @@ -924,14 +927,14 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) } PyErr_SetString(state->Error, "Non-hexadecimal digit found"); - goto finally; + goto error; } retbuf[j++] = (top << 4) + bot; } - return retval; + return PyBytesWriter_Finish(writer); - finally: - Py_DECREF(retval); +error: + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 6a385562845849..8f5bec848a5394 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -450,14 +450,15 @@ _safe_PyBytes_FromStringAndSize(char *start, size_t num_bytes) { } } else { - PyObject *result = 
PyBytes_FromStringAndSize(NULL, num_bytes); - if (result == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(num_bytes); + if (writer == NULL) { return NULL; } - if (safe_memcpy(PyBytes_AS_STRING(result), start, num_bytes) < 0) { - Py_CLEAR(result); + if (safe_memcpy(PyBytesWriter_GetData(writer), start, num_bytes) < 0) { + PyBytesWriter_Discard(writer); + return NULL; } - return result; + return PyBytesWriter_Finish(writer); } } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index b7300def8dc75f..1cf6474ff25618 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -11436,9 +11436,6 @@ static PyObject * os_read_impl(PyObject *module, int fd, Py_ssize_t length) /*[clinic end generated code: output=dafbe9a5cddb987b input=1df2eaa27c0bf1d3]*/ { - Py_ssize_t n; - PyObject *buffer; - if (length < 0) { errno = EINVAL; return posix_error(); @@ -11446,20 +11443,18 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length) length = Py_MIN(length, _PY_READ_MAX); - buffer = PyBytes_FromStringAndSize((char *)NULL, length); - if (buffer == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } - n = _Py_read(fd, PyBytes_AS_STRING(buffer), length); + Py_ssize_t n = _Py_read(fd, PyBytesWriter_GetData(writer), length); if (n == -1) { - Py_DECREF(buffer); + PyBytesWriter_Discard(writer); return NULL; } - if (n != length) - _PyBytes_Resize(&buffer, n); - - return buffer; + return PyBytesWriter_FinishWithSize(writer, n); } /*[clinic input] @@ -11635,20 +11630,20 @@ os_pread_impl(PyObject *module, int fd, Py_ssize_t length, Py_off_t offset) { Py_ssize_t n; int async_err = 0; - PyObject *buffer; if (length < 0) { errno = EINVAL; return posix_error(); } - buffer = PyBytes_FromStringAndSize((char *)NULL, length); - if (buffer == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } do { Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH - n = pread(fd, 
PyBytes_AS_STRING(buffer), length, offset); + n = pread(fd, PyBytesWriter_GetData(writer), length, offset); _Py_END_SUPPRESS_IPH Py_END_ALLOW_THREADS } while (n < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); @@ -11657,12 +11652,10 @@ os_pread_impl(PyObject *module, int fd, Py_ssize_t length, Py_off_t offset) if (!async_err) { posix_error(); } - Py_DECREF(buffer); + PyBytesWriter_Discard(writer); return NULL; } - if (n != length) - _PyBytes_Resize(&buffer, n); - return buffer; + return PyBytesWriter_FinishWithSize(writer, n); } #endif /* HAVE_PREAD */ @@ -14864,9 +14857,6 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, int follow_symlinks) /*[clinic end generated code: output=5f2f44200a43cff2 input=025789491708f7eb]*/ { - Py_ssize_t i; - PyObject *buffer = NULL; - if (fd_and_follow_symlinks_invalid("getxattr", path->fd, follow_symlinks)) return NULL; @@ -14874,8 +14864,7 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, return NULL; } - for (i = 0; ; i++) { - void *ptr; + for (Py_ssize_t i = 0; ; i++) { ssize_t result; static const Py_ssize_t buffer_sizes[] = {128, XATTR_SIZE_MAX, 0}; Py_ssize_t buffer_size = buffer_sizes[i]; @@ -14883,10 +14872,11 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, path_error(path); return NULL; } - buffer = PyBytes_FromStringAndSize(NULL, buffer_size); - if (!buffer) + PyBytesWriter *writer = PyBytesWriter_Create(buffer_size); + if (writer == NULL) { return NULL; - ptr = PyBytes_AS_STRING(buffer); + } + void *ptr = PyBytesWriter_GetData(writer); Py_BEGIN_ALLOW_THREADS; if (path->fd >= 0) @@ -14898,23 +14888,16 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, Py_END_ALLOW_THREADS; if (result < 0) { + PyBytesWriter_Discard(writer); if (errno == ERANGE) { - Py_DECREF(buffer); continue; } path_error(path); - Py_DECREF(buffer); return NULL; } - if (result != buffer_size) { - /* Can only shrink. 
*/ - _PyBytes_Resize(&buffer, result); - } - break; + return PyBytesWriter_FinishWithSize(writer, result); } - - return buffer; } @@ -15138,22 +15121,22 @@ static PyObject * os_urandom_impl(PyObject *module, Py_ssize_t size) /*[clinic end generated code: output=42c5cca9d18068e9 input=4067cdb1b6776c29]*/ { - PyObject *bytes; - int result; - - if (size < 0) + if (size < 0) { return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + } + + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; + } - result = _PyOS_URandom(PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes)); + int result = _PyOS_URandom(PyBytesWriter_GetData(writer), size); if (result == -1) { - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } #ifdef HAVE_MEMFD_CREATE @@ -16616,25 +16599,20 @@ static PyObject * os_getrandom_impl(PyObject *module, Py_ssize_t size, int flags) /*[clinic end generated code: output=b3a618196a61409c input=59bafac39c594947]*/ { - PyObject *bytes; - Py_ssize_t n; - if (size < 0) { errno = EINVAL; return posix_error(); } - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) { - PyErr_NoMemory(); + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; } + void *data = PyBytesWriter_GetData(writer); + Py_ssize_t n; while (1) { - n = syscall(SYS_getrandom, - PyBytes_AS_STRING(bytes), - PyBytes_GET_SIZE(bytes), - flags); + n = syscall(SYS_getrandom, data, size, flags); if (n < 0 && errno == EINTR) { if (PyErr_CheckSignals() < 0) { goto error; @@ -16651,14 +16629,10 @@ os_getrandom_impl(PyObject *module, Py_ssize_t size, int flags) goto error; } - if (n != size) { - _PyBytes_Resize(&bytes, n); - } - - return bytes; + return PyBytesWriter_FinishWithSize(writer, n); error: - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } #endif 
/* HAVE_GETRANDOM_SYSCALL */ diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 22b94e4b03a27c..416f127a2e4f90 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -3414,7 +3414,6 @@ sock_getsockopt(PyObject *self, PyObject *args) int level; int optname; int res; - PyObject *buf; socklen_t buflen = 0; int flag = 0; socklen_t flagsize; @@ -3459,17 +3458,17 @@ sock_getsockopt(PyObject *self, PyObject *args) "getsockopt buflen out of range"); return NULL; } - buf = PyBytes_FromStringAndSize((char *)NULL, buflen); - if (buf == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(buflen); + if (writer == NULL) { return NULL; + } res = getsockopt(get_sock_fd(s), level, optname, - (void *)PyBytes_AS_STRING(buf), &buflen); + PyBytesWriter_GetData(writer), &buflen); if (res < 0) { - Py_DECREF(buf); + PyBytesWriter_Discard(writer); return s->errorhandler(); } - _PyBytes_Resize(&buf, buflen); - return buf; + return PyBytesWriter_FinishWithSize(writer, buflen); } PyDoc_STRVAR(getsockopt_doc, diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index d4b4b91697c08e..1a8acfada325d4 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -816,22 +816,24 @@ save_unconsumed_input(compobject *self, Py_buffer *data, int err) input data in self->unused_data. 
*/ if (self->zst.avail_in > 0) { Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); - Py_ssize_t new_size, left_size; - PyObject *new_data; + Py_ssize_t left_size; left_size = (Byte *)data->buf + data->len - self->zst.next_in; if (left_size > (PY_SSIZE_T_MAX - old_size)) { PyErr_NoMemory(); return -1; } - new_size = old_size + left_size; - new_data = PyBytes_FromStringAndSize(NULL, new_size); - if (new_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(old_size + left_size); + if (writer == NULL) { return -1; - memcpy(PyBytes_AS_STRING(new_data), - PyBytes_AS_STRING(self->unused_data), old_size); - memcpy(PyBytes_AS_STRING(new_data) + old_size, - self->zst.next_in, left_size); - Py_SETREF(self->unused_data, new_data); + } + char *new_data = PyBytesWriter_GetData(writer); + memcpy(new_data, PyBytes_AS_STRING(self->unused_data), old_size); + memcpy(new_data + old_size, self->zst.next_in, left_size); + PyObject *new_unused_data = PyBytesWriter_Finish(writer); + if (new_unused_data == NULL) { + return -1; + } + Py_SETREF(self->unused_data, new_unused_data); self->zst.avail_in = 0; } } @@ -994,7 +996,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in doing any work at all; just return an empty string. 
*/ if (mode == Z_NO_FLUSH) { - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } ENTER_ZLIB(self); @@ -1744,11 +1746,7 @@ ZlibDecompressor__new__(PyTypeObject *cls, self->zst.zfree = PyZlib_Free; self->zst.next_in = NULL; self->zst.avail_in = 0; - self->unused_data = PyBytes_FromStringAndSize(NULL, 0); - if (self->unused_data == NULL) { - Py_CLEAR(self); - return NULL; - } + self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { Py_DECREF(self); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index fc407ec6bf99d6..c347dbd8f25fd8 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE() #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue @@ -35,6 +36,9 @@ class bytes "PyBytesObject *" "&PyBytes_Type" /* Forward declaration */ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); +static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, + Py_ssize_t size, void *data); +static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer); #define CHARACTERS _Py_SINGLETON(bytes_characters) @@ -195,10 +199,11 @@ PyBytes_FromString(const char *str) return (PyObject *) op; } -PyObject * -PyBytes_FromFormatV(const char *format, va_list vargs) + +static char* +bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, + const char *format, va_list vargs) { - char *s; const char *f; const char *p; Py_ssize_t prec; @@ -212,21 +217,20 @@ PyBytes_FromFormatV(const char *format, va_list vargs) Longest 64-bit pointer representation: "0xffffffffffffffff\0" (19 bytes). 
*/ char buffer[21]; - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); + char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos; - s = _PyBytesWriter_Alloc(&writer, strlen(format)); - if (s == NULL) - return NULL; - writer.overallocate = 1; - -#define WRITE_BYTES(str) \ +#define WRITE_BYTES_LEN(str, len_expr) \ do { \ - s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ - if (s == NULL) \ + size_t len = (len_expr); \ + s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \ + if (s == NULL) { \ goto error; \ + } \ + memcpy(s, (str), len); \ + s += len; \ } while (0) +#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str)) for (f = format; *f; f++) { if (*f != '%') { @@ -267,10 +271,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) ++f; } - /* subtract bytes preallocated for the format string - (ex: 2 for "%s") */ - writer.min_size -= (f - p + 1); - switch (*f) { case 'c': { @@ -281,7 +281,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "expects an integer in range [0; 255]"); goto error; } - writer.min_size++; *s++ = (unsigned char)c; break; } @@ -340,9 +339,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) i++; } } - s = _PyBytesWriter_WriteBytes(&writer, s, p, i); - if (s == NULL) - goto error; + WRITE_BYTES_LEN(p, i); break; } @@ -361,31 +358,45 @@ PyBytes_FromFormatV(const char *format, va_list vargs) break; case '%': - writer.min_size++; *s++ = '%'; break; default: - if (*f == 0) { - /* fix min_size if we reached the end of the format string */ - writer.min_size++; - } - /* invalid format string: copy unformatted string and exit */ WRITE_BYTES(p); - return _PyBytesWriter_Finish(&writer, s); + return s; } } #undef WRITE_BYTES +#undef WRITE_BYTES_LEN - return _PyBytesWriter_Finish(&writer, s); + return s; error: - _PyBytesWriter_Dealloc(&writer); return NULL; } + +PyObject * +PyBytes_FromFormatV(const char *format, va_list vargs) +{ + Py_ssize_t alloc = strlen(format); + PyBytesWriter *writer = 
PyBytesWriter_Create(alloc); + if (writer == NULL) { + return NULL; + } + + char *s = bytes_fromformat(writer, 0, format, vargs); + if (s == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + return PyBytesWriter_FinishWithPointer(writer, s); +} + + PyObject * PyBytes_FromFormat(const char *format, ...) { @@ -398,6 +409,7 @@ PyBytes_FromFormat(const char *format, ...) return ret; } + /* Helpers for formatstring */ Py_LOCAL_INLINE(PyObject *) @@ -420,7 +432,7 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) static char* formatfloat(PyObject *v, int flags, int prec, int type, - PyObject **p_result, _PyBytesWriter *writer, char *str) + PyObject **p_result, PyBytesWriter *writer, char *str) { char *p; PyObject *result; @@ -448,7 +460,7 @@ formatfloat(PyObject *v, int flags, int prec, int type, len = strlen(p); if (writer != NULL) { - str = _PyBytesWriter_Prepare(writer, str, len); + str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str); if (str == NULL) { PyMem_Free(p); return NULL; @@ -599,12 +611,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, PyObject *args, int use_bytearray) { const char *fmt; - char *res; Py_ssize_t arglen, argidx; Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; - _PyBytesWriter writer; if (args == NULL) { PyErr_BadInternalCall(); @@ -613,14 +623,17 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, fmt = format; fmtcnt = format_len; - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; - - res = _PyBytesWriter_Alloc(&writer, fmtcnt); - if (res == NULL) + PyBytesWriter *writer; + if (use_bytearray) { + writer = _PyBytesWriter_CreateByteArray(fmtcnt); + } + else { + writer = PyBytesWriter_Create(fmtcnt); + } + if (writer == NULL) { return NULL; - if (!use_bytearray) - writer.overallocate = 1; + } + char *res = PyBytesWriter_GetData(writer); if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); @@ -823,11 +836,6 @@ _PyBytes_FormatEx(const 
char *format, Py_ssize_t format_len, if (v == NULL) goto error; - if (fmtcnt == 0) { - /* last write: disable writer overallocation */ - writer.overallocate = 0; - } - sign = 0; fill = ' '; switch (c) { @@ -888,8 +896,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%d" */ - res = _PyLong_FormatBytesWriter(&writer, res, + res = _PyLong_FormatBytesWriter(writer, res, v, base, alternate); if (res == NULL) goto error; @@ -917,8 +924,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%f" */ - res = formatfloat(v, flags, prec, c, NULL, &writer, res); + res = formatfloat(v, flags, prec, c, NULL, writer, res); if (res == NULL) goto error; continue; @@ -974,9 +980,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, alloc++; /* 2: size preallocated for %s */ if (alloc > 2) { - res = _PyBytesWriter_Prepare(&writer, res, alloc - 2); - if (res == NULL) + res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res); + if (res == NULL) { goto error; + } } #ifndef NDEBUG char *before = res; @@ -1049,10 +1056,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, assert((res - before) == alloc); #endif } /* '%' */ - - /* If overallocation was disabled, ensure that it was the last - write. 
Otherwise, we missed an optimization */ - assert(writer.overallocate || fmtcnt == 0 || use_bytearray); } /* until end */ if (argidx < arglen && !dict) { @@ -1064,10 +1067,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (args_owned) { Py_DECREF(args); } - return _PyBytesWriter_Finish(&writer, res); + return PyBytesWriter_FinishWithPointer(writer, res); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); if (args_owned) { Py_DECREF(args); } @@ -1080,21 +1083,15 @@ PyObject *_PyBytes_DecodeEscape(const char *s, const char *errors, const char **first_invalid_escape) { - int c; - char *p; - const char *end; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - - p = _PyBytesWriter_Alloc(&writer, len); - if (p == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(len); + if (writer == NULL) { return NULL; - writer.overallocate = 1; + } + char *p = PyBytesWriter_GetData(writer); *first_invalid_escape = NULL; - end = s + len; + const char *end = s + len; while (s < end) { if (*s != '\\') { *p++ = *s++; @@ -1123,7 +1120,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, case 'a': *p++ = '\007'; break; /* BEL, not classic C */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - c = s[-1] - '0'; + { + int c = s[-1] - '0'; if (s < end && '0' <= *s && *s <= '7') { c = (c<<3) + *s++ - '0'; if (s < end && '0' <= *s && *s <= '7') @@ -1137,6 +1135,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } *p++ = c; break; + } case 'x': if (s+1 < end) { int digit1, digit2; @@ -1182,10 +1181,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_FinishWithPointer(writer, p); failed: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -1463,12 +1462,16 @@ bytes_concat(PyObject *a, PyObject *b) goto done; } - result = PyBytes_FromStringAndSize(NULL, va.len + vb.len); - if (result != NULL) { - 
memcpy(PyBytes_AS_STRING(result), va.buf, va.len); - memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len); + PyBytesWriter *writer = PyBytesWriter_Create(va.len + vb.len); + if (writer == NULL) { + goto done; } + char *data = PyBytesWriter_GetData(writer); + memcpy(data, va.buf, va.len); + memcpy(data + va.len, vb.buf, vb.len); + result = PyBytesWriter_Finish(writer); + done: if (va.len != -1) PyBuffer_Release(&va); @@ -1645,8 +1648,6 @@ bytes_subscript(PyObject *op, PyObject* item) Py_ssize_t start, stop, step, slicelength, i; size_t cur; const char* source_buf; - char* result_buf; - PyObject* result; if (PySlice_Unpack(item, &start, &stop, &step) < 0) { return NULL; @@ -1669,17 +1670,18 @@ bytes_subscript(PyObject *op, PyObject* item) } else { source_buf = PyBytes_AS_STRING(self); - result = PyBytes_FromStringAndSize(NULL, slicelength); - if (result == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(slicelength); + if (writer == NULL) { return NULL; + } + char *buf = PyBytesWriter_GetData(writer); - result_buf = PyBytes_AS_STRING(result); for (cur = start, i = 0; i < slicelength; cur += step, i++) { - result_buf[i] = source_buf[cur]; + buf[i] = source_buf[cur]; } - return result; + return PyBytesWriter_Finish(writer); } } else { @@ -2506,17 +2508,13 @@ bytes_fromhex_impl(PyTypeObject *type, PyObject *string) PyObject* _PyBytes_FromHex(PyObject *string, int use_bytearray) { - char *buf; Py_ssize_t hexlen, invalid_char; unsigned int top, bot; const Py_UCS1 *str, *start, *end; - _PyBytesWriter writer; + PyBytesWriter *writer = NULL; Py_buffer view; view.obj = NULL; - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; - if (PyUnicode_Check(string)) { hexlen = PyUnicode_GET_LENGTH(string); @@ -2552,10 +2550,16 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) } /* This overestimates if there are spaces */ - buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); - if (buf == NULL) { + if (use_bytearray) { + writer = 
_PyBytesWriter_CreateByteArray(hexlen / 2); + } + else { + writer = PyBytesWriter_Create(hexlen / 2); + } + if (writer == NULL) { goto release_buffer; } + char *buf = PyBytesWriter_GetData(writer); start = str; end = str + hexlen; @@ -2594,7 +2598,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) if (view.obj != NULL) { PyBuffer_Release(&view); } - return _PyBytesWriter_Finish(&writer, buf); + return PyBytesWriter_FinishWithPointer(writer, buf); error: if (invalid_char == -1) { @@ -2605,7 +2609,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) "non-hexadecimal number found in " "fromhex() arg at position %zd", invalid_char); } - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); release_buffer: if (view.obj != NULL) { @@ -2760,7 +2764,7 @@ bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, "errors without a string argument"); return NULL; } - bytes = PyBytes_FromStringAndSize(NULL, 0); + bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else if (encoding != NULL) { /* Encode via the codec registry */ @@ -2832,23 +2836,25 @@ bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, static PyObject* _PyBytes_FromBuffer(PyObject *x) { - PyObject *new; Py_buffer view; - if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) return NULL; - new = PyBytes_FromStringAndSize(NULL, view.len); - if (!new) + PyBytesWriter *writer = PyBytesWriter_Create(view.len); + if (writer == NULL) { goto fail; - if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval, - &view, view.len, 'C') < 0) + } + + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + &view, view.len, 'C') < 0) { goto fail; + } + PyBuffer_Release(&view); - return new; + return PyBytesWriter_Finish(writer); fail: - Py_XDECREF(new); + PyBytesWriter_Discard(writer); PyBuffer_Release(&view); return NULL; } @@ -2856,23 +2862,18 @@ _PyBytes_FromBuffer(PyObject *x) static PyObject* _PyBytes_FromList(PyObject *x) { - Py_ssize_t i, size = 
PyList_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + Py_ssize_t size = PyList_GET_SIZE(x); + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } + char *str = PyBytesWriter_GetData(writer); + size = _PyBytesWriter_GetAllocated(writer); - for (i = 0; i < PyList_GET_SIZE(x); i++) { - item = PyList_GET_ITEM(x, i); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) { + PyObject *item = PyList_GET_ITEM(x, i); Py_INCREF(item); - value = PyNumber_AsSsize_t(item, NULL); + Py_ssize_t value = PyNumber_AsSsize_t(item, NULL); Py_DECREF(item); if (value == -1 && PyErr_Occurred()) goto error; @@ -2884,33 +2885,33 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; + str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + if (str == NULL) { + goto error; + } + size = _PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_FinishWithPointer(writer, str); - error: - _PyBytesWriter_Dealloc(&writer); +error: + PyBytesWriter_Discard(writer); return NULL; } static PyObject* _PyBytes_FromTuple(PyObject *x) { - PyObject *bytes; Py_ssize_t i, size = PyTuple_GET_SIZE(x); Py_ssize_t value; - char *str; PyObject *item; - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - str = ((PyBytesObject *)bytes)->ob_sval; + } + char *str = PyBytesWriter_GetData(writer); for (i = 0; i < size; i++) { item = PyTuple_GET_ITEM(x, i); @@ -2925,31 +2926,29 @@ _PyBytes_FromTuple(PyObject *x) } *str++ = (char) value; } - return bytes; + return 
PyBytesWriter_Finish(writer); error: - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } static PyObject * _PyBytes_FromIterator(PyObject *it, PyObject *x) { - char *str; Py_ssize_t i, size; - _PyBytesWriter writer; /* For iterator version, create a bytes object and resize as needed */ size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } + char *str = PyBytesWriter_GetData(writer); + size = _PyBytesWriter_GetAllocated(writer); /* Run the iterator to exhaustion */ for (i = 0; ; i++) { @@ -2979,18 +2978,18 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) /* Append the byte */ if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; + str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + if (str == NULL) { + goto error; + } + size = _PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } - - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_FinishWithPointer(writer, str); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -3655,7 +3654,7 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) if (size == 0 && !writer->use_bytearray) { Py_CLEAR(writer->buffer); /* Get the empty byte string singleton */ - result = PyBytes_FromStringAndSize(NULL, 0); + result = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else if (writer->use_small_buffer) { if (writer->use_bytearray) { @@ -3727,3 +3726,340 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, } } + +// --- PyBytesWriter API ----------------------------------------------------- + +struct PyBytesWriter { + char small_buffer[256]; + PyObject *obj; + Py_ssize_t size; + int 
use_bytearray; +}; + + +static inline char* +byteswriter_data(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return writer->small_buffer; + } + else if (writer->use_bytearray) { + return PyByteArray_AS_STRING(writer->obj); + } + else { + return PyBytes_AS_STRING(writer->obj); + } +} + + +static inline Py_ssize_t +byteswriter_allocated(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return sizeof(writer->small_buffer); + } + else if (writer->use_bytearray) { + return PyByteArray_GET_SIZE(writer->obj); + } + else { + return PyBytes_GET_SIZE(writer->obj); + } +} + + +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + + +static inline int +byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) +{ + assert(size >= 0); + + if (size <= byteswriter_allocated(writer)) { + return 0; + } + + if (overallocate && !writer->use_bytearray) { + if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) { + size += size / OVERALLOCATE_FACTOR; + } + } + + if (writer->obj != NULL) { + if (writer->use_bytearray) { + if (PyByteArray_Resize(writer->obj, size)) { + return -1; + } + } + else { + if (_PyBytes_Resize(&writer->obj, size)) { + return -1; + } + } + assert(writer->obj != NULL); + } + else if (writer->use_bytearray) { + writer->obj = PyByteArray_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return -1; + } + assert((size_t)size > sizeof(writer->small_buffer)); + memcpy(PyByteArray_AS_STRING(writer->obj), + writer->small_buffer, + sizeof(writer->small_buffer)); + } + else { + writer->obj = PyBytes_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return -1; + } + assert((size_t)size > sizeof(writer->small_buffer)); + memcpy(PyBytes_AS_STRING(writer->obj), + writer->small_buffer, + sizeof(writer->small_buffer)); + } + return 0; +} + + +static 
PyBytesWriter*
+byteswriter_create(Py_ssize_t size, int use_bytearray)
+{
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
+        return NULL;
+    }
+
+    PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers);
+    if (writer == NULL) {
+        writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter));
+        if (writer == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+    }
+    writer->obj = NULL;
+    writer->size = 0;
+    writer->use_bytearray = use_bytearray;
+
+    if (size >= 1) {
+        if (byteswriter_resize(writer, size, 0) < 0) {
+            PyBytesWriter_Discard(writer);
+            return NULL;
+        }
+        writer->size = size;
+    }
+    return writer;
+}
+
+PyBytesWriter*
+PyBytesWriter_Create(Py_ssize_t size)
+{
+    return byteswriter_create(size, 0);
+}
+
+PyBytesWriter*
+_PyBytesWriter_CreateByteArray(Py_ssize_t size)
+{
+    return byteswriter_create(size, 1);
+}
+
+
+void
+PyBytesWriter_Discard(PyBytesWriter *writer)
+{
+    if (writer == NULL) {
+        return;
+    }
+
+    Py_XDECREF(writer->obj);
+    _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free);
+}
+
+
+// Consume the writer and return its content as an object of 'size' bytes.
+// The result is a bytearray if the writer was created with use_bytearray,
+// a bytes object otherwise. The writer is discarded on success and on
+// error. Return NULL on failure.
+PyObject*
+PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size)
+{
+    PyObject *result;
+    if (size == 0) {
+        // An empty result must still have the writer's result type, like
+        // the "size == 0 && !writer->use_bytearray" check made by
+        // _PyBytesWriter_Finish().
+        if (writer->use_bytearray) {
+            result = PyByteArray_FromStringAndSize(NULL, 0);
+        }
+        else {
+            result = bytes_get_empty();
+        }
+    }
+    else if (writer->obj != NULL) {
+        if (writer->use_bytearray) {
+            if (size != PyByteArray_GET_SIZE(writer->obj)) {
+                if (PyByteArray_Resize(writer->obj, size)) {
+                    goto error;
+                }
+            }
+        }
+        else {
+            if (size != PyBytes_GET_SIZE(writer->obj)) {
+                if (_PyBytes_Resize(&writer->obj, size)) {
+                    goto error;
+                }
+            }
+        }
+        // Transfer ownership of the object to the caller, so that
+        // PyBytesWriter_Discard() below does not release it.
+        result = writer->obj;
+        writer->obj = NULL;
+    }
+    else if (writer->use_bytearray) {
+        result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
+    }
+    else {
+        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
+    }
+    PyBytesWriter_Discard(writer);
+    return result;
+
+error:
+    PyBytesWriter_Discard(writer);
+    return NULL;
+}
+
+PyObject*
+PyBytesWriter_Finish(PyBytesWriter *writer)
+{
+    return PyBytesWriter_FinishWithSize(writer, writer->size);
+}
+
+
+PyObject*
+PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf)
+{
+    Py_ssize_t size = (char*)buf - byteswriter_data(writer);
+    if (size < 0 || size > byteswriter_allocated(writer)) {
+        PyBytesWriter_Discard(writer);
+        PyErr_SetString(PyExc_ValueError, "invalid end pointer");
+        return NULL;
+    }
+
+    return PyBytesWriter_FinishWithSize(writer, size);
+}
+
+
+void*
+PyBytesWriter_GetData(PyBytesWriter *writer)
+{
+    return byteswriter_data(writer);
+}
+
+
+Py_ssize_t
+PyBytesWriter_GetSize(PyBytesWriter *writer)
+{
+    return writer->size;
+}
+
+
+static Py_ssize_t
+_PyBytesWriter_GetAllocated(PyBytesWriter *writer)
+{
+    return byteswriter_allocated(writer);
+}
+
+
+int
+PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size)
+{
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "size must be >= 0");
+        return -1;
+    }
+    if (byteswriter_resize(writer, size, 1) < 0) {
+        return -1;
+    }
+    writer->size = size;
+    return 0;
+}
+
+
+static void*
+_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
+                                      void *data)
+{
+    Py_ssize_t pos = (char*)data - byteswriter_data(writer);
+    if (PyBytesWriter_Resize(writer, size) < 0) {
+        return NULL;
+    }
+    return byteswriter_data(writer) + pos;
+}
+
+
+int
+PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size)
+{
+    if (size < 0 && writer->size + size < 0) {
+        PyErr_SetString(PyExc_ValueError, "invalid size");
+        return -1;
+    }
+    if (size > PY_SSIZE_T_MAX - writer->size) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    size = writer->size + size;
+
+    if (byteswriter_resize(writer, size, 1) < 0) {
+        return -1;
+    }
+    writer->size = size;
+    return 0;
+}
+
+
+void*
+PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size,
+                                   void *buf)
+{
+    Py_ssize_t pos = (char*)buf - byteswriter_data(writer);
+    if (PyBytesWriter_Grow(writer, size) < 0) {
+        return NULL;
+    }
+    return byteswriter_data(writer) + pos;
+}
+
+
+int
+PyBytesWriter_WriteBytes(PyBytesWriter *writer,
+                         const void *bytes, Py_ssize_t size)
+{
+    if (size < 0) {
+        size_t len = strlen(bytes);
+        if (len > (size_t)PY_SSIZE_T_MAX) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        size = (Py_ssize_t)len;
+    }
+
+    Py_ssize_t pos = writer->size;
+    if (PyBytesWriter_Grow(writer, size) < 0) {
+        return -1;
+    }
+    char *buf = byteswriter_data(writer);
+    memcpy(buf + pos, bytes, size);
+    return 0;
+}
+
+
+// Format 'format' with the variadic arguments and append the result at
+// the current end of the writer (writer->size). On success, the writer
+// size is set to the end of the formatted data.
+// Return 0 on success, -1 on error.
+int
+PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...)
+{
+    Py_ssize_t pos = writer->size;
+    // Reserve room for the format string itself; bytes_fromformat() grows
+    // the writer further for each formatted argument.
+    if (PyBytesWriter_Grow(writer, strlen(format)) < 0) {
+        return -1;
+    }
+
+    va_list vargs;
+    va_start(vargs, format);
+    char *buf = bytes_fromformat(writer, pos, format, vargs);
+    va_end(vargs);
+    if (buf == NULL) {
+        // bytes_fromformat() failed: there is no end pointer to compute
+        // the new size from, so don't do pointer arithmetic on NULL.
+        return -1;
+    }
+
+    Py_ssize_t size = buf - byteswriter_data(writer);
+    return PyBytesWriter_Resize(writer, size);
+}
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 2dfd82bab1a834..a3bb4a1148d4c3 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2018,7 +2018,7 @@ static int
 pylong_int_to_decimal_string(PyObject *aa,
                              PyObject **p_output,
                              _PyUnicodeWriter *writer,
-                             _PyBytesWriter *bytes_writer,
+                             PyBytesWriter *bytes_writer,
                              char **bytes_str)
 {
     PyObject *s = NULL;
@@ -2049,7 +2049,8 @@ pylong_int_to_decimal_string(PyObject *aa,
         Py_ssize_t size = PyUnicode_GET_LENGTH(s);
         const void *data = PyUnicode_DATA(s);
         int kind = PyUnicode_KIND(s);
-        *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, size);
+        *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, size,
+                                                        *bytes_str);
         if (*bytes_str == NULL) {
             goto error;
         }
@@ -2086,7 +2087,7 @@ static int
 long_to_decimal_string_internal(PyObject *aa,
                                 PyObject **p_output,
                                 _PyUnicodeWriter *writer,
-                                _PyBytesWriter *bytes_writer,
+                                PyBytesWriter *bytes_writer,
                                 char **bytes_str)
 {
     PyLongObject *scratch, *a;
@@ -2212,7 +2213,8 @@ long_to_decimal_string_internal(PyObject *aa,
         }
     }
     else if (bytes_writer) {
-        *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen);
+        *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, strlen,
+                                                        
*bytes_str); if (*bytes_str == NULL) { Py_DECREF(scratch); return -1; @@ -2322,7 +2324,7 @@ long_to_decimal_string(PyObject *aa) static int long_format_binary(PyObject *aa, int base, int alternate, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, char **bytes_str) + PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *a = (PyLongObject *)aa; PyObject *v = NULL; @@ -2383,7 +2385,8 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } else if (bytes_writer) { - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz); + *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, sz, + *bytes_str); if (*bytes_str == NULL) return -1; } @@ -2512,7 +2515,7 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer, } char* -_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str, +_PyLong_FormatBytesWriter(PyBytesWriter *writer, char *str, PyObject *obj, int base, int alternate) { @@ -6376,8 +6379,6 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, /*[clinic end generated code: output=89c801df114050a3 input=a0103d0e9ad85c2b]*/ { int little_endian; - PyObject *bytes; - if (byteorder == NULL) little_endian = 0; else if (_PyUnicode_Equal(byteorder, &_Py_ID(little))) @@ -6396,18 +6397,19 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, return NULL; } - bytes = PyBytes_FromStringAndSize(NULL, length); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } if (_PyLong_AsByteArray((PyLongObject *)self, - (unsigned char *)PyBytes_AS_STRING(bytes), + PyBytesWriter_GetData(writer), length, little_endian, is_signed, 1) < 0) { - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } /*[clinic input] diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index cf673fb379edcd..1e66ae062ddc58 100644 --- a/Objects/memoryobject.c +++ 
b/Objects/memoryobject.c @@ -2284,7 +2284,6 @@ memoryview_tobytes_impl(PyMemoryViewObject *self, const char *order) { Py_buffer *src = VIEW_ADDR(self); char ord = 'C'; - PyObject *bytes; CHECK_RELEASED(self); @@ -2302,16 +2301,18 @@ memoryview_tobytes_impl(PyMemoryViewObject *self, const char *order) } } - bytes = PyBytes_FromStringAndSize(NULL, src->len); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(src->len); + if (writer == NULL) { return NULL; + } - if (PyBuffer_ToContiguous(PyBytes_AS_STRING(bytes), src, src->len, ord) < 0) { - Py_DECREF(bytes); + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + src, src->len, ord) < 0) { + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } /*[clinic input] @@ -2343,8 +2344,6 @@ memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, /*[clinic end generated code: output=430ca760f94f3ca7 input=539f6a3a5fb56946]*/ { Py_buffer *src = VIEW_ADDR(self); - PyObject *bytes; - PyObject *ret; CHECK_RELEASED(self); @@ -2352,19 +2351,22 @@ memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, return _Py_strhex_with_sep(src->buf, src->len, sep, bytes_per_sep); } - bytes = PyBytes_FromStringAndSize(NULL, src->len); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(src->len); + if (writer == NULL) { return NULL; + } - if (PyBuffer_ToContiguous(PyBytes_AS_STRING(bytes), src, src->len, 'C') < 0) { - Py_DECREF(bytes); + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + src, src->len, 'C') < 0) { + PyBytesWriter_Discard(writer); return NULL; } - ret = _Py_strhex_with_sep( - PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes), - sep, bytes_per_sep); - Py_DECREF(bytes); + PyObject *ret = _Py_strhex_with_sep( + PyBytesWriter_GetData(writer), + PyBytesWriter_GetSize(writer), + sep, bytes_per_sep); + PyBytesWriter_Discard(writer); return ret; } diff --git a/Objects/object.c b/Objects/object.c index 99bb1d9c0bfad5..8d3a57a5ae72e1 
100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -943,6 +943,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->bytes_writers, is_finalization, PyMem_Free); clear_freelist(&freelists->ints, is_finalization, free_object); clear_freelist(&freelists->pycfunctionobject, is_finalization, PyObject_GC_Del); clear_freelist(&freelists->pycmethodobject, is_finalization, PyObject_GC_Del); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e01a10fc19e904..6af4156f4a56c7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4931,7 +4931,7 @@ _PyUnicode_EncodeUTF7(PyObject *str, len = PyUnicode_GET_LENGTH(str); if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); /* It might be possible to tighten this worst case */ if (len > PY_SSIZE_T_MAX / 8) @@ -6922,7 +6922,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode) len = PyUnicode_GET_LENGTH(unicode); if (len == 0) { - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } kind = PyUnicode_KIND(unicode); @@ -7372,7 +7372,7 @@ unicode_encode_ucs1(PyObject *unicode, /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); _PyBytesWriter_Init(&writer); str = _PyBytesWriter_Alloc(&writer, size); @@ -8317,7 +8317,7 @@ encode_code_page(int code_page, } if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); offset = 0; do