Skip to content

[WIP] gh-129813, PEP 782: Add PyBytesWriter C API #131681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 28 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e24d40e
gh-129813: Add PyBytesWriter C API (flavor with size)
vstinner Mar 24, 2025
8761a9b
Remove PyBytesWriter_SetSize()
vstinner Mar 25, 2025
92e1294
Add tests
vstinner Mar 25, 2025
eff71b5
Add PyBytesWriter_WriteBytes()
vstinner Mar 25, 2025
31c7ca7
Add PyBytesWriter_Format()
vstinner Mar 25, 2025
86d0fd9
Fix build on Windows
vstinner Mar 25, 2025
79fa5f8
Add PyBytesWriter_ResizeAndUpdatePointer() function
vstinner Mar 25, 2025
bf60f7f
Convert _PyBytes_FromIterator()
vstinner Mar 25, 2025
62a15be
Add _PyBytesWriter_CreateByteArray()
vstinner Mar 25, 2025
0a70d70
Convert _PyBytes_FormatEx()
vstinner Mar 26, 2025
457e21a
Rename PyBytesWriter_FinishWithPointer()
vstinner Mar 26, 2025
40ef4e1
Add PyBytesWriter_GrowAndUpdatePointer()
vstinner Mar 26, 2025
0313087
Make PyBytesWriter_ResizeAndUpdatePointer() private
vstinner Mar 27, 2025
c8ac889
Make PyBytesWriter_GetAllocated() private
vstinner Mar 27, 2025
7095ac4
Don't overallocate for bytearray()
vstinner Mar 27, 2025
befd574
Move _PyBytesWriter_CreateByteArray() to the internal C API
vstinner Mar 27, 2025
3ba1d1c
Move code
vstinner Mar 27, 2025
ede2776
Add examples
vstinner Mar 27, 2025
be56685
Add high-level API example
vstinner Mar 27, 2025
1135390
Fix tests
vstinner Mar 27, 2025
000ba58
fix linter
vstinner Mar 27, 2025
b864c26
Convert more functions
vstinner Mar 27, 2025
6d7e37d
Convert _hashopenssl function
vstinner Mar 27, 2025
d8a4659
Detect strlen() overflow
vstinner Mar 31, 2025
ed00f95
Fix mmap
vstinner Mar 31, 2025
6307895
Grow() can now shrink the buffer
vstinner Mar 31, 2025
18d41ff
Fix WriteBytes()
vstinner Mar 31, 2025
4cf51f3
Merge branch 'main' into bytes_writer_size
vstinner Apr 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions Include/cpython/bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,46 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable)
{
return PyBytes_Join(sep, iterable);
}


// --- PyBytesWriter API -----------------------------------------------------

// Opaque writer handle: only the struct name is declared in this section,
// so callers hold a PyBytesWriter* and go through the functions below.
typedef struct PyBytesWriter PyBytesWriter;

// Create a writer whose buffer initially holds 'size' bytes.
// Callers in this change treat a NULL return as failure and return NULL
// themselves (presumably an exception is already set -- TODO confirm).
PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create(
Py_ssize_t size);
// Release a writer without producing a result. Callers use this on their
// error paths instead of one of the Finish functions.
PyAPI_FUNC(void) PyBytesWriter_Discard(
PyBytesWriter *writer);
// Produce the final object from the writer's buffer; callers return it
// directly as their result.
// NOTE(review): presumably this also consumes 'writer', since callers do
// not call PyBytesWriter_Discard() afterwards -- confirm.
PyAPI_FUNC(PyObject*) PyBytesWriter_Finish(
PyBytesWriter *writer);
// Like PyBytesWriter_Finish(), but with an explicit final 'size'. The
// tests finish a 10-byte writer with size 3 and expect a 3-byte result.
PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize(
PyBytesWriter *writer,
Py_ssize_t size);
// Like PyBytesWriter_Finish(), but the end of the data is given as a
// pointer into the buffer returned by PyBytesWriter_GetData(). The
// escape-encode caller passes the position one past the last byte it
// wrote, where it previously resized the result to (p - start).
PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithPointer(
PyBytesWriter *writer,
void *buf);

// Return a pointer to the writer's buffer. Callers cast it to char* or
// unsigned char* and write their output through it.
PyAPI_FUNC(void*) PyBytesWriter_GetData(
PyBytesWriter *writer);
// Return the writer's current size in bytes. The tests compare it with
// the size passed at creation or to the latest resize.
PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize(
PyBytesWriter *writer);

// Append 'size' bytes read from 'bytes' to the writer.
// NOTE(review): the test wrapper passes -1 to write a whole string, so a
// negative size presumably means strlen(bytes) -- confirm. The int return
// is presumably 0 on success and -1 on error -- confirm.
PyAPI_FUNC(int) PyBytesWriter_WriteBytes(
PyBytesWriter *writer,
const void *bytes,
Py_ssize_t size);
// Append printf-style formatted output to the writer; the tests exercise
// the "%i" conversion. Return convention presumably matches
// PyBytesWriter_WriteBytes() -- confirm.
PyAPI_FUNC(int) PyBytesWriter_Format(
PyBytesWriter *writer,
const char *format,
...);

// Change the size of the writer's buffer.
// NOTE(review): the difference between Resize and Grow is not visible in
// this header; presumably Resize takes the new absolute size and Grow a
// size relative to the current one -- confirm against the implementation.
PyAPI_FUNC(int) PyBytesWriter_Resize(
PyBytesWriter *writer,
Py_ssize_t size);
PyAPI_FUNC(int) PyBytesWriter_Grow(
PyBytesWriter *writer,
Py_ssize_t size);
// Variant of PyBytesWriter_Grow() that takes and returns a buffer pointer.
// NOTE(review): presumably returns the position equivalent to 'buf' after
// the buffer may have been moved, and NULL on error -- confirm.
PyAPI_FUNC(void*) PyBytesWriter_GrowAndUpdatePointer(
PyBytesWriter *writer,
Py_ssize_t size,
void *buf);
4 changes: 4 additions & 0 deletions Include/internal/pycore_bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
const void *bytes,
Py_ssize_t size);

// Export for '_testcapi' shared extension.
// Internal constructor that returns a PyBytesWriter*, taking a single
// 'size' argument.
// NOTE(review): judging by the name, the writer presumably finishes as a
// bytearray rather than bytes, and 'size' presumably has the same meaning
// as in PyBytesWriter_Create() -- confirm against the implementation.
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(
Py_ssize_t size);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_freelist_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ extern "C" {
# define Py_futureiters_MAXFREELIST 255
# define Py_object_stack_chunks_MAXFREELIST 4
# define Py_unicode_writers_MAXFREELIST 1
# define Py_bytes_writers_MAXFREELIST 1
# define Py_pycfunctionobject_MAXFREELIST 16
# define Py_pycmethodobject_MAXFREELIST 16
# define Py_pymethodobjects_MAXFREELIST 20
Expand Down Expand Up @@ -59,6 +60,7 @@ struct _Py_freelists {
struct _Py_freelist futureiters;
struct _Py_freelist object_stack_chunks;
struct _Py_freelist unicode_writers;
struct _Py_freelist bytes_writers;
struct _Py_freelist pycfunctionobject;
struct _Py_freelist pycmethodobject;
struct _Py_freelist pymethodobjects;
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ extern int _PyLong_FormatWriter(
int alternate);

extern char* _PyLong_FormatBytesWriter(
_PyBytesWriter *writer,
PyBytesWriter *writer,
char *str,
PyObject *obj,
int base,
Expand Down
89 changes: 89 additions & 0 deletions Lib/test/test_capi/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,5 +291,94 @@ def test_join(self):
bytes_join(b'', NULL)


class BytesWriterTest(unittest.TestCase):
    """Tests for the PyBytesWriter C API, driven through _testcapi.PyBytesWriter."""

    # Type every finished result is compared against; subclasses override it.
    result_type = bytes

    def create_writer(self, alloc=0, string=b''):
        """Build the _testcapi writer wrapper from alloc and string.

        NOTE(review): the third argument (0 here) presumably selects the
        bytes flavor of the writer -- confirm against _testcapi.
        """
        flavor = 0
        return _testcapi.PyBytesWriter(alloc, string, flavor)

    def _expected(self, raw):
        # Wrap the raw bytes literal in the result type under test.
        return self.result_type(raw)

    def test_create(self):
        # Test PyBytesWriter_Create()
        bw = self.create_writer()
        self.assertEqual(bw.get_size(), 0)
        self.assertEqual(bw.finish(), self._expected(b''))

        # Allocation exactly as large as the initial content.
        bw = self.create_writer(3, b'abc')
        self.assertEqual(bw.get_size(), 3)
        self.assertEqual(bw.finish(), self._expected(b'abc'))

        # Allocation larger than the content: the result is cut down to 3.
        bw = self.create_writer(10, b'abc')
        self.assertEqual(bw.get_size(), 10)
        self.assertEqual(bw.finish_with_size(3), self._expected(b'abc'))

    def test_write_bytes(self):
        # Test PyBytesWriter_WriteBytes()
        # Each scenario is a list of (data, size) writes; every scenario
        # must produce the same final content.
        scenarios = (
            [(b'Hello World!', -1)],
            [(b'Hello ', -1), (b'World! <truncated>', 6)],
        )
        for writes in scenarios:
            bw = self.create_writer()
            for data, size in writes:
                bw.write_bytes(data, size)
            self.assertEqual(bw.finish(), self._expected(b'Hello World!'))

    def test_resize(self):
        # Test PyBytesWriter_Resize()
        full = b'number=123456'

        # Resize straight to the full length, then resize again to the same
        # length with no extra data; the size is checked before finishing.
        bw = self.create_writer()
        bw.resize(len(full), full)
        bw.resize(len(full), b'')
        self.assertEqual(bw.get_size(), len(full))
        self.assertEqual(bw.finish(), self._expected(full))

        # Each step is (prefix, chunk): resize to len(prefix), passing chunk
        # as the data. Every scenario must end with the full content.
        scenarios = (
            [(b'', b''), (full, full)],
            [(b'number=', b'number='), (full, b'123456')],
            [(b'number=', b'number='), (b'number=', b''), (full, b'123456')],
            [(b'number', b'number'), (b'number=', b'='),
             (b'number=123', b'123'), (full, b'456')],
        )
        for steps in scenarios:
            bw = self.create_writer()
            for prefix, chunk in steps:
                bw.resize(len(prefix), chunk)
            self.assertEqual(bw.finish(), self._expected(full))

    def test_format_i(self):
        # Test PyBytesWriter_Format()
        # Each scenario pairs a list of (format, value) calls with the
        # content expected once the writer is finished.
        scenarios = (
            ([(b'x=%i', 123456)], b'x=123456'),
            ([(b'x=%i, ', 123), (b'y=%i', 456)], b'x=123, y=456'),
        )
        for calls, outcome in scenarios:
            bw = self.create_writer()
            for fmt, value in calls:
                bw.format_i(fmt, value)
            self.assertEqual(bw.finish(), self._expected(outcome))

    def test_example_abc(self):
        produced = _testcapi.byteswriter_abc()
        self.assertEqual(produced, b'abc')

    def test_example_resize(self):
        produced = _testcapi.byteswriter_resize()
        self.assertEqual(produced, b'Hello World')

    def test_example_highlevel(self):
        produced = _testcapi.byteswriter_highlevel()
        self.assertEqual(produced, b'Hello World!')


class ByteArrayWriterTest(BytesWriterTest):
    """Re-run the inherited BytesWriterTest cases against bytearray results."""

    result_type = bytearray

    def create_writer(self, alloc=0, string=b''):
        # Same wrapper as the base class, but with 1 as the third argument.
        # NOTE(review): presumably 1 selects the bytearray flavor of the
        # writer -- confirm against _testcapi.
        flavor = 1
        return _testcapi.PyBytesWriter(alloc, string, flavor)

# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
4 changes: 1 addition & 3 deletions Modules/_bz2module.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,9 +668,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type)
self->bzs_avail_in_real = 0;
self->input_buffer = NULL;
self->input_buffer_size = 0;
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
if (self->unused_data == NULL)
goto error;
self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);

bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
if (catch_bz2_error(bzerror))
Expand Down
67 changes: 30 additions & 37 deletions Modules/_codecsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,52 +201,45 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
const char *errors)
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
{
Py_ssize_t size;
Py_ssize_t newsize;
PyObject *v;

size = PyBytes_GET_SIZE(data);
Py_ssize_t size = PyBytes_GET_SIZE(data);
if (size > PY_SSIZE_T_MAX / 4) {
PyErr_SetString(PyExc_OverflowError,
"string is too large to encode");
return NULL;
}
newsize = 4*size;
v = PyBytes_FromStringAndSize(NULL, newsize);
Py_ssize_t alloc_size = 4*size;

if (v == NULL) {
PyBytesWriter *writer = PyBytesWriter_Create(alloc_size);
if (writer == NULL) {
return NULL;
}
else {
Py_ssize_t i;
char c;
char *p = PyBytes_AS_STRING(v);

for (i = 0; i < size; i++) {
/* There's at least enough room for a hex escape */
assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
c = PyBytes_AS_STRING(data)[i];
if (c == '\'' || c == '\\')
*p++ = '\\', *p++ = c;
else if (c == '\t')
*p++ = '\\', *p++ = 't';
else if (c == '\n')
*p++ = '\\', *p++ = 'n';
else if (c == '\r')
*p++ = '\\', *p++ = 'r';
else if (c < ' ' || c >= 0x7f) {
*p++ = '\\';
*p++ = 'x';
*p++ = Py_hexdigits[(c & 0xf0) >> 4];
*p++ = Py_hexdigits[c & 0xf];
}
else
*p++ = c;
}
*p = '\0';
if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
return NULL;
char *p = PyBytesWriter_GetData(writer);

for (Py_ssize_t i = 0; i < size; i++) {
/* There's at least enough room for a hex escape */
assert(alloc_size - (p - (char*)PyBytesWriter_GetData(writer)) >= 4);
char c = PyBytes_AS_STRING(data)[i];
if (c == '\'' || c == '\\')
*p++ = '\\', *p++ = c;
else if (c == '\t')
*p++ = '\\', *p++ = 't';
else if (c == '\n')
*p++ = '\\', *p++ = 'n';
else if (c == '\r')
*p++ = '\\', *p++ = 'r';
else if (c < ' ' || c >= 0x7f) {
*p++ = '\\';
*p++ = 'x';
*p++ = Py_hexdigits[(c & 0xf0) >> 4];
*p++ = Py_hexdigits[c & 0xf];
}
else
*p++ = c;
}

PyObject *v = PyBytesWriter_FinishWithPointer(writer, p);
if (v == NULL) {
return NULL;
}

return codec_tuple(v, size);
Expand Down
5 changes: 1 addition & 4 deletions Modules/_dbmmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,7 @@ _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key,
return PyBytes_FromStringAndSize(val.dptr, val.dsize);
}
if (default_value == NULL) {
default_value = PyBytes_FromStringAndSize(NULL, 0);
if (default_value == NULL) {
return NULL;
}
default_value = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
val.dptr = NULL;
val.dsize = 0;
}
Expand Down
26 changes: 12 additions & 14 deletions Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,15 +806,15 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length)
/*[clinic end generated code: output=ef9320c23280efad input=816a6537cea3d1db]*/
{
EVP_MD_CTX *temp_ctx;
PyObject *retval = PyBytes_FromStringAndSize(NULL, length);

if (retval == NULL) {
PyBytesWriter *writer = PyBytesWriter_Create(length);
if (writer == NULL) {
return NULL;
}

temp_ctx = EVP_MD_CTX_new();
if (temp_ctx == NULL) {
Py_DECREF(retval);
PyBytesWriter_Discard(writer);
PyErr_NoMemory();
return NULL;
}
Expand All @@ -823,17 +823,17 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length)
goto error;
}
if (!EVP_DigestFinalXOF(temp_ctx,
(unsigned char*)PyBytes_AS_STRING(retval),
(unsigned char*)PyBytesWriter_GetData(writer),
length))
{
goto error;
}

EVP_MD_CTX_free(temp_ctx);
return retval;
return PyBytesWriter_Finish(writer);

error:
Py_DECREF(retval);
PyBytesWriter_Discard(writer);
EVP_MD_CTX_free(temp_ctx);
notify_ssl_error_occurred();
return NULL;
Expand Down Expand Up @@ -1414,8 +1414,6 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt,
long maxmem, long dklen)
/*[clinic end generated code: output=14849e2aa2b7b46c input=48a7d63bf3f75c42]*/
{
PyObject *key_obj = NULL;
char *key;
int retval;
unsigned long n, r, p;

Expand Down Expand Up @@ -1486,27 +1484,27 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt,
return NULL;
}

key_obj = PyBytes_FromStringAndSize(NULL, dklen);
if (key_obj == NULL) {
PyBytesWriter *writer = PyBytesWriter_Create(dklen);
if (writer == NULL) {
return NULL;
}
key = PyBytes_AS_STRING(key_obj);
unsigned char *key = PyBytesWriter_GetData(writer);

Py_BEGIN_ALLOW_THREADS
retval = EVP_PBE_scrypt(
(const char*)password->buf, (size_t)password->len,
(const unsigned char *)salt->buf, (size_t)salt->len,
n, r, p, maxmem,
(unsigned char *)key, (size_t)dklen
key, (size_t)dklen
);
Py_END_ALLOW_THREADS

if (!retval) {
Py_CLEAR(key_obj);
PyBytesWriter_Discard(writer);
notify_ssl_error_occurred();
return NULL;
}
return key_obj;
return PyBytesWriter_Finish(writer);
}
#endif /* PY_OPENSSL_HAS_SCRYPT */

Expand Down
Loading
Loading