Skip to content

Commit 9097e5f

Browse files
committed
gh-129813: Add PyBytesWriter C API (with size flavor)
Add functions:
* PyBytesWriter_Create()
* PyBytesWriter_Discard()
* PyBytesWriter_Finish()
* PyBytesWriter_FinishWithSize()
* PyBytesWriter_FinishWithEndPointer()
* PyBytesWriter_Data()
* PyBytesWriter_Allocated()
* PyBytesWriter_SetSize()
* PyBytesWriter_Resize()
1 parent d16f455 commit 9097e5f

File tree

7 files changed

+359
-118
lines changed

7 files changed

+359
-118
lines changed

Include/cpython/bytesobject.h

+30
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,33 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable)
4040
{
4141
return PyBytes_Join(sep, iterable);
4242
}
43+
44+
45+
// --- PyBytesWriter API -----------------------------------------------------
46+
47+
typedef struct PyBytesWriter PyBytesWriter;
48+
49+
PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create(
50+
Py_ssize_t alloc);
51+
PyAPI_FUNC(void) PyBytesWriter_Discard(
52+
PyBytesWriter *writer);
53+
PyAPI_FUNC(PyObject*) PyBytesWriter_Finish(
54+
PyBytesWriter *writer);
55+
PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize(
56+
PyBytesWriter *writer,
57+
Py_ssize_t size);
58+
PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithEndPointer(
59+
PyBytesWriter *writer,
60+
void *data);
61+
62+
PyAPI_FUNC(void*) PyBytesWriter_Data(
63+
PyBytesWriter *writer);
64+
PyAPI_FUNC(Py_ssize_t) PyBytesWriter_Allocated(
65+
PyBytesWriter *writer);
66+
67+
PyAPI_FUNC(int) PyBytesWriter_SetSize(
68+
PyBytesWriter *writer,
69+
Py_ssize_t size);
70+
PyAPI_FUNC(int) PyBytesWriter_Resize(
71+
PyBytesWriter *writer,
72+
Py_ssize_t alloc);

Include/internal/pycore_freelist_state.h

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ extern "C" {
2424
# define Py_futureiters_MAXFREELIST 255
2525
# define Py_object_stack_chunks_MAXFREELIST 4
2626
# define Py_unicode_writers_MAXFREELIST 1
27+
# define Py_bytes_writers_MAXFREELIST 1
2728
# define Py_pymethodobjects_MAXFREELIST 20
2829

2930
// A generic freelist of either PyObjects or other data structures.
@@ -53,6 +54,7 @@ struct _Py_freelists {
5354
struct _Py_freelist futureiters;
5455
struct _Py_freelist object_stack_chunks;
5556
struct _Py_freelist unicode_writers;
57+
struct _Py_freelist bytes_writers;
5658
struct _Py_freelist pymethodobjects;
5759
};
5860

Modules/_pickle.c

+28-22
Original file line numberDiff line numberDiff line change
@@ -2615,31 +2615,31 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj)
26152615
static PyObject *
26162616
raw_unicode_escape(PyObject *obj)
26172617
{
2618-
char *p;
2619-
Py_ssize_t i, size;
2620-
const void *data;
2621-
int kind;
2622-
_PyBytesWriter writer;
2618+
Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
2619+
const void *data = PyUnicode_DATA(obj);
2620+
int kind = PyUnicode_KIND(obj);
26232621

2624-
_PyBytesWriter_Init(&writer);
2625-
2626-
size = PyUnicode_GET_LENGTH(obj);
2627-
data = PyUnicode_DATA(obj);
2628-
kind = PyUnicode_KIND(obj);
2629-
2630-
p = _PyBytesWriter_Alloc(&writer, size);
2631-
if (p == NULL)
2632-
goto error;
2633-
writer.overallocate = 1;
2622+
Py_ssize_t alloc = size;
2623+
PyBytesWriter *writer = PyBytesWriter_Create(alloc);
2624+
if (writer == NULL) {
2625+
return NULL;
2626+
}
2627+
char *p = PyBytesWriter_Data(writer);
26342628

2635-
for (i=0; i < size; i++) {
2629+
for (Py_ssize_t i=0; i < size; i++) {
26362630
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
26372631
/* Map 32-bit characters to '\Uxxxxxxxx' */
26382632
if (ch >= 0x10000) {
26392633
/* -1: subtract 1 preallocated byte */
2640-
p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2641-
if (p == NULL)
2634+
alloc += 10-1;
2635+
Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer);
2636+
if (PyBytesWriter_SetSize(writer, pos) < 0) {
26422637
goto error;
2638+
}
2639+
if (PyBytesWriter_Resize(writer, alloc) < 0) {
2640+
goto error;
2641+
}
2642+
p = (char*)PyBytesWriter_Data(writer) + pos;
26432643

26442644
*p++ = '\\';
26452645
*p++ = 'U';
@@ -2658,9 +2658,15 @@ raw_unicode_escape(PyObject *obj)
26582658
ch == 0x1a)
26592659
{
26602660
/* -1: subtract 1 preallocated byte */
2661-
p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2662-
if (p == NULL)
2661+
alloc += 6-1;
2662+
Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer);
2663+
if (PyBytesWriter_SetSize(writer, pos) < 0) {
26632664
goto error;
2665+
}
2666+
if (PyBytesWriter_Resize(writer, alloc) < 0) {
2667+
goto error;
2668+
}
2669+
p = (char*)PyBytesWriter_Data(writer) + pos;
26642670

26652671
*p++ = '\\';
26662672
*p++ = 'u';
@@ -2674,10 +2680,10 @@ raw_unicode_escape(PyObject *obj)
26742680
*p++ = (char) ch;
26752681
}
26762682

2677-
return _PyBytesWriter_Finish(&writer, p);
2683+
return PyBytesWriter_FinishWithEndPointer(writer, p);
26782684

26792685
error:
2680-
_PyBytesWriter_Dealloc(&writer);
2686+
PyBytesWriter_Discard(writer);
26812687
return NULL;
26822688
}
26832689

Modules/_struct.c

+5-8
Original file line numberDiff line numberDiff line change
@@ -2272,7 +2272,6 @@ strings.");
22722272
static PyObject *
22732273
s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
22742274
{
2275-
char *buf;
22762275
PyStructObject *soself;
22772276
_structmodulestate *state = get_struct_state_structinst(self);
22782277

@@ -2288,21 +2287,19 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
22882287
}
22892288

22902289
/* Allocate a new string */
2291-
_PyBytesWriter writer;
2292-
_PyBytesWriter_Init(&writer);
2293-
buf = _PyBytesWriter_Alloc(&writer, soself->s_size);
2294-
if (buf == NULL) {
2295-
_PyBytesWriter_Dealloc(&writer);
2290+
PyBytesWriter *writer = PyBytesWriter_Create(soself->s_size);
2291+
if (writer == NULL) {
22962292
return NULL;
22972293
}
2294+
char *buf = PyBytesWriter_Data(writer);
22982295

22992296
/* Call the guts */
23002297
if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) {
2301-
_PyBytesWriter_Dealloc(&writer);
2298+
PyBytesWriter_Discard(writer);
23022299
return NULL;
23032300
}
23042301

2305-
return _PyBytesWriter_Finish(&writer, buf + soself->s_size);
2302+
return PyBytesWriter_FinishWithSize(writer, soself->s_size);
23062303
}
23072304

23082305
PyDoc_STRVAR(s_pack_into__doc__,

Modules/binascii.c

+28-26
Original file line numberDiff line numberDiff line change
@@ -302,16 +302,13 @@ static PyObject *
302302
binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
303303
/*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/
304304
{
305-
unsigned char *ascii_data;
306305
const unsigned char *bin_data;
307306
int leftbits = 0;
308307
unsigned char this_ch;
309308
unsigned int leftchar = 0;
310309
binascii_state *state;
311-
Py_ssize_t bin_len, out_len;
312-
_PyBytesWriter writer;
310+
Py_ssize_t bin_len;
313311

314-
_PyBytesWriter_Init(&writer);
315312
bin_data = data->buf;
316313
bin_len = data->len;
317314
if ( bin_len > 45 ) {
@@ -325,10 +322,12 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
325322
}
326323

327324
/* We're lazy and allocate too much (fixed up later) */
328-
out_len = 2 + (bin_len + 2) / 3 * 4;
329-
ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
330-
if (ascii_data == NULL)
325+
Py_ssize_t out_len = 2 + (bin_len + 2) / 3 * 4;
326+
PyBytesWriter *writer = PyBytesWriter_Create(out_len);
327+
if (writer == NULL) {
331328
return NULL;
329+
}
330+
unsigned char *ascii_data = PyBytesWriter_Data(writer);
332331

333332
/* Store the length */
334333
if (backtick && !bin_len)
@@ -356,7 +355,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
356355
}
357356
*ascii_data++ = '\n'; /* Append a courtesy newline */
358357

359-
return _PyBytesWriter_Finish(&writer, ascii_data);
358+
return PyBytesWriter_FinishWithEndPointer(writer, ascii_data);
360359
}
361360

362361
/*[clinic input]
@@ -387,12 +386,11 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
387386

388387
/* Allocate the buffer */
389388
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
390-
_PyBytesWriter writer;
391-
_PyBytesWriter_Init(&writer);
392-
unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
393-
if (bin_data == NULL)
389+
PyBytesWriter *writer = PyBytesWriter_Create(bin_len);
390+
if (writer == NULL) {
394391
return NULL;
395-
unsigned char *bin_data_start = bin_data;
392+
}
393+
unsigned char *bin_data = PyBytesWriter_Data(writer);
396394

397395
if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
398396
state = get_binascii_state(module);
@@ -488,12 +486,14 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
488486
state = get_binascii_state(module);
489487
if (state == NULL) {
490488
/* error already set, from get_binascii_state */
489+
assert(PyErr_Occurred());
491490
} else if (quad_pos == 1) {
492491
/*
493492
** There is exactly one extra valid, non-padding, base64 character.
494493
** This is an invalid length, as there is no possible input that
495494
** could be encoded into such a base64 string.
496495
*/
496+
unsigned char *bin_data_start = PyBytesWriter_Data(writer);
497497
PyErr_Format(state->Error,
498498
"Invalid base64-encoded string: "
499499
"number of data characters (%zd) cannot be 1 more "
@@ -502,13 +502,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
502502
} else {
503503
PyErr_SetString(state->Error, "Incorrect padding");
504504
}
505-
error_end:
506-
_PyBytesWriter_Dealloc(&writer);
507-
return NULL;
505+
goto error_end;
508506
}
509507

510508
done:
511-
return _PyBytesWriter_Finish(&writer, bin_data);
509+
return PyBytesWriter_FinishWithEndPointer(writer, bin_data);
510+
511+
error_end:
512+
PyBytesWriter_Discard(writer);
513+
return NULL;
512514
}
513515

514516

@@ -527,18 +529,15 @@ static PyObject *
527529
binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
528530
/*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/
529531
{
530-
unsigned char *ascii_data;
531532
const unsigned char *bin_data;
532533
int leftbits = 0;
533534
unsigned char this_ch;
534535
unsigned int leftchar = 0;
535-
Py_ssize_t bin_len, out_len;
536-
_PyBytesWriter writer;
536+
Py_ssize_t bin_len;
537537
binascii_state *state;
538538

539539
bin_data = data->buf;
540540
bin_len = data->len;
541-
_PyBytesWriter_Init(&writer);
542541

543542
assert(bin_len >= 0);
544543

@@ -554,12 +553,15 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
554553
/* We're lazy and allocate too much (fixed up later).
555554
"+2" leaves room for up to two pad characters.
556555
Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
557-
out_len = bin_len*2 + 2;
558-
if (newline)
556+
Py_ssize_t out_len = bin_len*2 + 2;
557+
if (newline) {
559558
out_len++;
560-
ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
561-
if (ascii_data == NULL)
559+
}
560+
PyBytesWriter *writer = PyBytesWriter_Create(out_len);
561+
if (writer == NULL) {
562562
return NULL;
563+
}
564+
unsigned char *ascii_data = PyBytesWriter_Data(writer);
563565

564566
for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
565567
/* Shift the data into our buffer */
@@ -584,7 +586,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
584586
if (newline)
585587
*ascii_data++ = '\n'; /* Append a courtesy newline */
586588

587-
return _PyBytesWriter_Finish(&writer, ascii_data);
589+
return PyBytesWriter_FinishWithEndPointer(writer, ascii_data);
588590
}
589591

590592

0 commit comments

Comments
 (0)