Skip to content

Commit bfb72fb

Browse files
Add PyString::from_fmt using new PyUnicodeWriter
1 parent a9e3f81 commit bfb72fb

File tree

5 files changed

+310
-1
lines changed

5 files changed

+310
-1
lines changed

pyo3-ffi/src/compat/py_3_14.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,83 @@ compat_function!(
2424
}
2525
}
2626
);
27+
28+
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    // Backport of `PyUnicodeWriter_Create` built on the private `_PyUnicodeWriter` API.
    //
    // Allocates a `_PyUnicodeWriter` with `PyMem_Malloc`, initialises it and reserves room for
    // `length` characters. Returns a null pointer with a Python exception set when `length` is
    // negative, when the allocation fails, or when preparing the buffer fails.
    pub unsafe fn PyUnicodeWriter_Create(length: crate::Py_ssize_t) -> *mut crate::PyUnicodeWriter {
        if length < 0 {
            crate::PyErr_SetString(
                crate::PyExc_ValueError,
                c_str!("length must be positive").as_ptr(),
            );
            return std::ptr::null_mut();
        }

        let size = std::mem::size_of::<crate::_PyUnicodeWriter>();
        let writer: *mut crate::_PyUnicodeWriter = crate::PyMem_Malloc(size).cast();
        // `PyMem_Malloc` reports failure by returning null; initialising through a null pointer
        // would be undefined behaviour, so raise `MemoryError` and bail out instead.
        if writer.is_null() {
            crate::PyErr_NoMemory();
            return std::ptr::null_mut();
        }

        crate::_PyUnicodeWriter_Init(writer);
        if crate::_PyUnicodeWriter_Prepare(writer, length, 127) < 0 {
            // Discard frees both the internal buffer and the writer allocation itself.
            PyUnicodeWriter_Discard(writer.cast());
            return std::ptr::null_mut();
        }
        (*writer).overallocate = 1;
        writer.cast()
    }
);
51+
52+
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    // Backport of `PyUnicodeWriter_Finish`: asks the private writer for its finished string
    // object, then frees the writer allocation. The writer must not be used afterwards.
    pub unsafe fn PyUnicodeWriter_Finish(writer: *mut crate::PyUnicodeWriter) -> *mut crate::PyObject {
        let finished = crate::_PyUnicodeWriter_Finish(writer.cast());
        // The struct itself was obtained from `PyMem_Malloc`, so it is released separately.
        crate::PyMem_Free(writer.cast());
        finished
    }
);
61+
62+
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    // Backport of `PyUnicodeWriter_Discard`: releases the writer's internal buffer and then the
    // writer allocation itself. The writer must not be used afterwards.
    //
    // The explicit `-> ()` is kept on purpose; presumably `compat_function!` expects a return
    // type in the signature, as every invocation in this file spells one out.
    pub unsafe fn PyUnicodeWriter_Discard(writer: *mut crate::PyUnicodeWriter) -> () {
        // The CPython 3.14 function documents a NULL writer as a no-op; mirror that rather than
        // handing a null pointer to `_PyUnicodeWriter_Dealloc`.
        if writer.is_null() {
            return;
        }
        crate::_PyUnicodeWriter_Dealloc(writer.cast());
        crate::PyMem_Free(writer.cast())
    }
);
70+
71+
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    // Backport of `PyUnicodeWriter_WriteChar`: appends one code point to the writer.
    // Returns the status of `_PyUnicodeWriter_WriteChar`, or -1 with `ValueError` set when `ch`
    // is above 0x10ffff.
    pub unsafe fn PyUnicodeWriter_WriteChar(writer: *mut crate::PyUnicodeWriter, ch: crate::Py_UCS4) -> std::os::raw::c_int {
        if ch <= 0x10ffff {
            return crate::_PyUnicodeWriter_WriteChar(writer.cast(), ch);
        }

        crate::PyErr_SetString(
            crate::PyExc_ValueError,
            c_str!("character must be in range(0x110000)").as_ptr(),
        );
        -1
    }
);
86+
87+
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    // Backport of `PyUnicodeWriter_WriteUTF8`: builds a temporary Python string from the given
    // bytes and appends it to the writer. A negative `size` means "measure `str` with `strlen`".
    // Returns -1 when the temporary string cannot be created, otherwise the status reported by
    // `_PyUnicodeWriter_WriteStr`.
    pub unsafe fn PyUnicodeWriter_WriteUTF8(writer: *mut crate::PyUnicodeWriter, str: *const std::os::raw::c_char, size: crate::Py_ssize_t) -> std::os::raw::c_int {
        let byte_len = if size >= 0 {
            size
        } else {
            libc::strlen(str) as isize
        };

        let decoded = crate::PyUnicode_FromStringAndSize(str, byte_len);
        if decoded.is_null() {
            return -1;
        }

        let status = crate::_PyUnicodeWriter_WriteStr(writer.cast(), decoded);
        // The temporary string is released here whether or not the write succeeded.
        crate::Py_DECREF(decoded);
        status
    }
);

pyo3-ffi/src/cpython/unicodeobject.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,77 @@ extern "C" {
686686
// skipped PyUnicode_GetMax
687687
}
688688

689+
// Opaque stand-in for the C `PyUnicodeWriter` type; the declarations in this file only ever
// handle it behind `*mut PyUnicodeWriter` pointers.
opaque_struct!(pub PyUnicodeWriter);
690+
691+
#[cfg(not(Py_3_14))]
692+
#[repr(C)]
693+
pub(crate) struct _PyUnicodeWriter {
694+
buffer: *mut PyObject,
695+
data: *mut c_void,
696+
kind: c_int,
697+
pub(crate) maxchar: Py_UCS4,
698+
pub(crate) size: Py_ssize_t,
699+
pub(crate) pos: Py_ssize_t,
700+
min_length: Py_ssize_t,
701+
min_char: Py_UCS4,
702+
pub(crate) overallocate: c_char,
703+
readonly: c_char,
704+
}
705+
706+
extern "C" {
707+
#[cfg(Py_3_14)]
708+
pub fn PyUnicodeWriter_Create(length: Py_ssize_t) -> *mut PyUnicodeWriter;
709+
#[cfg(Py_3_14)]
710+
pub fn PyUnicodeWriter_Finish(writer: *mut PyUnicodeWriter) -> *mut PyObject;
711+
#[cfg(not(Py_3_14))]
712+
pub(crate) fn _PyUnicodeWriter_Finish(writer: *mut _PyUnicodeWriter) -> *mut PyObject;
713+
#[cfg(Py_3_14)]
714+
pub fn PyUnicodeWriter_Discard(writer: *mut PyUnicodeWriter);
715+
#[cfg(not(Py_3_14))]
716+
pub(crate) fn _PyUnicodeWriter_Dealloc(writer: *mut _PyUnicodeWriter);
717+
#[cfg(not(Py_3_14))]
718+
pub(crate) fn _PyUnicodeWriter_Init(writer: *mut _PyUnicodeWriter);
719+
#[cfg(not(Py_3_14))]
720+
pub(crate) fn _PyUnicodeWriter_PrepareInternal(
721+
writer: *mut _PyUnicodeWriter,
722+
length: Py_ssize_t,
723+
maxchars: Py_UCS4,
724+
) -> c_int;
725+
#[cfg(Py_3_14)]
726+
pub fn PyUnicodeWriter_WriteChar(writer: *mut PyUnicodeWriter, ch: Py_UCS4) -> c_int;
727+
#[cfg(not(Py_3_14))]
728+
pub(crate) fn _PyUnicodeWriter_WriteChar(writer: *mut _PyUnicodeWriter, ch: Py_UCS4) -> c_int;
729+
#[cfg(not(Py_3_14))]
730+
pub(crate) fn _PyUnicodeWriter_WriteStr(
731+
writer: *mut _PyUnicodeWriter,
732+
str: *mut PyObject,
733+
) -> c_int;
734+
#[cfg(Py_3_14)]
735+
pub fn PyUnicodeWriter_WriteUTF8(
736+
writer: *mut PyUnicodeWriter,
737+
str: *const c_char,
738+
size: Py_ssize_t,
739+
) -> c_int;
740+
}
741+
742+
#[cfg(not(Py_3_14))]
743+
#[inline(always)]
744+
pub(crate) unsafe fn _PyUnicodeWriter_Prepare(
745+
writer: *mut _PyUnicodeWriter,
746+
length: Py_ssize_t,
747+
maxchars: Py_UCS4,
748+
) -> c_int {
749+
if maxchars <= (*writer).maxchar && length <= (*writer).size - (*writer).pos {
750+
return 0;
751+
}
752+
753+
if length == 0 {
754+
return 0;
755+
}
756+
757+
_PyUnicodeWriter_PrepareInternal(writer, length, maxchars)
758+
}
759+
689760
// skipped _PyUnicodeWriter
690761
// skipped _PyUnicodeWriter_Init
691762
// skipped _PyUnicodeWriter_Prepare

src/fmt.rs

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//! This module provides the `PyUnicodeWriter` struct, which is a utility for efficiently
2+
//! constructing Python strings using Rust's `fmt::Write` trait.
3+
//! It allows for incremental string construction, without the need for repeated allocations, and
4+
//! is particularly useful for building strings in a performance-sensitive context.
5+
use crate::ffi::compat::{
6+
PyUnicodeWriter_Create, PyUnicodeWriter_Discard, PyUnicodeWriter_Finish,
7+
PyUnicodeWriter_WriteChar, PyUnicodeWriter_WriteUTF8,
8+
};
9+
use crate::ffi_ptr_ext::FfiPtrExt;
10+
use crate::impl_::callback::WrappingCastTo;
11+
use crate::types::{PyAnyMethods, PyString};
12+
use crate::{ffi, Bound, PyErr, PyResult, Python};
13+
use std::ptr::NonNull;
14+
use std::{fmt, mem};
15+
16+
/// The `PyUnicodeWriter` is a utility for efficiently constructing Python strings
17+
pub struct PyUnicodeWriter {
18+
writer: NonNull<ffi::PyUnicodeWriter>,
19+
last_error: Option<PyErr>,
20+
}
21+
22+
impl PyUnicodeWriter {
23+
/// Creates a new `PyUnicodeWriter`.
24+
pub fn new(py: Python<'_>) -> PyResult<Self> {
25+
Self::with_capacity(py, 0)
26+
}
27+
28+
/// Creates a new `PyUnicodeWriter` with the specified initial capacity.
29+
pub fn with_capacity(py: Python<'_>, capacity: usize) -> PyResult<Self> {
30+
match NonNull::new(unsafe { PyUnicodeWriter_Create(capacity.wrapping_cast()) }) {
31+
Some(ptr) => Ok(PyUnicodeWriter {
32+
writer: ptr,
33+
last_error: None,
34+
}),
35+
None => Err(PyErr::fetch(py)),
36+
}
37+
}
38+
39+
/// Consumes the `PyUnicodeWriter` and returns a `Bound<PyString>` containing the constructed string.
40+
pub fn into_py_string(self, py: Python<'_>) -> PyResult<Bound<'_, PyString>> {
41+
let writer_ptr = self.as_ptr();
42+
mem::forget(self);
43+
Ok(unsafe {
44+
PyUnicodeWriter_Finish(writer_ptr)
45+
.assume_owned_or_err(py)?
46+
.downcast_into_unchecked()
47+
})
48+
}
49+
50+
/// When fmt::Write returned an error, this function can be used to retrieve the last error that occurred.
51+
pub fn take_error(&mut self) -> Option<PyErr> {
52+
self.last_error.take()
53+
}
54+
55+
fn as_ptr(&self) -> *mut ffi::PyUnicodeWriter {
56+
self.writer.as_ptr()
57+
}
58+
59+
fn set_error(&mut self) {
60+
Python::with_gil(|py| {
61+
self.last_error = Some(PyErr::fetch(py));
62+
})
63+
}
64+
}
65+
66+
impl fmt::Write for PyUnicodeWriter {
67+
fn write_str(&mut self, s: &str) -> fmt::Result {
68+
let result = unsafe {
69+
PyUnicodeWriter_WriteUTF8(self.as_ptr(), s.as_ptr().cast(), s.len() as isize)
70+
};
71+
if result < 0 {
72+
self.set_error();
73+
Err(fmt::Error)
74+
} else {
75+
Ok(())
76+
}
77+
}
78+
79+
fn write_char(&mut self, c: char) -> fmt::Result {
80+
let result = unsafe { PyUnicodeWriter_WriteChar(self.as_ptr(), c as u32) };
81+
if result < 0 {
82+
self.set_error();
83+
Err(fmt::Error)
84+
} else {
85+
Ok(())
86+
}
87+
}
88+
}
89+
90+
impl Drop for PyUnicodeWriter {
91+
fn drop(&mut self) {
92+
unsafe {
93+
PyUnicodeWriter_Discard(self.as_ptr());
94+
}
95+
}
96+
}
97+
98+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::PyStringMethods;
    use crate::{IntoPyObject, Python};
    use std::fmt::Write;

    /// Mixed `write!` / `write_char` usage, including a non-BMP character.
    #[test]
    fn unicode_writer_test() {
        Python::with_gil(|py| {
            let mut writer = PyUnicodeWriter::new(py).unwrap();
            write!(writer, "Hello {}!", "world").unwrap();
            writer.write_char('😎').unwrap();
            let result = writer.into_py_string(py).unwrap();
            assert_eq!(result.to_string(), "Hello world!😎");
        });
    }

    /// `PyString::from_fmt` must produce the formatted text, not merely succeed.
    #[test]
    fn test_pystring_from_fmt() {
        Python::with_gil(|py| {
            let py_string = PyString::from_fmt(py, format_args!("Hello {}!", "world")).unwrap();
            assert_eq!(py_string.to_cow().unwrap(), "Hello world!");
        });
    }

    /// Formatting a Python object goes through its Python-side string conversion.
    #[test]
    fn test_complex_format() {
        Python::with_gil(|py| {
            // 2.5 rather than 3.14: the latter trips the deny-by-default
            // `clippy::approx_constant` lint.
            let complex_value = (42, "foo", 2.5).into_pyobject(py).unwrap();
            let py_string = PyString::from_fmt(
                py,
                format_args!("This is some complex value: {complex_value}"),
            )
            .unwrap();
            let actual = py_string.to_cow().unwrap();
            let expected = "This is some complex value: (42, 'foo', 2.5)";
            assert_eq!(actual, expected);
        });
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ pub mod coroutine;
421421
mod err;
422422
pub mod exceptions;
423423
pub mod ffi;
424+
pub mod fmt;
424425
mod gil;
425426
#[doc(hidden)]
426427
pub mod impl_;

src/types/string.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#[cfg(not(Py_LIMITED_API))]
22
use crate::exceptions::PyUnicodeDecodeError;
33
use crate::ffi_ptr_ext::FfiPtrExt;
4+
use crate::fmt::PyUnicodeWriter;
45
use crate::instance::Borrowed;
56
use crate::py_result_ext::PyResultExt;
67
use crate::types::any::PyAnyMethods;
@@ -9,7 +10,8 @@ use crate::types::PyBytes;
910
use crate::{ffi, Bound, Py, PyAny, PyResult, Python};
1011
use std::borrow::Cow;
1112
use std::ffi::CString;
12-
use std::str;
13+
use std::fmt::Write as _;
14+
use std::{fmt, str};
1315

1416
/// Represents raw data backing a Python `str`.
1517
///
@@ -209,6 +211,24 @@ impl PyString {
209211
.downcast_into_unchecked()
210212
}
211213
}
214+
215+
/// Creates a Python string using a format string.
216+
///
217+
/// This function is similar to [`format!`], but it returns a Python string object instead of a Rust string.
218+
pub fn from_fmt<'py>(
219+
py: Python<'py>,
220+
args: fmt::Arguments<'_>,
221+
) -> PyResult<Bound<'py, PyString>> {
222+
if let Some(static_string) = args.as_str() {
223+
return Ok(PyString::new(py, static_string));
224+
};
225+
226+
let mut writer = PyUnicodeWriter::new(py)?;
227+
writer
228+
.write_fmt(args)
229+
.map_err(|_| writer.take_error().expect("expected error"))?;
230+
writer.into_py_string(py)
231+
}
212232
}
213233

214234
/// Implementation of functionality for [`PyString`].

0 commit comments

Comments
 (0)