Skip to content

Commit 21ad82f

Browse files
leftygbaloghlefty
andauthored
fix: check codepoint count not byte length in deserialize_char (#107)
The guard `s.len() != 1` compared the UTF-8 byte length, so `depythonize::<char>` returned `Err(InvalidLengthChar)` for any single-codepoint non-ASCII character (e.g. 'ä', U+00E4, is one codepoint but two UTF-8 bytes). Fix: use `s.chars().count() != 1`, which counts Unicode codepoints instead of bytes. A test for the multibyte-codepoint case is added to `src/de.rs`. Co-authored-by: lefty <geza.balogh@trayport.com>
1 parent 0085a18 commit 21ad82f

1 file changed

Lines changed: 14 additions & 1 deletion

File tree

src/de.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ impl<'de> de::Deserializer<'de> for &'_ mut Depythonizer<'_, '_> {
187187
V: de::Visitor<'de>,
188188
{
189189
let s = self.input.cast::<PyString>()?.to_cow()?;
190-
if s.len() != 1 {
190+
if s.chars().count() != 1 {
191191
return Err(PythonizeError::invalid_length_char());
192192
}
193193
visitor.visit_char(s.chars().next().unwrap())
@@ -1017,6 +1017,19 @@ mod test {
10171017
test_de(code, &expected, &expected_json);
10181018
}
10191019

1020+
#[test]
1021+
fn test_char_multibyte_codepoint() {
1022+
// Regression test: 'ä' is U+00E4, i.e. one Unicode codepoint encoded as two UTF-8 bytes.
1023+
// `deserialize_char` previously compared `s.len()` (the UTF-8 byte length) with 1,
1024+
// which incorrectly rejected every non-ASCII char. It now compares
1025+
// `s.chars().count()` (the codepoint count) with 1 instead.
1026+
Python::attach(|py| {
1027+
let py_str = pyo3::types::PyString::new(py, "ä");
1028+
let result = depythonize::<char>(py_str.as_any());
1029+
assert_eq!(result.unwrap(), 'ä');
1030+
});
1031+
}
1032+
10201033
#[test]
10211034
fn test_unknown_type() {
10221035
Python::attach(|py| {

0 commit comments

Comments
 (0)