diff --git a/Cargo.toml b/Cargo.toml index acdca236..82b46b69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ exclude = [ "rustfmt.toml", ".travis.yml", ".evergreen/**", - ".gitignore" + ".gitignore", ] [features] @@ -54,20 +54,31 @@ name = "bson" [dependencies] ahash = "0.8.0" -chrono = { version = "0.4.15", features = ["std"], default-features = false, optional = true } +chrono = { version = "0.4.15", features = [ + "std", +], default-features = false, optional = true } rand = "0.8" serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", features = ["preserve_order"] } +simdutf8 = "0.1.4" indexmap = "1.6.2" hex = "0.4.2" base64 = "0.13.0" once_cell = "1.5.1" -uuid-0_8 = { package = "uuid", version = "0.8.1", features = ["serde", "v4"], optional = true } +uuid-0_8 = { package = "uuid", version = "0.8.1", features = [ + "serde", + "v4", +], optional = true } uuid = { version = "1.1.2", features = ["serde", "v4"] } serde_bytes = "0.11.5" serde_with = { version = "1.3.1", optional = true } serde_with-3 = { package = "serde_with", version = "3.1.0", optional = true } -time = { version = "0.3.9", features = ["formatting", "parsing", "macros", "large-dates"] } +time = { version = "0.3.9", features = [ + "formatting", + "parsing", + "macros", + "large-dates", +] } bitvec = "1.0.1" [target.'cfg(target_arch = "wasm32")'.dependencies] @@ -78,7 +89,11 @@ criterion = "0.3.0" pretty_assertions = "0.6.1" proptest = "1.0.0" serde_bytes = "0.11" -chrono = { version = "0.4", features = ["serde", "clock", "std"], default-features = false } +chrono = { version = "0.4", features = [ + "serde", + "clock", + "std", +], default-features = false } [package.metadata.docs.rs] all-features = true diff --git a/src/de/error.rs b/src/de/error.rs index 30d4f51a..41d17643 100644 --- a/src/de/error.rs +++ b/src/de/error.rs @@ -1,6 +1,7 @@ -use std::{error, fmt, fmt::Display, io, string, sync::Arc}; +use std::{error, fmt, fmt::Display, io, sync::Arc}; use serde::de::{self, Unexpected}; +use simdutf8::basic::Utf8Error; use crate::Bson; @@ -13,7 +14,7 @@ pub enum Error { /// A [`std::string::FromUtf8Error`](https://doc.rust-lang.org/std/string/struct.FromUtf8Error.html) encountered /// while decoding a UTF-8 String from the input data. - InvalidUtf8String(string::FromUtf8Error), + InvalidUtf8String(Utf8Error), /// While decoding a [`Document`](crate::Document) from bytes, an unexpected or unsupported /// element type was encountered. @@ -44,8 +45,8 @@ impl From for Error { } } -impl From for Error { - fn from(err: string::FromUtf8Error) -> Error { +impl From for Error { + fn from(err: Utf8Error) -> Error { Error::InvalidUtf8String(err) } } diff --git a/src/de/mod.rs b/src/de/mod.rs index 909b71cd..900eb9d3 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -42,7 +42,7 @@ use crate::{ Decimal128, }; -use ::serde::{ +use serde::{ de::{DeserializeOwned, Error as _, Unexpected}, Deserialize, }; @@ -108,14 +108,12 @@ pub(crate) fn read_string(reader: &mut R, utf8_lossy: bool) -> )); } + let mut buf = Vec::with_capacity(len as usize - 1); + reader.take(len as u64 - 1).read_to_end(&mut buf)?; let s = if utf8_lossy { - let mut buf = Vec::with_capacity(len as usize - 1); - reader.take(len as u64 - 1).read_to_end(&mut buf)?; String::from_utf8_lossy(&buf).to_string() } else { - let mut s = String::with_capacity(len as usize - 1); - reader.take(len as u64 - 1).read_to_string(&mut s)?; - s + to_string(buf)? }; // read the null terminator @@ -152,7 +150,13 @@ fn read_cstring(reader: &mut R) -> Result { v.push(c); } - Ok(String::from_utf8(v)?) + to_string(v) +} + +fn to_string(v: Vec) -> Result { + let _ = simdutf8::basic::from_utf8(&v)?; + // Safety: `v` is a valid UTF-8 string. + unsafe { Ok(String::from_utf8_unchecked(v)) } } #[inline] diff --git a/src/de/raw.rs b/src/de/raw.rs index 874d025e..d71e1bde 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -1759,7 +1759,7 @@ impl<'a> BsonBuf<'a> { let s = if utf8_lossy_override.unwrap_or(self.utf8_lossy) { String::from_utf8_lossy(bytes) } else { - Cow::Borrowed(std::str::from_utf8(bytes).map_err(Error::custom)?) + Cow::Borrowed(simdutf8::basic::from_utf8(bytes).map_err(Error::custom)?) }; // consume the null byte diff --git a/src/raw/error.rs b/src/raw/error.rs index 556b7fa0..02207ac0 100644 --- a/src/raw/error.rs +++ b/src/raw/error.rs @@ -1,4 +1,4 @@ -use std::str::Utf8Error; +use simdutf8::basic::Utf8Error; use crate::spec::ElementType; diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a96f6d13..02da2d3e 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -255,7 +255,8 @@ fn read_lenencoded(buf: &[u8]) -> Result<&str> { } fn try_to_str(data: &[u8]) -> Result<&str> { - std::str::from_utf8(data).map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e))) + simdutf8::basic::from_utf8(data) + .map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e))) } fn usize_try_from_i32(i: i32) -> Result {