Skip to content

Add OsStr inherent fns to test for and strip str prefixes. #111317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions library/core/src/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ pub trait Pattern<'a>: Sized {
None
}
}

/// Returns the pattern as a fixed slice of UTF-8 bytes, if possible.
///
/// The default implementation returns `None`. Implementations whose pattern
/// is a fixed string (the `&str` impl, for example) override this to return
/// `Some` with the bytes of that string.
#[inline]
fn as_bytes(&self) -> Option<&[u8]> {
None
}
}

// Searcher
Expand Down Expand Up @@ -917,6 +923,11 @@ where
/// Forwards every `Pattern` method to the `&str` implementation.
impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str {
    pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);

    /// Exposes the bytes of the inner `&str` by forwarding to its impl.
    #[inline]
    fn as_bytes(&self) -> Option<&[u8]> {
        // Name the `&str` impl explicitly so the delegation target is
        // unambiguous and clearly not this method itself.
        <&'b str as Pattern<'a>>::as_bytes(*self)
    }
}

/////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -1001,6 +1012,11 @@ impl<'a, 'b> Pattern<'a> for &'b str {
None
}
}

/// Returns the UTF-8 bytes of this string pattern, always as `Some`.
#[inline]
fn as_bytes(&self) -> Option<&[u8]> {
// `self` is a `&&str` here; the path call invokes the inherent
// `str::as_bytes`, with the argument deref-coerced to `&str`.
// NOTE(review): presumably spelled as a path so that method lookup on
// `&&str` cannot pick `Pattern::as_bytes` again — confirm before changing.
Some(str::as_bytes(self))
}
}

/////////////////////////////////////////////////////////////////////////////
Expand Down
64 changes: 64 additions & 0 deletions library/std/src/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt;
use crate::hash::{Hash, Hasher};
use crate::ops;
use crate::rc::Rc;
use crate::str::pattern::Pattern;
use crate::str::FromStr;
use crate::sync::Arc;

Expand Down Expand Up @@ -978,6 +979,69 @@ impl OsStr {
pub fn eq_ignore_ascii_case<S: AsRef<OsStr>>(&self, other: S) -> bool {
self.inner.eq_ignore_ascii_case(&other.as_ref().inner)
}

/// Returns `true` if the given pattern matches a prefix of this `OsStr`.
///
/// Returns `false` if it does not.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// Matching is delegated to the platform-specific inner string. In the Unix
/// and Windows implementations, a pattern that exposes fixed bytes (such as
/// a `&str`) is compared byte-for-byte against the start of the `OsStr`,
/// while other patterns (such as a [`char`]) are matched against the longest
/// leading part of the `OsStr` that is valid Unicode.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(osstr_str_prefix_fns)]
///
/// use std::ffi::OsString;
///
/// let bananas = OsString::from("bananas");
///
/// assert!(bananas.starts_with("bana"));
/// assert!(!bananas.starts_with("nana"));
/// ```
#[unstable(feature = "osstr_str_prefix_fns", issue = "none")]
#[must_use]
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
self.inner.starts_with(pattern)
}

/// Strips a leading match of `prefix` from this `OsStr`.
///
/// When `prefix` matches at the very start of the `OsStr`, the remainder
/// after the match is returned wrapped in `Some`.
///
/// When it does not match there, `None` is returned.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_fns)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(foobar.strip_prefix("bar"), None);
/// ```
#[unstable(feature = "osstr_str_prefix_fns", issue = "none")]
#[must_use]
#[inline]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> {
    // The platform slice does the matching; re-wrap whatever remainder it yields.
    self.inner.strip_prefix(prefix).map(OsStr::from_inner)
}
}

#[stable(feature = "box_from_os_str", since = "1.17.0")]
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
#![feature(pattern)]
#![feature(platform_intrinsics)]
#![feature(prelude_import)]
#![feature(rustc_attrs)]
Expand Down
43 changes: 43 additions & 0 deletions library/std/src/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt::Write;
use crate::mem;
use crate::rc::Rc;
use crate::str;
use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::{AsInner, IntoInner};

Expand Down Expand Up @@ -270,4 +271,46 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.inner.eq_ignore_ascii_case(&other.inner)
}

/// Returns the longest leading part of this slice that is valid UTF-8.
///
/// The whole slice is returned when it is entirely valid; the result is
/// empty when the very first byte sequence is already invalid.
fn to_str_prefix(&self) -> &str {
    match str::from_utf8(&self.inner) {
        Ok(whole) => whole,
        Err(err) => {
            let valid = &self.inner[..err.valid_up_to()];
            // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which
            // valid UTF-8 has been verified.
            unsafe { str::from_utf8_unchecked(valid) }
        }
    }
}

/// Reports whether `pattern` matches at the start of this slice.
///
/// A pattern that exposes fixed bytes is compared directly against the raw
/// bytes; any other pattern is matched against the valid UTF-8 prefix.
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
    match pattern.as_bytes() {
        Some(needle) => self.inner.starts_with(needle),
        None => self.to_str_prefix().starts_with(pattern),
    }
}

/// Removes a leading match of `prefix`, returning the remaining bytes.
///
/// Returns `None` when `prefix` does not match at the start. A pattern that
/// exposes fixed bytes is stripped byte-wise; any other pattern is run
/// against the valid UTF-8 prefix of this slice.
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
    let rest = match prefix.as_bytes() {
        Some(needle) => self.inner.strip_prefix(needle)?,
        None => {
            let valid = self.to_str_prefix();
            // Only a match that begins at offset 0 counts as a prefix.
            match prefix.into_searcher(valid).next() {
                // SAFETY: `valid` is guaranteed to be a prefix of `self.inner`,
                // and `Searcher` is known to return valid indices.
                SearchStep::Match(0, matched_len) => unsafe {
                    self.inner.get_unchecked(matched_len..)
                },
                _ => return None,
            }
        }
    };
    Some(Slice::from_u8_slice(rest))
}
}
34 changes: 34 additions & 0 deletions library/std/src/sys/unix/os_str/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,37 @@ fn display() {
Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(),
);
}

#[test]
fn slice_starts_with() {
    // "héllô=", then a lone 0xFF byte (not valid UTF-8), then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let haystack = buf.as_slice();

    // A `char` pattern and a `&str` pattern both match the leading text.
    assert!(haystack.starts_with('h'));
    assert!(haystack.starts_with("héllô"));
    // The 0xFF byte sits between "=" and "wørld", so this must not match.
    assert!(!haystack.starts_with("héllô=wørld"));
}

#[test]
fn slice_strip_prefix() {
    // "héllô=", then a lone 0xFF byte (not valid UTF-8), then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let haystack = buf.as_slice();

    // The 0xFF byte sits between "=" and "wørld", so nothing is stripped.
    assert!(haystack.strip_prefix("héllô=wørld").is_none());

    // Stripping a `char` leaves everything after its encoding.
    let after_char = haystack.strip_prefix('h').map(|rest| &rest.inner);
    assert_eq!(after_char, Some(&b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld"[..]));

    // Stripping a `&str` leaves the bytes after it, invalid byte included.
    let after_str = haystack.strip_prefix("héllô").map(|rest| &rest.inner);
    assert_eq!(after_str, Some(&b"=\xFFw\xC3\xB8rld"[..]));
}
18 changes: 18 additions & 0 deletions library/std/src/sys/windows/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::collections::TryReserveError;
use crate::fmt;
use crate::mem;
use crate::rc::Rc;
use crate::str::pattern::Pattern;
use crate::sync::Arc;
use crate::sys_common::wtf8::{Wtf8, Wtf8Buf};
use crate::sys_common::{AsInner, FromInner, IntoInner};
Expand Down Expand Up @@ -156,6 +157,13 @@ impl Slice {
unsafe { mem::transmute(Wtf8::from_str(s)) }
}

/// Reinterprets a borrowed `Wtf8` as a borrowed `Slice`.
#[inline]
fn from_inner(inner: &Wtf8) -> &Slice {
    let raw: *const Wtf8 = inner;
    // SAFETY: Slice is just a wrapper of Wtf8,
    // therefore converting &Wtf8 to &Slice is safe.
    unsafe { &*(raw as *const Slice) }
}

pub fn to_str(&self) -> Option<&str> {
self.inner.as_str()
}
Expand Down Expand Up @@ -222,4 +230,14 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.inner.eq_ignore_ascii_case(&other.inner)
}

/// Returns `true` if `pattern` matches at the start of this slice.
///
/// Delegates to the `starts_with` method of the wrapped `inner` value.
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
self.inner.starts_with(pattern)
}

/// Removes a leading match of `prefix`, returning the remainder.
///
/// Returns `None` when the wrapped `inner` value reports no match.
#[inline]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
    self.inner.strip_prefix(prefix).map(Slice::from_inner)
}
}
47 changes: 47 additions & 0 deletions library/std/src/sys_common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::ops;
use crate::rc::Rc;
use crate::slice;
use crate::str;
use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::AsInner;

Expand Down Expand Up @@ -781,6 +782,52 @@ impl Wtf8 {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.bytes.eq_ignore_ascii_case(&other.bytes)
}

/// Returns the leading part of this string that precedes the first
/// surrogate, as a `&str`.
///
/// The whole string is returned when it contains no surrogate; the result
/// is empty when the string begins with one.
fn to_str_prefix(&self) -> &str {
    // Everything before the first surrogate (or the whole buffer if none).
    let end = match self.next_surrogate(0) {
        Some((surrogate_pos, _)) => surrogate_pos,
        None => self.bytes.len(),
    };
    let utf8_bytes = &self.bytes[..end];

    // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that contains no
    // surrogates, and well-formed WTF-8 that contains no surrogates is
    // also well-formed UTF-8.
    unsafe { str::from_utf8_unchecked(utf8_bytes) }
}

/// Reports whether `pattern` matches at the start of this string.
///
/// A pattern that exposes fixed bytes is compared directly against the raw
/// bytes; any other pattern is matched against the surrogate-free prefix.
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
    match pattern.as_bytes() {
        Some(needle) => self.bytes.starts_with(needle),
        None => self.to_str_prefix().starts_with(pattern),
    }
}

/// Removes a leading match of `prefix`, returning the remaining string.
///
/// Returns `None` when `prefix` does not match at the start. A pattern that
/// exposes fixed bytes is stripped byte-wise; any other pattern is run
/// against the surrogate-free prefix of this string.
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Wtf8> {
    let rest = match prefix.as_bytes() {
        Some(needle) => self.bytes.strip_prefix(needle)?,
        None => {
            let valid = self.to_str_prefix();
            // Only a match that begins at offset 0 counts as a prefix.
            match prefix.into_searcher(valid).next() {
                // SAFETY: `valid` is guaranteed to be a prefix of `self.bytes`,
                // and `Searcher` is known to return valid indices.
                SearchStep::Match(0, matched_len) => unsafe {
                    self.bytes.get_unchecked(matched_len..)
                },
                _ => return None,
            }
        }
    };

    // SAFETY: WTF-8 is a superset of UTF-8, so stripping off a UTF-8
    // prefix will yield a suffix that is valid WTF-8.
    Some(unsafe { Wtf8::from_bytes_unchecked(rest) })
}
}

/// Returns a slice of the given string for the byte range \[`begin`..`end`).
Expand Down
34 changes: 34 additions & 0 deletions library/std/src/sys_common/wtf8/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -664,3 +664,37 @@ fn wtf8_to_owned() {
assert_eq!(string.bytes, b"\xED\xA0\x80");
assert!(!string.is_known_utf8);
}

#[test]
fn wtf8_starts_with() {
    // "héllô=", then the lone surrogate U+D800, then "wørld".
    let mut buf = Wtf8Buf::from_str("héllô=");
    buf.push(CodePoint::from_u32(0xD800).unwrap());
    buf.push_str("wørld");
    let haystack = buf.as_slice();

    // A `char` pattern and a `&str` pattern both match the leading text.
    assert!(haystack.starts_with('h'));
    assert!(haystack.starts_with("héllô"));
    // The surrogate sits between "=" and "wørld", so this must not match.
    assert!(!haystack.starts_with("héllô=wørld"));
}

#[test]
fn wtf8_strip_prefix() {
    // "héllô=", then the lone surrogate U+D800, then "wørld".
    let mut buf = Wtf8Buf::from_str("héllô=");
    buf.push(CodePoint::from_u32(0xD800).unwrap());
    buf.push_str("wørld");
    let haystack = buf.as_slice();

    // The surrogate sits between "=" and "wørld", so nothing is stripped.
    assert!(haystack.strip_prefix("héllô=wørld").is_none());

    // Stripping a `char` leaves everything after its encoding.
    let after_char = haystack.strip_prefix('h').map(|rest| &rest.bytes);
    assert_eq!(after_char, Some(&b"\xC3\xA9ll\xC3\xB4=\xED\xA0\x80w\xC3\xB8rld"[..]));

    // Stripping a `&str` leaves the bytes after it, surrogate included.
    let after_str = haystack.strip_prefix("héllô").map(|rest| &rest.bytes);
    assert_eq!(after_str, Some(&b"=\xED\xA0\x80w\xC3\xB8rld"[..]));
}