22//! turning escape sequences into the values they represent.
33
44use std:: ffi:: CStr ;
5+ use std:: num:: NonZero ;
56use std:: ops:: Range ;
67use std:: str:: Chars ;
78
@@ -105,7 +106,10 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
105106/// and produces a sequence of characters or errors,
106107/// which are returned by invoking `callback`.
107108/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
// Thin wrapper: forwards `src` and `callback` unchanged to `CStr::check_raw`.
// In the added (`+`) signature the callback receives `Result<NonZero<char>, EscapeError>`;
// the removed (`-`) signature passed `Result<char, EscapeError>`.
108- pub fn check_raw_c_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
109+ pub fn check_raw_c_str (
110+ src : & str ,
111+ callback : impl FnMut ( Range < usize > , Result < NonZero < char > , EscapeError > ) ,
112+ ) {
109113 CStr :: check_raw ( src, callback) ;
110114}
111115
@@ -181,15 +185,11 @@ fn char2byte(c: char) -> Result<u8, EscapeError> {
181185}
182186
// `CheckRaw` implementation for C strings. The raw unit becomes `NonZero<char>`,
// so an accepted unit can never be the NUL character.
183187impl CheckRaw for CStr {
184- type RawUnit = char ;
188+ type RawUnit = NonZero < char > ;
185189
186190 #[ inline]
187191 fn char2raw_unit ( c : char ) -> Result < Self :: RawUnit , EscapeError > {
188- if c == '\0' {
189- Err ( EscapeError :: NulInCStr )
190- } else {
191- Ok ( c)
192- }
// `NonZero::new` returns `None` only for '\0'; that case is mapped to
// `EscapeError::NulInCStr`, matching the removed explicit comparison.
192+ NonZero :: new ( c) . ok_or ( EscapeError :: NulInCStr )
193193 }
194194}
195195
@@ -253,42 +253,67 @@ pub enum MixedUnit {
253253 /// For example, if '¥' appears in a string it is represented here as
254254 /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
255255 /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`
256- Char ( char ) ,
256+ Char ( NonZero < char > ) ,
257257
258258 /// Used for high bytes (`\x80`..`\xff`).
259259 ///
260260 /// For example, if `\xa5` appears in a string it is represented here as
261261 /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
262262 /// byte string as the single byte `0xa5`.
263- HighByte ( u8 ) ,
263+ HighByte ( NonZero < u8 > ) ,
264264}
265265
// Infallible conversion: a non-zero character is wrapped directly in `MixedUnit::Char`.
// The source type changes from `char` (removed line) to `NonZero<char>` (added line).
266- impl From < char > for MixedUnit {
266+ impl From < NonZero < char > > for MixedUnit {
267267 #[ inline]
268- fn from ( c : char ) -> Self {
268+ fn from ( c : NonZero < char > ) -> Self {
269269 MixedUnit :: Char ( c)
270270 }
271271}
272272
// Infallible conversion from a non-zero byte: ASCII bytes become `MixedUnit::Char`,
// all other bytes become `MixedUnit::HighByte`.
273- impl From < u8 > for MixedUnit {
273+ impl From < NonZero < u8 > > for MixedUnit {
274274 #[ inline]
275- fn from ( n : u8 ) -> Self {
276- if n . is_ascii ( ) {
277- MixedUnit :: Char ( n as char )
275+ fn from ( byte : NonZero < u8 > ) -> Self {
276+ if byte . get ( ) . is_ascii ( ) {
// The `unwrap` cannot fail: `byte` is non-zero, so `byte.get() as char` is never '\0'.
277+ MixedUnit :: Char ( NonZero :: new ( byte . get ( ) as char ) . unwrap ( ) )
278278 } else {
279- MixedUnit :: HighByte ( n )
279+ MixedUnit :: HighByte ( byte )
280280 }
281281 }
282282}
283283
// Fallible conversion from an arbitrary `char`: '\0' is rejected with
// `EscapeError::NulInCStr`; every other character becomes `MixedUnit::Char`.
284+ impl TryFrom < char > for MixedUnit {
285+ type Error = EscapeError ;
286+
287+ #[ inline]
288+ fn try_from ( c : char ) -> Result < Self , EscapeError > {
// `NonZero::new` is `None` exactly when `c` is '\0'.
289+ NonZero :: new ( c)
290+ . map ( MixedUnit :: Char )
291+ . ok_or ( EscapeError :: NulInCStr )
292+ }
293+ }
294+
// Fallible conversion from an arbitrary byte: zero is rejected with
// `EscapeError::NulInCStr`; non-zero bytes go through `From<NonZero<u8>>`.
295+ impl TryFrom < u8 > for MixedUnit {
296+ type Error = EscapeError ;
297+
298+ #[ inline]
299+ fn try_from ( byte : u8 ) -> Result < Self , EscapeError > {
300+ NonZero :: new ( byte)
// `From::from` resolves to the `From<NonZero<u8>>` impl, which decides between
// the `Char` (ASCII) and `HighByte` (non-ASCII) variants.
301+ . map ( From :: from)
302+ . ok_or ( EscapeError :: NulInCStr )
303+ }
304+ }
305+
284306/// Trait for unescaping escape sequences in strings
285307trait Unescape {
286308 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
287- type Unit : From < u8 > ;
309+ type Unit ;
288310
289311 /// Result of unescaping the zero char ('\0')
290312 const ZERO_RESULT : Result < Self :: Unit , EscapeError > ;
291313
314+ /// Converts non-zero bytes to the unit type
315+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit ;
316+
292317 /// Converts chars to the unit type
293318 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > ;
294319
@@ -319,18 +344,20 @@ trait Unescape {
319344 if c == '0' {
320345 Self :: ZERO_RESULT
321346 } else {
322- simple_escape ( c) . map ( |b| b. into ( ) ) . or_else ( |c| match c {
323- 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
324- 'u' => Self :: unicode2unit ( {
325- let value = unicode_escape ( chars) ?;
326- if value > char:: MAX as u32 {
327- Err ( EscapeError :: OutOfRangeUnicodeEscape )
328- } else {
329- char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
330- }
331- } ) ,
332- _ => Err ( EscapeError :: InvalidEscape ) ,
333- } )
347+ simple_escape ( c)
348+ . map ( |b| Self :: nonzero_byte2unit ( b) )
349+ . or_else ( |c| match c {
350+ 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
351+ 'u' => Self :: unicode2unit ( {
352+ let value = unicode_escape ( chars) ?;
353+ if value > char:: MAX as u32 {
354+ Err ( EscapeError :: OutOfRangeUnicodeEscape )
355+ } else {
356+ char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
357+ }
358+ } ) ,
359+ _ => Err ( EscapeError :: InvalidEscape ) ,
360+ } )
334361 }
335362 }
336363
@@ -373,9 +400,9 @@ trait Unescape {
373400///
374401/// Parses the character of an ASCII escape (except nul) without the leading backslash.
375402#[ inline] // single use in Unescape::unescape_1
376- fn simple_escape ( c : char ) -> Result < u8 , char > {
403+ fn simple_escape ( c : char ) -> Result < NonZero < u8 > , char > {
377404 // Previous character was '\\', unescape what follows.
378- Ok ( match c {
405+ Ok ( NonZero :: new ( match c {
379406 '"' => b'"' ,
380407 'n' => b'\n' ,
381408 'r' => b'\r' ,
@@ -384,6 +411,7 @@ fn simple_escape(c: char) -> Result<u8, char> {
384411 '\'' => b'\'' ,
385412 _ => Err ( c) ?,
386413 } )
414+ . unwrap ( ) )
387415}
388416
389417/// Interpret a hexadecimal escape
@@ -489,6 +517,11 @@ impl Unescape for str {
489517
490518 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( '\0' ) ;
491519
// `str` implementation: unwraps the non-zero byte and converts the `u8` with `into()`
// (the unit type here is `char`, as shown by `ZERO_RESULT = Ok('\0')`).
520+ #[ inline]
521+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
522+ b. get ( ) . into ( )
523+ }
524+
492525 #[ inline]
493526 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
494527 Ok ( c)
@@ -514,6 +547,11 @@ impl Unescape for [u8] {
514547
515548 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( b'\0' ) ;
516549
// `[u8]` implementation: the unit is the plain byte, so only the `NonZero` wrapper is removed.
550+ #[ inline]
551+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
552+ b. get ( )
553+ }
554+
517555 #[ inline]
518556 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
519557 char2byte ( c)
@@ -535,24 +573,19 @@ impl Unescape for CStr {
535573
536574 const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Err ( EscapeError :: NulInCStr ) ;
537575
// `CStr` implementation: converts via `into()`; presumably this resolves to
// `From<NonZero<u8>> for MixedUnit` (the unit type is declared outside this hunk — confirm).
576+ #[ inline]
577+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
578+ b. into ( )
579+ }
580+
// Converts a char to the C-string unit. The removed lines rejected '\0' with
// `NulInCStr` and wrapped everything else in `MixedUnit::Char`; the added line
// delegates to `try_into()`, i.e. presumably the `TryFrom<char> for MixedUnit` impl.
538581 #[ inline]
539582 fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
540- if c == '\0' {
541- Err ( EscapeError :: NulInCStr )
542- } else {
543- Ok ( MixedUnit :: Char ( c) )
544- }
583+ c. try_into ( )
545584 }
546585
// Converts the byte of a hex escape to the C-string unit. The removed lines
// rejected zero with `NulInCStr`, mapped ASCII to `MixedUnit::Char` and other
// bytes to `MixedUnit::HighByte`; the added line delegates to `try_into()`,
// i.e. presumably the `TryFrom<u8> for MixedUnit` impl.
547586 #[ inline]
548587 fn hex2unit ( byte : u8 ) -> Result < Self :: Unit , EscapeError > {
549- if byte == b'\0' {
550- Err ( EscapeError :: NulInCStr )
551- } else if byte. is_ascii ( ) {
552- Ok ( MixedUnit :: Char ( byte as char ) )
553- } else {
554- Ok ( MixedUnit :: HighByte ( byte) )
555- }
588+ byte. try_into ( )
556589 }
557590
558591 #[ inline]
0 commit comments