@@ -6,7 +6,7 @@ mod utf8;
6
6
7
7
use self :: buffer:: Buffer ;
8
8
use self :: machine:: { transition, Action , State } ;
9
- use self :: utf8:: scan_utf8 ;
9
+ use self :: utf8:: scan_utf8_length ;
10
10
11
11
use super :: err:: { Error , ErrorKind } ;
12
12
use super :: opt:: Options ;
@@ -73,6 +73,7 @@ impl<R: std::io::Read> Scanner<R> {
73
73
// Manage the internal buffer
74
74
75
75
/// Determine whether the scanner's buffer has readable content available.
76
+ #[ inline]
76
77
pub fn is_readable ( & self ) -> bool {
77
78
// Pure delegation: the answer comes from the buffer's own `is_readable`.
self . buffer . is_readable ( )
78
79
}
@@ -110,6 +111,7 @@ impl<R: std::io::Read> Scanner<R> {
110
111
// Support for reading bytes
111
112
112
113
/// Determine whether this scanner's state machine is in-flight.
114
+ #[ inline]
113
115
pub fn in_flight ( & self ) -> bool {
114
116
// "In flight" means the state machine is in any state other than
// `State::Ground`.
!matches ! ( self . state, State :: Ground )
115
117
}
@@ -150,7 +152,7 @@ impl<R: std::io::Read> Scanner<R> {
150
152
///
151
153
/// This method returns a wrapped boolean indicating whether to return a
152
154
/// text token. It also handles malformed UTF-8 errors.
153
- fn scan_text ( & mut self , batch : bool ) -> Result < bool , Error > {
155
+ fn scan_text ( & mut self ) -> Result < bool , Error > {
154
156
let mut bytes = self . buffer . peek_many ( ) ;
155
157
let mut index = 0 ;
156
158
@@ -159,12 +161,18 @@ impl<R: std::io::Read> Scanner<R> {
159
161
break ;
160
162
}
161
163
162
- // Oops: So, aggressive linting with Clippy suggest to use an
163
- // assertion that preempts repeated bounds checking. But "0 <
164
- // bytes.len()" triggers Clippy because it's not idiomatic and
165
- // "!bytes.is_empty()" is not recognized by the assertion lint. Oh
166
- // and for good measure, we can only add attributes to items, not
167
- // macro invocations. Hence, let's create a nested scope.
164
+ // The first byte of a UTF-8 character is either ASCII or
165
+ // 0xC2..=0xF4. That means that treating 0x80..0xA0 as C1 does not
166
+ // interfere with UTF-8 start bytes. Continuation bytes (0x80..=0xBF),
167
+ // however, do overlap with that range.
168
+
169
+ // Oops: Aggressive linting with Clippy suggests using an assertion
170
+ // that preempts repeated bounds checking. But "0 < bytes.len()"
171
+ // triggers Clippy because it's not idiomatic and
172
+ // "!bytes.is_empty()" is not recognized by the assertion lint. On
173
+ // top of that, we can only add attributes to items, not macro
174
+ // invocations. Hence, we create an annotated, nested scope and use
175
+ // the non-idiomatic test in the assertion.
168
176
#[ allow( clippy:: len_zero) ]
169
177
{
170
178
assert ! ( 0 < bytes. len( ) , "a nonempty slice must contain 1 byte" ) ;
@@ -173,7 +181,7 @@ impl<R: std::io::Read> Scanner<R> {
173
181
}
174
182
}
175
183
176
- match scan_utf8 ( bytes) {
184
+ match scan_utf8_length ( bytes) {
177
185
Ok ( size) => {
178
186
index += size;
179
187
bytes = & bytes[ size..] ;
@@ -187,10 +195,6 @@ impl<R: std::io::Read> Scanner<R> {
187
195
}
188
196
}
189
197
}
190
-
191
- if !batch {
192
- break ;
193
- }
194
198
}
195
199
196
200
if 0 < index {
@@ -263,12 +267,14 @@ impl<R: std::io::Read> Scanner<R> {
263
267
}
264
268
265
269
/// Create a control token for the byte.
270
+ #[ inline]
266
271
fn new_control_token ( & mut self , byte : u8 ) -> Result < Token , Error > {
267
272
// Store the byte in the first slot of `self.extra` so that the token
// below can borrow it from the scanner.
self . extra [ 0 ] = byte;
268
273
// The token wraps a reference to `self.extra`; this function always
// returns `Ok`.
Ok ( Token :: Control ( & self . extra ) )
269
274
}
270
275
271
276
/// Create a new sequence token.
277
+ #[ inline]
272
278
fn new_sequence_token ( & self ) -> Result < Token , Error > {
273
279
if self . did_overflow {
274
280
Err ( ErrorKind :: OutOfMemory . into ( ) )
@@ -291,7 +297,7 @@ impl<R: std::io::Read> Scanner<R> {
291
297
}
292
298
293
299
// Try fast path for text
294
- if matches ! ( self . state, State :: Ground ) && self . scan_text ( true ) ? {
300
+ if matches ! ( self . state, State :: Ground ) && self . scan_text ( ) ? {
295
301
return Ok ( Token :: Text ( self . buffer . token ( ) ) ) ;
296
302
}
297
303
0 commit comments