Skip to content

Commit 5c98bc0

Browse files
committed
make minor code improvements
1 parent 3808835 commit 5c98bc0

File tree

3 files changed

+46
-16
lines changed

3 files changed

+46
-16
lines changed

crates/prettytty/src/api.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ macro_rules! fuse_sgr {
151151
// ------------------------------------------------------------------------------------------------
152152

153153
/// Control codes that start or end ANSI escape sequences.
154-
#[derive(Clone, Copy, Debug, PartialEq)]
154+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
155155
pub enum Control {
156156
/// Bell (C0)
157157
BEL = 0x07,

crates/prettytty/src/scan.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ mod utf8;
66

77
use self::buffer::Buffer;
88
use self::machine::{transition, Action, State};
9-
use self::utf8::scan_utf8;
9+
use self::utf8::scan_utf8_length;
1010

1111
use super::err::{Error, ErrorKind};
1212
use super::opt::Options;
@@ -73,6 +73,7 @@ impl<R: std::io::Read> Scanner<R> {
7373
// Manage the internal buffer
7474

7575
/// Determine whether the scanner's buffer has readable content available.
76+
#[inline]
7677
pub fn is_readable(&self) -> bool {
7778
self.buffer.is_readable()
7879
}
@@ -110,6 +111,7 @@ impl<R: std::io::Read> Scanner<R> {
110111
// Support for reading bytes
111112

112113
/// Determine whether this scanner's state machine is in-flight.
114+
#[inline]
113115
pub fn in_flight(&self) -> bool {
114116
!matches!(self.state, State::Ground)
115117
}
@@ -150,7 +152,7 @@ impl<R: std::io::Read> Scanner<R> {
150152
///
151153
/// This method returns a wrapped boolean indicating whether to return a
152154
/// text token. It also handles malformed UTF-8 errors.
153-
fn scan_text(&mut self, batch: bool) -> Result<bool, Error> {
155+
fn scan_text(&mut self) -> Result<bool, Error> {
154156
let mut bytes = self.buffer.peek_many();
155157
let mut index = 0;
156158

@@ -159,12 +161,18 @@ impl<R: std::io::Read> Scanner<R> {
159161
break;
160162
}
161163

162-
// Oops: So, aggressive linting with Clippy suggest to use an
163-
// assertion that preempts repeated bounds checking. But "0 <
164-
// bytes.len()" triggers Clippy because it's not idiomatic and
165-
// "!bytes.is_empty()" is not recognized by the assertion lint. Oh
166-
// and for good measure, we can only add attributes to items, not
167-
// macro invocations. Hence, let's create a nested scope.
164+
// The first byte of a UTF-8 character is either ASCII or
165+
// 0xC2..=0xF4. That means that treating 0x80..0xA0 as C1 does not
166+
// interfere with UTF-8 start bytes. That, however, is not possible
167+
// for continuation bytes.
168+
169+
// Oops: Aggressive linting with Clippy suggests using an assertion
170+
// that preempts repeated bounds checking. But "0 < bytes.len()"
171+
// triggers Clippy because it's not idiomatic and
172+
// "!bytes.is_empty()" is not recognized by the assertion lint. On
173+
// top of that, we can only add attributes to items, not macro
174+
// invocations. Hence, we create an annotated, nested scope and use
175+
// the non-idiomatic test in the assertion.
168176
#[allow(clippy::len_zero)]
169177
{
170178
assert!(0 < bytes.len(), "a nonempty slice must contain 1 byte");
@@ -173,7 +181,7 @@ impl<R: std::io::Read> Scanner<R> {
173181
}
174182
}
175183

176-
match scan_utf8(bytes) {
184+
match scan_utf8_length(bytes) {
177185
Ok(size) => {
178186
index += size;
179187
bytes = &bytes[size..];
@@ -187,10 +195,6 @@ impl<R: std::io::Read> Scanner<R> {
187195
}
188196
}
189197
}
190-
191-
if !batch {
192-
break;
193-
}
194198
}
195199

196200
if 0 < index {
@@ -263,12 +267,14 @@ impl<R: std::io::Read> Scanner<R> {
263267
}
264268

265269
/// Create a control token for the byte.
270+
#[inline]
266271
fn new_control_token(&mut self, byte: u8) -> Result<Token, Error> {
267272
self.extra[0] = byte;
268273
Ok(Token::Control(&self.extra))
269274
}
270275

271276
/// Create a new sequence token.
277+
#[inline]
272278
fn new_sequence_token(&self) -> Result<Token, Error> {
273279
if self.did_overflow {
274280
Err(ErrorKind::OutOfMemory.into())
@@ -291,7 +297,7 @@ impl<R: std::io::Read> Scanner<R> {
291297
}
292298

293299
// Try fast path for text
294-
if matches!(self.state, State::Ground) && self.scan_text(true)? {
300+
if matches!(self.state, State::Ground) && self.scan_text()? {
295301
return Ok(Token::Text(self.buffer.token()));
296302
}
297303

crates/prettytty/src/scan/utf8.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,28 @@
1-
pub(super) fn scan_utf8(bytes: &[u8]) -> core::result::Result<usize, usize> {
1+
#[allow(dead_code)]
2+
pub(super) fn is_utf8_start(byte: u8) -> bool {
3+
byte < 0x80 || (0xc2..=0xf4).contains(&byte)
4+
}
5+
6+
#[allow(dead_code)]
7+
pub(super) fn read_utf8(bytes: &[u8]) -> core::result::Result<(char, usize), usize> {
8+
// See https://github.com/rust-lang/rust/blob/master/library/core/src/str/validations.rs
9+
const CONTINUATION_MASK: u8 = 0b0011_1111;
10+
11+
// scan_utf8_length() inspects all bytes of a valid UTF-8 character.
12+
let length = scan_utf8_length(bytes)?;
13+
assert!(length <= bytes.len());
14+
15+
let mut codepoint = (bytes[0] & (0x7f >> length)) as u32;
16+
for index in 1..length {
17+
codepoint = (codepoint << 6) | (bytes[index] & CONTINUATION_MASK) as u32;
18+
}
19+
20+
// SAFETY: scan_utf8_length() validated length bytes as UTF-8, above loop
21+
// converted to u32, using same logic as Rust standard library.
22+
Ok((unsafe { char::from_u32_unchecked(codepoint) }, length))
23+
}
24+
25+
pub(super) fn scan_utf8_length(bytes: &[u8]) -> core::result::Result<usize, usize> {
226
// See https://github.com/rust-lang/rust/blob/master/library/core/src/str/validations.rs
327
let mut index = 0;
428
let len = bytes.len();

0 commit comments

Comments
 (0)