Skip to content

Commit b2f097a

Browse files
committed
Remove most of the unnecessary UTF-8 logic from the tokenizer.
1 parent e02d16a commit b2f097a

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

src/tokenizer.rs

+8 -11
Original file line number | Diff line number | Diff line change
@@ -359,17 +359,14 @@ impl<'a> Tokenizer<'a> {
359359
}
360360

361361
#[inline]
362-
fn next_char(&self) -> char { self.char_at(0) }
363-
364-
#[inline]
365-
fn char_at(&self, offset: usize) -> char {
366-
self.input[self.position + offset..].chars().next().unwrap()
362+
fn next_char(&self) -> char {
363+
self.input[self.position..].chars().next().unwrap()
367364
}
368365

369366
#[inline]
370367
fn has_newline_at(&self, offset: usize) -> bool {
371368
self.position + offset < self.input.len() &&
372-
matches!(self.char_at(offset), '\n' | '\r' | '\x0C')
369+
matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
373370
}
374371

375372
#[inline]
@@ -759,9 +756,9 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
759756
// Do all the math in f64 so that large numbers overflow to +/-inf
760757
// and i32::{MIN, MAX} are within range.
761758

762-
let (has_sign, sign) = match tokenizer.next_char() {
763-
'-' => (true, -1.),
764-
'+' => (true, 1.),
759+
let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
760+
b'-' => (true, -1.),
761+
b'+' => (true, 1.),
765762
_ => (false, 1.),
766763
};
767764
if has_sign {
@@ -780,8 +777,8 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
780777
let mut is_integer = true;
781778

782779
let mut fractional_part: f64 = 0.;
783-
if tokenizer.has_at_least(1) && tokenizer.next_char() == '.'
784-
&& matches!(tokenizer.char_at(1), '0'...'9') {
780+
if tokenizer.has_at_least(1) && tokenizer.next_byte_unchecked() == b'.'
781+
&& matches!(tokenizer.byte_at(1), b'0'...b'9') {
785782
is_integer = false;
786783
tokenizer.advance(1); // Consume '.'
787784
let mut factor = 0.1;

0 commit comments

Comments
 (0)