@@ -214,7 +214,8 @@ pub struct Tokenizer<'a> {
214214 /// ensure that computing the column will give the result in units
215215 /// of UTF-16 characters.
216216 current_line_start_position : usize ,
217- current_position : usize ,
217+ position_difference : u16 ,
218+ current_line_start_difference : u16 ,
218219 current_line_number : u32 ,
219220 var_or_env_functions : SeenStatus ,
220221 source_map_url : Option < & ' a str > ,
@@ -235,8 +236,9 @@ impl<'a> Tokenizer<'a> {
235236 input,
236237 position : 0 ,
237238 current_line_start_position : 0 ,
238- current_position : 0 ,
239+ current_line_start_difference : 0 ,
239240 current_line_number : 0 ,
241+ position_difference : 0 ,
240242 var_or_env_functions : SeenStatus :: DontCare ,
241243 source_map_url : None ,
242244 source_url : None ,
@@ -279,7 +281,12 @@ impl<'a> Tokenizer<'a> {
279281 pub fn current_source_location ( & self ) -> SourceLocation {
280282 SourceLocation {
281283 line : self . current_line_number ,
282- column : ( self . position - self . current_line_start_position + 1 ) as u32 ,
284+ column : (
285+ self . position -
286+ self . current_line_start_position -
287+ ( self . position_difference - self . current_line_start_difference ) as usize
288+ + 1
289+ ) as u32 ,
283290 }
284291 }
285292
@@ -298,7 +305,8 @@ impl<'a> Tokenizer<'a> {
298305 ParserState {
299306 position : self . position ,
300307 current_line_start_position : self . current_line_start_position ,
301- current_position : self . current_position ,
308+ current_line_start_difference : self . current_line_start_difference ,
309+ position_difference : self . position_difference ,
302310 current_line_number : self . current_line_number ,
303311 at_start_of : None ,
304312 }
@@ -308,7 +316,8 @@ impl<'a> Tokenizer<'a> {
308316 pub fn reset ( & mut self , state : & ParserState ) {
309317 self . position = state. position ;
310318 self . current_line_start_position = state. current_line_start_position ;
311- self . current_position = state. current_position ;
319+ self . current_line_start_difference = state. current_line_start_difference ;
320+ self . position_difference = state. position_difference ;
312321 self . current_line_number = state. current_line_number ;
313322 }
314323
@@ -372,7 +381,6 @@ impl<'a> Tokenizer<'a> {
372381 debug_assert ! ( b != b'\r' && b != b'\n' && b != b'\x0C' ) ;
373382 }
374383 }
375- self . current_position = self . current_position . wrapping_add ( n) ;
376384 self . position += n
377385 }
378386
@@ -394,8 +402,7 @@ impl<'a> Tokenizer<'a> {
394402 debug_assert ! ( self . next_byte_unchecked( ) & 0xF0 == 0xF0 ) ;
395403 // This takes two UTF-16 characters to represent, so we
396404 // actually have an undercount.
397- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
398- self . current_position = self . current_position . wrapping_add ( 2 ) ;
405+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
399406 self . position += 1 ;
400407 }
401408
@@ -407,7 +414,7 @@ impl<'a> Tokenizer<'a> {
407414 // Continuation bytes contribute to column overcount. Note
408415 // that due to the special case for the 4-byte sequence intro,
409416 // we must use wrapping add here.
410- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
417+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
411418 self . position += 1 ;
412419 }
413420
@@ -420,14 +427,11 @@ impl<'a> Tokenizer<'a> {
420427 if byte & 0xF0 == 0xF0 {
421428 // This takes two UTF-16 characters to represent, so we
422429 // actually have an undercount.
423- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
424- self . current_position = self . current_position . wrapping_add ( 2 ) ;
430+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
425431 } else if byte & 0xC0 == 0x80 {
426432 // Note that due to the special case for the 4-byte
427433 // sequence intro, we must use wrapping add here.
428- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
429- } else {
430- self . current_position = self . current_position . wrapping_add ( 1 ) ;
434+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
431435 }
432436 }
433437
@@ -443,12 +447,11 @@ impl<'a> Tokenizer<'a> {
443447 let byte = self . next_byte_unchecked ( ) ;
444448 debug_assert ! ( byte == b'\r' || byte == b'\n' || byte == b'\x0C' ) ;
445449 self . position += 1 ;
446- self . current_position = self . current_position . wrapping_add ( 1 ) ;
447450 if byte == b'\r' && self . next_byte ( ) == Some ( b'\n' ) {
448451 self . position += 1 ;
449- self . current_position = self . current_position . wrapping_add ( 1 ) ;
450452 }
451453 self . current_line_start_position = self . position ;
454+ self . current_line_start_difference = self . position_difference ;
452455 self . current_line_number += 1 ;
453456 }
454457
@@ -462,14 +465,13 @@ impl<'a> Tokenizer<'a> {
462465 fn consume_char ( & mut self ) -> char {
463466 let c = self . next_char ( ) ;
464467 let len_utf8 = c. len_utf8 ( ) ;
468+ let len_utf16 = c. len_utf16 ( ) ;
465469 self . position += len_utf8;
466470 // Note that due to the special case for the 4-byte sequence
467471 // intro, we must use wrapping add here.
468- let len_utf16 = c. len_utf16 ( ) ;
469- self . current_line_start_position = self
470- . current_line_start_position
471- . wrapping_add ( len_utf8 - len_utf16) ;
472- self . current_position = self . current_position . wrapping_add ( len_utf16) ;
472+ self . position_difference = self
473+ . position_difference
474+ . wrapping_add ( ( len_utf8 - len_utf16) as u16 ) ;
473475 c
474476 }
475477
@@ -1160,16 +1162,12 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
11601162 }
11611163 } ;
11621164 match_byte ! { b,
1163- b' ' | b'\t' => {
1164- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1165- } ,
1165+ b' ' | b'\t' => { } ,
11661166 b'\n' | b'\x0C' => {
11671167 newlines += 1 ;
11681168 last_newline = offset;
1169- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
11701169 }
11711170 b'\r' => {
1172- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
11731171 if from_start. as_bytes( ) . get( offset + 1 ) != Some ( & b'\n' ) {
11741172 newlines += 1 ;
11751173 last_newline = offset;
0 commit comments