@@ -214,7 +214,8 @@ pub struct Tokenizer<'a> {
214
214
/// ensure that computing the column will give the result in units
215
215
/// of UTF-16 characters.
216
216
current_line_start_position : usize ,
217
- current_position : usize ,
217
+ position_difference : u16 ,
218
+ current_line_start_difference : u16 ,
218
219
current_line_number : u32 ,
219
220
var_or_env_functions : SeenStatus ,
220
221
source_map_url : Option < & ' a str > ,
@@ -235,8 +236,9 @@ impl<'a> Tokenizer<'a> {
235
236
input,
236
237
position : 0 ,
237
238
current_line_start_position : 0 ,
238
- current_position : 0 ,
239
+ current_line_start_difference : 0 ,
239
240
current_line_number : 0 ,
241
+ position_difference : 0 ,
240
242
var_or_env_functions : SeenStatus :: DontCare ,
241
243
source_map_url : None ,
242
244
source_url : None ,
@@ -279,7 +281,12 @@ impl<'a> Tokenizer<'a> {
279
281
pub fn current_source_location ( & self ) -> SourceLocation {
280
282
SourceLocation {
281
283
line : self . current_line_number ,
282
- column : ( self . position - self . current_line_start_position + 1 ) as u32 ,
284
+ column : (
285
+ self . position -
286
+ self . current_line_start_position -
287
+ ( self . position_difference - self . current_line_start_difference ) as usize
288
+ + 1
289
+ ) as u32 ,
283
290
}
284
291
}
285
292
@@ -298,7 +305,8 @@ impl<'a> Tokenizer<'a> {
298
305
ParserState {
299
306
position : self . position ,
300
307
current_line_start_position : self . current_line_start_position ,
301
- current_position : self . current_position ,
308
+ current_line_start_difference : self . current_line_start_difference ,
309
+ position_difference : self . position_difference ,
302
310
current_line_number : self . current_line_number ,
303
311
at_start_of : None ,
304
312
}
@@ -308,7 +316,8 @@ impl<'a> Tokenizer<'a> {
308
316
pub fn reset ( & mut self , state : & ParserState ) {
309
317
self . position = state. position ;
310
318
self . current_line_start_position = state. current_line_start_position ;
311
- self . current_position = state. current_position ;
319
+ self . current_line_start_difference = state. current_line_start_difference ;
320
+ self . position_difference = state. position_difference ;
312
321
self . current_line_number = state. current_line_number ;
313
322
}
314
323
@@ -372,7 +381,6 @@ impl<'a> Tokenizer<'a> {
372
381
debug_assert ! ( b != b'\r' && b != b'\n' && b != b'\x0C' ) ;
373
382
}
374
383
}
375
- self . current_position = self . current_position . wrapping_add ( n) ;
376
384
self . position += n
377
385
}
378
386
@@ -394,8 +402,7 @@ impl<'a> Tokenizer<'a> {
394
402
debug_assert ! ( self . next_byte_unchecked( ) & 0xF0 == 0xF0 ) ;
395
403
// This takes two UTF-16 characters to represent, so we
396
404
// actually have an undercount.
397
- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
398
- self . current_position = self . current_position . wrapping_add ( 2 ) ;
405
+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
399
406
self . position += 1 ;
400
407
}
401
408
@@ -407,7 +414,7 @@ impl<'a> Tokenizer<'a> {
407
414
// Continuation bytes contribute to column overcount. Note
408
415
// that due to the special case for the 4-byte sequence intro,
409
416
// we must use wrapping add here.
410
- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
417
+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
411
418
self . position += 1 ;
412
419
}
413
420
@@ -420,14 +427,11 @@ impl<'a> Tokenizer<'a> {
420
427
if byte & 0xF0 == 0xF0 {
421
428
// This takes two UTF-16 characters to represent, so we
422
429
// actually have an undercount.
423
- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
424
- self . current_position = self . current_position . wrapping_add ( 2 ) ;
430
+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
425
431
} else if byte & 0xC0 == 0x80 {
426
432
// Note that due to the special case for the 4-byte
427
433
// sequence intro, we must use wrapping add here.
428
- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
429
- } else {
430
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
434
+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
431
435
}
432
436
}
433
437
@@ -443,12 +447,11 @@ impl<'a> Tokenizer<'a> {
443
447
let byte = self . next_byte_unchecked ( ) ;
444
448
debug_assert ! ( byte == b'\r' || byte == b'\n' || byte == b'\x0C' ) ;
445
449
self . position += 1 ;
446
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
447
450
if byte == b'\r' && self . next_byte ( ) == Some ( b'\n' ) {
448
451
self . position += 1 ;
449
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
450
452
}
451
453
self . current_line_start_position = self . position ;
454
+ self . current_line_start_difference = self . position_difference ;
452
455
self . current_line_number += 1 ;
453
456
}
454
457
@@ -462,14 +465,13 @@ impl<'a> Tokenizer<'a> {
462
465
fn consume_char ( & mut self ) -> char {
463
466
let c = self . next_char ( ) ;
464
467
let len_utf8 = c. len_utf8 ( ) ;
468
+ let len_utf16 = c. len_utf16 ( ) ;
465
469
self . position += len_utf8;
466
470
// Note that due to the special case for the 4-byte sequence
467
471
// intro, we must use wrapping add here.
468
- let len_utf16 = c. len_utf16 ( ) ;
469
- self . current_line_start_position = self
470
- . current_line_start_position
471
- . wrapping_add ( len_utf8 - len_utf16) ;
472
- self . current_position = self . current_position . wrapping_add ( len_utf16) ;
472
+ self . position_difference = self
473
+ . position_difference
474
+ . wrapping_add ( ( len_utf8 - len_utf16) as u16 ) ;
473
475
c
474
476
}
475
477
@@ -1160,16 +1162,12 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
1160
1162
}
1161
1163
} ;
1162
1164
match_byte ! { b,
1163
- b' ' | b'\t' => {
1164
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1165
- } ,
1165
+ b' ' | b'\t' => { } ,
1166
1166
b'\n' | b'\x0C' => {
1167
1167
newlines += 1 ;
1168
1168
last_newline = offset;
1169
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1170
1169
}
1171
1170
b'\r' => {
1172
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1173
1171
if from_start. as_bytes( ) . get( offset + 1 ) != Some ( & b'\n' ) {
1174
1172
newlines += 1 ;
1175
1173
last_newline = offset;
0 commit comments