@@ -312,13 +312,14 @@ public class WhitespaceLinter {
312
312
formattedRun: ArraySlice < UTF8 . CodeUnit >
313
313
) {
314
314
guard userRun != formattedRun else { return }
315
-
315
+ let userString = String ( decoding: userRun, as: UTF8 . self)
316
+ let formattedString = String ( decoding: formattedRun, as: UTF8 . self)
316
317
// This assumes tabs will always be forbidden for inter-token spacing (but not for leading
317
318
// indentation).
318
319
if userRun. contains ( utf8Tab) {
319
320
diagnose ( . spacingCharError, category: . spacingCharacter, utf8Offset: userIndex)
320
- } else if formattedRun . count != userRun . count {
321
- let delta = formattedRun . count - userRun . count
321
+ } else if formattedString . count != userString . count {
322
+ let delta = formattedString . count - userString . count
322
323
diagnose ( . spacingError( delta) , category: . spacing, utf8Offset: userIndex)
323
324
}
324
325
}
@@ -339,20 +340,26 @@ public class WhitespaceLinter {
339
340
startingAt offset: Int ,
340
341
in data: [ UTF8 . CodeUnit ]
341
342
) -> ArraySlice < UTF8 . CodeUnit > {
342
- func isWhitespace( _ char: UTF8 . CodeUnit ) -> Bool {
343
- switch char {
344
- case UInt8 ( ascii: " " ) , UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \t " ) , UInt8 ( ascii: " \r " ) , /*VT*/ 0x0B , /*FF*/ 0x0C :
345
- return true
343
+ var currentIndex = offset
344
+ while currentIndex < data. count {
345
+ if let unicodeException = UnicodeWhitespace . allCases. first ( where: { exception in
346
+ let bytes = exception. utf8Bytes
347
+ return currentIndex + bytes. count <= data. count
348
+ && data [ currentIndex..< currentIndex + bytes. count] . elementsEqual ( bytes)
349
+ } ) {
350
+ currentIndex += unicodeException. utf8Bytes. count
351
+ continue
352
+ }
353
+
354
+ switch data [ currentIndex] {
355
+ case UInt8 ( ascii: " " ) , UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \t " ) , UInt8 ( ascii: " \r " ) ,
356
+ /*VT*/ 0x0B , /*FF*/ 0x0C :
357
+ currentIndex += 1
346
358
default :
347
- return false
359
+ return data [ offset ..< currentIndex ]
348
360
}
349
361
}
350
- guard
351
- let whitespaceEnd = data [ offset... ] . firstIndex ( where: { !isWhitespace( $0) } )
352
- else {
353
- return data [ offset..< data. endIndex]
354
- }
355
- return data [ offset..< whitespaceEnd]
362
+ return data [ offset..< currentIndex]
356
363
}
357
364
358
365
/// Returns the code unit at the given index, or nil if the index is the end of the data.
@@ -412,6 +419,22 @@ public class WhitespaceLinter {
412
419
}
413
420
}
414
421
422
/// Whitespace code points outside the ASCII range, each identified by its UTF-8 encoding.
///
/// Every case is a three-byte sequence in UTF-8, so a byte-oriented scan has to compare a
/// whole run of code units rather than a single byte to recognize one of them.
private enum UnicodeWhitespace: CaseIterable {
  /// U+2028 LINE SEPARATOR.
  case u2028
  /// U+2029 PARAGRAPH SEPARATOR.
  case u2029

  /// The UTF-8 code units that encode this code point.
  var utf8Bytes: [UTF8.CodeUnit] {
    // Both code points share the lead bytes 0xE2 0x80; only the final byte differs.
    let finalByte: UTF8.CodeUnit
    switch self {
    case .u2028:
      finalByte = 0xA8
    case .u2029:
      finalByte = 0xA9
    }
    return [0xE2, 0x80, finalByte]
  }
}
437
+
415
438
/// Describes the composition of the whitespace that creates an indentation for a line of code.
416
439
public enum WhitespaceIndentation : Equatable {
417
440
/// The line has no preceding whitespace, meaning there's no indentation.
0 commit comments