@@ -291,7 +291,7 @@ extension Processor {
291
291
_ bitset: DSLTree . CustomCharacterClass . AsciiBitset ,
292
292
isScalarSemantics: Bool
293
293
) -> Bool {
294
- guard let next = input. matchBitset (
294
+ guard let next = input. matchASCIIBitset (
295
295
bitset,
296
296
at: currentPosition,
297
297
limitedBy: end,
@@ -723,22 +723,53 @@ extension String {
723
723
return idx
724
724
}
725
725
726
- func matchBitset (
726
+ func matchASCIIBitset (
727
727
_ bitset: DSLTree . CustomCharacterClass . AsciiBitset ,
728
728
at pos: Index ,
729
729
limitedBy end: Index ,
730
730
isScalarSemantics: Bool
731
731
) -> Index ? {
732
- // TODO: extremely quick-check-able
733
- // TODO: can be sped up with string internals
734
- if isScalarSemantics {
735
- guard pos < end else { return nil }
736
- guard bitset. matches ( unicodeScalars [ pos] ) else { return nil }
737
- return unicodeScalars. index ( after: pos)
738
- } else {
739
- guard let ( char, next) = characterAndEnd ( at: pos, limitedBy: end) ,
740
- bitset. matches ( char) else { return nil }
741
- return next
732
+
733
+ // FIXME: Inversion should be tracked and handled in only one place.
734
+ // That is, we should probably store it as a bit in the instruction, so that
735
+ // bitset matching and bitset inversion are bit-based rather than semantically
736
+ // inverting the notion of a match or not. As-is, we need to track both
737
+ // meanings in some code paths.
738
+ let isInverted = bitset. isInverted
739
+
740
+ // TODO: More fodder for refactoring `_quickASCIICharacter`, see the comment
741
+ // there
742
+ guard let ( asciiByte, next, isCRLF) = _quickASCIICharacter (
743
+ at: pos,
744
+ limitedBy: end
745
+ ) else {
746
+ if isScalarSemantics {
747
+ guard pos < end else { return nil }
748
+ guard bitset. matches ( unicodeScalars [ pos] ) else { return nil }
749
+ return unicodeScalars. index ( after: pos)
750
+ } else {
751
+ guard let ( char, next) = characterAndEnd ( at: pos, limitedBy: end) ,
752
+ bitset. matches ( char) else { return nil }
753
+ return next
754
+ }
755
+ }
756
+
757
+ guard bitset. matches ( asciiByte) else {
758
+ // FIXME: check inversion here after it is refactored out of bitset
759
+ return nil
742
760
}
761
+
762
+ // CR-LF should only match `[\r]` in scalar semantic mode or if inverted
763
+ if isCRLF {
764
+ if isScalarSemantics {
765
+ return self . unicodeScalars. index ( before: next)
766
+ }
767
+ if isInverted {
768
+ return next
769
+ }
770
+ return nil
771
+ }
772
+
773
+ return next
743
774
}
744
775
}
0 commit comments