@@ -125,9 +125,9 @@ fileprivate extension Compiler.ByteCodeGen {
125
125
let boundaryCheck = idx == lastIdx
126
126
let scalar = s. unicodeScalars [ idx]
127
127
if options. isCaseInsensitive && scalar. properties. isCased {
128
- builder. buildMatchScalarCaseInsensitive ( scalar, boundaryCheck: boundaryCheck)
128
+ builder. buildMatchScalarCaseInsensitive ( scalar, boundaryCheck: boundaryCheck, reverse : false )
129
129
} else {
130
- builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck)
130
+ builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck, reverse : false )
131
131
}
132
132
}
133
133
return
@@ -153,9 +153,9 @@ fileprivate extension Compiler.ByteCodeGen {
153
153
let boundaryCheck = idx == lastIdx
154
154
let scalar = s. unicodeScalars [ idx]
155
155
if options. isCaseInsensitive && scalar. properties. isCased {
156
- builder. buildReverseMatchScalarCaseInsensitive ( scalar, boundaryCheck: boundaryCheck)
156
+ builder. buildMatchScalarCaseInsensitive ( scalar, boundaryCheck: boundaryCheck, reverse : true )
157
157
} else {
158
- builder. buildReverseMatchScalar ( scalar, boundaryCheck: boundaryCheck)
158
+ builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck, reverse : true )
159
159
}
160
160
}
161
161
return
@@ -207,36 +207,23 @@ fileprivate extension Compiler.ByteCodeGen {
207
207
}
208
208
209
209
mutating func emitCharacterClass( _ cc: DSLTree . Atom . CharacterClass ) {
210
- builder. buildMatchBuiltin ( model: cc. asRuntimeModel ( options) )
210
+ builder. buildMatchBuiltin ( model: cc. asRuntimeModel ( options) , reverse : reverse )
211
211
}
212
212
213
213
mutating func emitMatchScalar( _ s: UnicodeScalar ) {
214
214
assert ( options. semanticLevel == . unicodeScalar)
215
215
if options. isCaseInsensitive && s. properties. isCased {
216
- builder. buildMatchScalarCaseInsensitive ( s, boundaryCheck: false )
216
+ builder. buildMatchScalarCaseInsensitive ( s, boundaryCheck: false , reverse : reverse )
217
217
} else {
218
- builder. buildMatchScalar ( s, boundaryCheck: false )
219
- }
220
- }
221
-
222
- mutating func emitReverseMatchScalar( _ s: UnicodeScalar ) {
223
- assert ( options. semanticLevel == . unicodeScalar)
224
- if options. isCaseInsensitive && s. properties. isCased {
225
- builder. buildReverseMatchScalarCaseInsensitive ( s, boundaryCheck: false )
226
- } else {
227
- builder. buildReverseMatchScalar ( s, boundaryCheck: false )
218
+ builder. buildMatchScalar ( s, boundaryCheck: false , reverse: reverse)
228
219
}
229
220
}
230
221
231
222
mutating func emitCharacter( _ c: Character ) {
232
223
// Unicode scalar mode matches the specific scalars that comprise a character
233
224
if options. semanticLevel == . unicodeScalar {
234
225
for scalar in c. unicodeScalars {
235
- if reverse {
236
- emitReverseMatchScalar ( scalar)
237
- } else {
238
- emitMatchScalar ( scalar)
239
- }
226
+ emitMatchScalar ( scalar)
240
227
}
241
228
return
242
229
}
@@ -248,13 +235,10 @@ fileprivate extension Compiler.ByteCodeGen {
248
235
assert ( c. unicodeScalars. count == 1 )
249
236
builder. buildMatchScalarCaseInsensitive (
250
237
c. unicodeScalars. last!,
251
- boundaryCheck: true )
238
+ boundaryCheck: true ,
239
+ reverse: reverse)
252
240
} else {
253
- if reverse {
254
- builder. buildReverseMatch ( c, isCaseInsensitive: true )
255
- } else {
256
- builder. buildMatch ( c, isCaseInsensitive: true )
257
- }
241
+ builder. buildMatch ( c, isCaseInsensitive: true , reverse: reverse)
258
242
}
259
243
return
260
244
}
@@ -264,20 +248,12 @@ fileprivate extension Compiler.ByteCodeGen {
264
248
for idx in c. unicodeScalars. indices {
265
249
let scalar = c. unicodeScalars [ idx]
266
250
let boundaryCheck = idx == lastIdx
267
- if reverse {
268
- builder. buildReverseMatchScalar ( scalar, boundaryCheck: boundaryCheck)
269
- } else {
270
- builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck)
271
- }
251
+ builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck, reverse: reverse)
272
252
}
273
253
return
274
254
}
275
255
276
- if reverse {
277
- builder. buildReverseMatch ( c, isCaseInsensitive: false )
278
- } else {
279
- builder. buildMatch ( c, isCaseInsensitive: false )
280
- }
256
+ builder. buildMatch ( c, isCaseInsensitive: false , reverse: reverse)
281
257
}
282
258
283
259
mutating func emitAny( ) {
@@ -292,9 +268,9 @@ fileprivate extension Compiler.ByteCodeGen {
292
268
mutating func emitAnyNonNewline( ) {
293
269
switch options. semanticLevel {
294
270
case . graphemeCluster:
295
- builder. buildConsumeNonNewline ( )
271
+ builder. buildConsumeNonNewline ( reverse : reverse )
296
272
case . unicodeScalar:
297
- builder. buildConsumeScalarNonNewline ( )
273
+ builder. buildConsumeScalarNonNewline ( reverse : reverse )
298
274
}
299
275
}
300
276
@@ -794,121 +770,8 @@ fileprivate extension Compiler.ByteCodeGen {
794
770
guard let bitset = ccc. asAsciiBitset ( options) else {
795
771
return false
796
772
}
797
- if reverse {
798
- builder. buildReverseQuantify ( bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
799
- } else {
800
- builder. buildQuantify ( bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
801
- }
802
773
803
- case . atom( let atom) :
804
- switch atom {
805
- case . char( let c) :
806
- // Single scalar ascii value character
807
- guard let val = c. _singleScalarAsciiValue else {
808
- return false
809
- }
810
- if reverse {
811
- builder. buildReverseQuantify ( asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
812
- } else {
813
- builder. buildQuantify ( asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
814
- }
815
-
816
- case . any:
817
- if reverse {
818
- builder. buildReverseQuantifyAny (
819
- matchesNewlines: true , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
820
- } else {
821
- builder. buildQuantifyAny (
822
- matchesNewlines: true , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
823
- }
824
- case . anyNonNewline:
825
- if reverse {
826
- builder. buildReverseQuantifyAny (
827
- matchesNewlines: false , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
828
- } else {
829
- builder. buildQuantifyAny (
830
- matchesNewlines: false , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
831
- }
832
- case . dot:
833
- if reverse {
834
- builder. buildReverseQuantifyAny (
835
- matchesNewlines: options. dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
836
- } else {
837
- builder. buildQuantifyAny (
838
- matchesNewlines: options. dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
839
- }
840
-
841
- case . characterClass( let cc) :
842
- // Custom character class that consumes a single grapheme
843
- let model = cc. asRuntimeModel ( options)
844
- if reverse {
845
- builder. buildReverseQuantify (
846
- model: model,
847
- kind,
848
- minTrips,
849
- maxExtraTrips,
850
- isScalarSemantics: isScalarSemantics
851
- )
852
- } else {
853
- builder. buildQuantify (
854
- model: model,
855
- kind,
856
- minTrips,
857
- maxExtraTrips,
858
- isScalarSemantics: isScalarSemantics
859
- )
860
- }
861
- default :
862
- return false
863
- }
864
- case . convertedRegexLiteral( let node, _) :
865
- if reverse {
866
- return tryEmitFastReverseQuant ( node, kind, minTrips, maxExtraTrips)
867
- } else {
868
- return tryEmitFastQuant ( node, kind, minTrips, maxExtraTrips)
869
- }
870
- case . nonCapturingGroup( let groupKind, let node) :
871
- // .nonCapture nonCapturingGroups are ignored during compilation
872
- guard groupKind. ast == . nonCapture else {
873
- return false
874
- }
875
- if reverse {
876
- return tryEmitFastReverseQuant ( node, kind, minTrips, maxExtraTrips)
877
- } else {
878
- return tryEmitFastQuant ( node, kind, minTrips, maxExtraTrips)
879
- }
880
- default :
881
- return false
882
- }
883
- return true
884
- }
885
-
886
- /// Specialized quantification instruction for repetition of certain nodes in grapheme semantic mode
887
- /// Allowed nodes are:
888
- /// - single ascii scalar .char
889
- /// - ascii .customCharacterClass
890
- /// - single grapheme consumgin built in character classes
891
- /// - .any, .anyNonNewline, .dot
892
- mutating func tryEmitFastReverseQuant(
893
- _ child: DSLTree . Node ,
894
- _ kind: AST . Quantification . Kind ,
895
- _ minTrips: Int ,
896
- _ maxExtraTrips: Int ?
897
- ) -> Bool {
898
- let isScalarSemantics = options. semanticLevel == . unicodeScalar
899
- guard optimizationsEnabled
900
- && minTrips <= QuantifyPayload . maxStorableTrips
901
- && maxExtraTrips ?? 0 <= QuantifyPayload . maxStorableTrips
902
- && kind != . reluctant else {
903
- return false
904
- }
905
- switch child {
906
- case . customCharacterClass( let ccc) :
907
- // ascii only custom character class
908
- guard let bitset = ccc. asAsciiBitset ( options) else {
909
- return false
910
- }
911
- builder. buildReverseQuantify ( bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
774
+ builder. buildQuantify ( bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
912
775
913
776
case . atom( let atom) :
914
777
switch atom {
@@ -917,45 +780,46 @@ fileprivate extension Compiler.ByteCodeGen {
917
780
guard let val = c. _singleScalarAsciiValue else {
918
781
return false
919
782
}
920
- builder. buildReverseQuantify ( asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
783
+ builder. buildQuantify ( asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse : reverse )
921
784
922
785
case . any:
923
- builder. buildReverseQuantifyAny (
924
- matchesNewlines: true , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
786
+ builder. buildQuantifyAny (
787
+ matchesNewlines: true , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse : reverse )
925
788
case . anyNonNewline:
926
- builder. buildReverseQuantifyAny (
927
- matchesNewlines: false , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
789
+ builder. buildQuantifyAny (
790
+ matchesNewlines: false , kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse : reverse )
928
791
case . dot:
929
- builder. buildReverseQuantifyAny (
930
- matchesNewlines: options. dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
792
+ builder. buildQuantifyAny (
793
+ matchesNewlines: options. dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse : reverse )
931
794
932
795
case . characterClass( let cc) :
933
796
// Custom character class that consumes a single grapheme
934
797
let model = cc. asRuntimeModel ( options)
935
- builder. buildReverseQuantify (
798
+ builder. buildQuantify (
936
799
model: model,
937
800
kind,
938
801
minTrips,
939
802
maxExtraTrips,
940
- isScalarSemantics: isScalarSemantics)
803
+ isScalarSemantics: isScalarSemantics,
804
+ reverse: reverse
805
+ )
941
806
default :
942
807
return false
943
808
}
944
809
case . convertedRegexLiteral( let node, _) :
945
- return tryEmitFastReverseQuant ( node, kind, minTrips, maxExtraTrips)
810
+ return tryEmitFastQuant ( node, kind, minTrips, maxExtraTrips)
946
811
case . nonCapturingGroup( let groupKind, let node) :
947
812
// .nonCapture nonCapturingGroups are ignored during compilation
948
813
guard groupKind. ast == . nonCapture else {
949
814
return false
950
815
}
951
- return tryEmitFastReverseQuant ( node, kind, minTrips, maxExtraTrips)
816
+ return tryEmitFastQuant ( node, kind, minTrips, maxExtraTrips)
952
817
default :
953
818
return false
954
819
}
955
820
return true
956
821
}
957
822
958
-
959
823
/// Coalesce any adjacent scalar members in a custom character class together.
960
824
/// This is required in order to produce correct grapheme matching behavior.
961
825
func coalescingCustomCharacterClassMembers(
@@ -1505,51 +1369,6 @@ extension DSLTree.Node {
1505
1369
default : return false
1506
1370
}
1507
1371
}
1508
-
1509
- /// A Boolean value indicating whether this node reverses the match position
1510
- /// on a successful match.
1511
- ///
1512
- /// For example, an alternation like `(a|b|c)` always advances the position
1513
- /// by a character, but `(a|b|)` has an empty branch, which matches without
1514
- /// advancing.
1515
- var guaranteesBackwardProgress : Bool {
1516
- switch self {
1517
- case . orderedChoice( let children) :
1518
- return children. allSatisfy { $0. guaranteesBackwardProgress }
1519
- case . concatenation( let children) :
1520
- return children. contains ( where: { $0. guaranteesBackwardProgress } )
1521
- case . capture( _, _, let node, _) :
1522
- return node. guaranteesBackwardProgress
1523
- case . nonCapturingGroup( let kind, let child) :
1524
- switch kind. ast {
1525
- case . lookahead, . negativeLookahead, . lookbehind, . negativeLookbehind:
1526
- return false
1527
- default : return child. guaranteesBackwardProgress
1528
- }
1529
- case . atom( let atom) :
1530
- switch atom {
1531
- case . changeMatchingOptions, . assertion: return false
1532
- // Captures may be nil so backreferences may be zero length matches
1533
- case . backreference: return false
1534
- default : return true
1535
- }
1536
- case . trivia, . empty:
1537
- return false
1538
- case . quotedLiteral( let string) :
1539
- return !string. isEmpty
1540
- case . convertedRegexLiteral( let node, _) :
1541
- return node. guaranteesBackwardProgress
1542
- case . consumer, . matcher:
1543
- // Allow zero width consumers and matchers
1544
- return false
1545
- case . customCharacterClass( let ccc) :
1546
- return ccc. guaranteesBackwardProgress
1547
- case . quantification( let amount, _, let child) :
1548
- let ( atLeast, _) = amount. ast. bounds
1549
- return atLeast ?? 0 > 0 && child. guaranteesBackwardProgress
1550
- default : return false
1551
- }
1552
- }
1553
1372
}
1554
1373
1555
1374
extension DSLTree . CustomCharacterClass {
@@ -1572,24 +1391,4 @@ extension DSLTree.CustomCharacterClass {
1572
1391
}
1573
1392
return false
1574
1393
}
1575
-
1576
- /// We allow trivia into CustomCharacterClass, which could result in a CCC
1577
- /// that matches nothing, ie `(?x)[ ]`.
1578
- var guaranteesBackwardProgress : Bool {
1579
- for m in members {
1580
- switch m {
1581
- case . trivia:
1582
- continue
1583
- case let . intersection( lhs, rhs) :
1584
- return lhs. guaranteesBackwardProgress && rhs. guaranteesBackwardProgress
1585
- case let . subtraction( lhs, _) :
1586
- return lhs. guaranteesBackwardProgress
1587
- case let . symmetricDifference( lhs, rhs) :
1588
- return lhs. guaranteesBackwardProgress && rhs. guaranteesBackwardProgress
1589
- default :
1590
- return true
1591
- }
1592
- }
1593
- return false
1594
- }
1595
1394
}
0 commit comments