Skip to content

Commit 25d1e6a

Browse files
author
Jacob Hearst
committed
Move reverse branching from ByteCodeGen to MEBuiltins
1 parent d82f906 commit 25d1e6a

File tree

4 files changed

+67
-327
lines changed

4 files changed

+67
-327
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

+29-230
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ fileprivate extension Compiler.ByteCodeGen {
125125
let boundaryCheck = idx == lastIdx
126126
let scalar = s.unicodeScalars[idx]
127127
if options.isCaseInsensitive && scalar.properties.isCased {
128-
builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
128+
builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck, reverse: false)
129129
} else {
130-
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
130+
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck, reverse: false)
131131
}
132132
}
133133
return
@@ -153,9 +153,9 @@ fileprivate extension Compiler.ByteCodeGen {
153153
let boundaryCheck = idx == lastIdx
154154
let scalar = s.unicodeScalars[idx]
155155
if options.isCaseInsensitive && scalar.properties.isCased {
156-
builder.buildReverseMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
156+
builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck, reverse: true)
157157
} else {
158-
builder.buildReverseMatchScalar(scalar, boundaryCheck: boundaryCheck)
158+
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck, reverse: true)
159159
}
160160
}
161161
return
@@ -207,36 +207,23 @@ fileprivate extension Compiler.ByteCodeGen {
207207
}
208208

209209
mutating func emitCharacterClass(_ cc: DSLTree.Atom.CharacterClass) {
210-
builder.buildMatchBuiltin(model: cc.asRuntimeModel(options))
210+
builder.buildMatchBuiltin(model: cc.asRuntimeModel(options), reverse: reverse)
211211
}
212212

213213
mutating func emitMatchScalar(_ s: UnicodeScalar) {
214214
assert(options.semanticLevel == .unicodeScalar)
215215
if options.isCaseInsensitive && s.properties.isCased {
216-
builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false)
216+
builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false, reverse: reverse)
217217
} else {
218-
builder.buildMatchScalar(s, boundaryCheck: false)
219-
}
220-
}
221-
222-
mutating func emitReverseMatchScalar(_ s: UnicodeScalar) {
223-
assert(options.semanticLevel == .unicodeScalar)
224-
if options.isCaseInsensitive && s.properties.isCased {
225-
builder.buildReverseMatchScalarCaseInsensitive(s, boundaryCheck: false)
226-
} else {
227-
builder.buildReverseMatchScalar(s, boundaryCheck: false)
218+
builder.buildMatchScalar(s, boundaryCheck: false, reverse: reverse)
228219
}
229220
}
230221

231222
mutating func emitCharacter(_ c: Character) {
232223
// Unicode scalar mode matches the specific scalars that comprise a character
233224
if options.semanticLevel == .unicodeScalar {
234225
for scalar in c.unicodeScalars {
235-
if reverse {
236-
emitReverseMatchScalar(scalar)
237-
} else {
238-
emitMatchScalar(scalar)
239-
}
226+
emitMatchScalar(scalar)
240227
}
241228
return
242229
}
@@ -248,13 +235,10 @@ fileprivate extension Compiler.ByteCodeGen {
248235
assert(c.unicodeScalars.count == 1)
249236
builder.buildMatchScalarCaseInsensitive(
250237
c.unicodeScalars.last!,
251-
boundaryCheck: true)
238+
boundaryCheck: true,
239+
reverse: reverse)
252240
} else {
253-
if reverse {
254-
builder.buildReverseMatch(c, isCaseInsensitive: true)
255-
} else {
256-
builder.buildMatch(c, isCaseInsensitive: true)
257-
}
241+
builder.buildMatch(c, isCaseInsensitive: true, reverse: reverse)
258242
}
259243
return
260244
}
@@ -264,20 +248,12 @@ fileprivate extension Compiler.ByteCodeGen {
264248
for idx in c.unicodeScalars.indices {
265249
let scalar = c.unicodeScalars[idx]
266250
let boundaryCheck = idx == lastIdx
267-
if reverse {
268-
builder.buildReverseMatchScalar(scalar, boundaryCheck: boundaryCheck)
269-
} else {
270-
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
271-
}
251+
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck, reverse: reverse)
272252
}
273253
return
274254
}
275255

276-
if reverse {
277-
builder.buildReverseMatch(c, isCaseInsensitive: false)
278-
} else {
279-
builder.buildMatch(c, isCaseInsensitive: false)
280-
}
256+
builder.buildMatch(c, isCaseInsensitive: false, reverse: reverse)
281257
}
282258

283259
mutating func emitAny() {
@@ -292,9 +268,9 @@ fileprivate extension Compiler.ByteCodeGen {
292268
mutating func emitAnyNonNewline() {
293269
switch options.semanticLevel {
294270
case .graphemeCluster:
295-
builder.buildConsumeNonNewline()
271+
builder.buildConsumeNonNewline(reverse: reverse)
296272
case .unicodeScalar:
297-
builder.buildConsumeScalarNonNewline()
273+
builder.buildConsumeScalarNonNewline(reverse: reverse)
298274
}
299275
}
300276

@@ -794,121 +770,8 @@ fileprivate extension Compiler.ByteCodeGen {
794770
guard let bitset = ccc.asAsciiBitset(options) else {
795771
return false
796772
}
797-
if reverse {
798-
builder.buildReverseQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
799-
} else {
800-
builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
801-
}
802773

803-
case .atom(let atom):
804-
switch atom {
805-
case .char(let c):
806-
// Single scalar ascii value character
807-
guard let val = c._singleScalarAsciiValue else {
808-
return false
809-
}
810-
if reverse {
811-
builder.buildReverseQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
812-
} else {
813-
builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
814-
}
815-
816-
case .any:
817-
if reverse {
818-
builder.buildReverseQuantifyAny(
819-
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
820-
} else {
821-
builder.buildQuantifyAny(
822-
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
823-
}
824-
case .anyNonNewline:
825-
if reverse {
826-
builder.buildReverseQuantifyAny(
827-
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
828-
} else {
829-
builder.buildQuantifyAny(
830-
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
831-
}
832-
case .dot:
833-
if reverse {
834-
builder.buildReverseQuantifyAny(
835-
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
836-
} else {
837-
builder.buildQuantifyAny(
838-
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
839-
}
840-
841-
case .characterClass(let cc):
842-
// Custom character class that consumes a single grapheme
843-
let model = cc.asRuntimeModel(options)
844-
if reverse {
845-
builder.buildReverseQuantify(
846-
model: model,
847-
kind,
848-
minTrips,
849-
maxExtraTrips,
850-
isScalarSemantics: isScalarSemantics
851-
)
852-
} else {
853-
builder.buildQuantify(
854-
model: model,
855-
kind,
856-
minTrips,
857-
maxExtraTrips,
858-
isScalarSemantics: isScalarSemantics
859-
)
860-
}
861-
default:
862-
return false
863-
}
864-
case .convertedRegexLiteral(let node, _):
865-
if reverse {
866-
return tryEmitFastReverseQuant(node, kind, minTrips, maxExtraTrips)
867-
} else {
868-
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
869-
}
870-
case .nonCapturingGroup(let groupKind, let node):
871-
// .nonCapture nonCapturingGroups are ignored during compilation
872-
guard groupKind.ast == .nonCapture else {
873-
return false
874-
}
875-
if reverse {
876-
return tryEmitFastReverseQuant(node, kind, minTrips, maxExtraTrips)
877-
} else {
878-
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
879-
}
880-
default:
881-
return false
882-
}
883-
return true
884-
}
885-
886-
/// Specialized quantification instruction for repetition of certain nodes in grapheme semantic mode
887-
/// Allowed nodes are:
888-
/// - single ascii scalar .char
889-
/// - ascii .customCharacterClass
890-
/// - single grapheme consuming built-in character classes
891-
/// - .any, .anyNonNewline, .dot
892-
mutating func tryEmitFastReverseQuant(
893-
_ child: DSLTree.Node,
894-
_ kind: AST.Quantification.Kind,
895-
_ minTrips: Int,
896-
_ maxExtraTrips: Int?
897-
) -> Bool {
898-
let isScalarSemantics = options.semanticLevel == .unicodeScalar
899-
guard optimizationsEnabled
900-
&& minTrips <= QuantifyPayload.maxStorableTrips
901-
&& maxExtraTrips ?? 0 <= QuantifyPayload.maxStorableTrips
902-
&& kind != .reluctant else {
903-
return false
904-
}
905-
switch child {
906-
case .customCharacterClass(let ccc):
907-
// ascii only custom character class
908-
guard let bitset = ccc.asAsciiBitset(options) else {
909-
return false
910-
}
911-
builder.buildReverseQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
774+
builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
912775

913776
case .atom(let atom):
914777
switch atom {
@@ -917,45 +780,46 @@ fileprivate extension Compiler.ByteCodeGen {
917780
guard let val = c._singleScalarAsciiValue else {
918781
return false
919782
}
920-
builder.buildReverseQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
783+
builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
921784

922785
case .any:
923-
builder.buildReverseQuantifyAny(
924-
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
786+
builder.buildQuantifyAny(
787+
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
925788
case .anyNonNewline:
926-
builder.buildReverseQuantifyAny(
927-
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
789+
builder.buildQuantifyAny(
790+
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
928791
case .dot:
929-
builder.buildReverseQuantifyAny(
930-
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
792+
builder.buildQuantifyAny(
793+
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics, reverse: reverse)
931794

932795
case .characterClass(let cc):
933796
// Custom character class that consumes a single grapheme
934797
let model = cc.asRuntimeModel(options)
935-
builder.buildReverseQuantify(
798+
builder.buildQuantify(
936799
model: model,
937800
kind,
938801
minTrips,
939802
maxExtraTrips,
940-
isScalarSemantics: isScalarSemantics)
803+
isScalarSemantics: isScalarSemantics,
804+
reverse: reverse
805+
)
941806
default:
942807
return false
943808
}
944809
case .convertedRegexLiteral(let node, _):
945-
return tryEmitFastReverseQuant(node, kind, minTrips, maxExtraTrips)
810+
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
946811
case .nonCapturingGroup(let groupKind, let node):
947812
// .nonCapture nonCapturingGroups are ignored during compilation
948813
guard groupKind.ast == .nonCapture else {
949814
return false
950815
}
951-
return tryEmitFastReverseQuant(node, kind, minTrips, maxExtraTrips)
816+
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
952817
default:
953818
return false
954819
}
955820
return true
956821
}
957822

958-
959823
/// Coalesce any adjacent scalar members in a custom character class together.
960824
/// This is required in order to produce correct grapheme matching behavior.
961825
func coalescingCustomCharacterClassMembers(
@@ -1505,51 +1369,6 @@ extension DSLTree.Node {
15051369
default: return false
15061370
}
15071371
}
1508-
1509-
/// A Boolean value indicating whether this node reverses the match position
1510-
/// on a successful match.
1511-
///
1512-
/// For example, an alternation like `(a|b|c)` always advances the position
1513-
/// by a character, but `(a|b|)` has an empty branch, which matches without
1514-
/// advancing.
1515-
var guaranteesBackwardProgress: Bool {
1516-
switch self {
1517-
case .orderedChoice(let children):
1518-
return children.allSatisfy { $0.guaranteesBackwardProgress }
1519-
case .concatenation(let children):
1520-
return children.contains(where: { $0.guaranteesBackwardProgress })
1521-
case .capture(_, _, let node, _):
1522-
return node.guaranteesBackwardProgress
1523-
case .nonCapturingGroup(let kind, let child):
1524-
switch kind.ast {
1525-
case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind:
1526-
return false
1527-
default: return child.guaranteesBackwardProgress
1528-
}
1529-
case .atom(let atom):
1530-
switch atom {
1531-
case .changeMatchingOptions, .assertion: return false
1532-
// Captures may be nil so backreferences may be zero length matches
1533-
case .backreference: return false
1534-
default: return true
1535-
}
1536-
case .trivia, .empty:
1537-
return false
1538-
case .quotedLiteral(let string):
1539-
return !string.isEmpty
1540-
case .convertedRegexLiteral(let node, _):
1541-
return node.guaranteesBackwardProgress
1542-
case .consumer, .matcher:
1543-
// Allow zero width consumers and matchers
1544-
return false
1545-
case .customCharacterClass(let ccc):
1546-
return ccc.guaranteesBackwardProgress
1547-
case .quantification(let amount, _, let child):
1548-
let (atLeast, _) = amount.ast.bounds
1549-
return atLeast ?? 0 > 0 && child.guaranteesBackwardProgress
1550-
default: return false
1551-
}
1552-
}
15531372
}
15541373

15551374
extension DSLTree.CustomCharacterClass {
@@ -1572,24 +1391,4 @@ extension DSLTree.CustomCharacterClass {
15721391
}
15731392
return false
15741393
}
1575-
1576-
/// We allow trivia into CustomCharacterClass, which could result in a CCC
1577-
/// that matches nothing, ie `(?x)[ ]`.
1578-
var guaranteesBackwardProgress: Bool {
1579-
for m in members {
1580-
switch m {
1581-
case .trivia:
1582-
continue
1583-
case let .intersection(lhs, rhs):
1584-
return lhs.guaranteesBackwardProgress && rhs.guaranteesBackwardProgress
1585-
case let .subtraction(lhs, _):
1586-
return lhs.guaranteesBackwardProgress
1587-
case let .symmetricDifference(lhs, rhs):
1588-
return lhs.guaranteesBackwardProgress && rhs.guaranteesBackwardProgress
1589-
default:
1590-
return true
1591-
}
1592-
}
1593-
return false
1594-
}
15951394
}

0 commit comments

Comments
 (0)