Skip to content

Commit 83ca6c8

Browse files
committed
Make CCC compilation deterministic
This fixes an issue where using a set to deduplicate the members of a custom character class (CCC) led to a nondeterministic ordering, which could then lead to unexpected coalescing of separate scalars into a single character (e.g. `[\nA\r]` -> `["\r", "\n", "A"]` -> `["\r\n", "A"]`). This change preserves the original order of the CCC's members, so coalescing into characters should not occur unless the scalars are already in that order. Fixes swiftlang/swift#81427. rdar://151046715
1 parent 650b2c1 commit 83ca6c8

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -940,20 +940,26 @@ fileprivate extension Compiler.ByteCodeGen {
940940
case .atom(let atom):
941941
switch atom {
942942
case let .char(char):
943-
characters.insert(char)
943+
if characters.insert(char).inserted {
944+
result.append(member)
945+
}
944946
case let .scalar(scalar):
945-
scalars.insert(scalar)
947+
if scalars.insert(scalar).inserted {
948+
result.append(member)
949+
}
946950
default:
947951
result.append(member)
948952
}
949953
case let .quotedLiteral(str):
950-
characters.formUnion(str)
954+
for char in str {
955+
if characters.insert(char).inserted {
956+
result.append(.atom(.char(char)))
957+
}
958+
}
951959
default:
952960
result.append(member)
953961
}
954962
}
955-
result.append(contentsOf: characters.map { .atom(.char($0)) })
956-
result.append(contentsOf: scalars.map { .atom(.scalar($0)) })
957963
return result
958964
}
959965

Tests/RegexTests/MatchTests.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2864,6 +2864,18 @@ extension RegexTests {
28642864
XCTAssertNil(additionalInput.wholeMatch(of: additionalRegex))
28652865
}
28662866

2867+
func testIssueSwift81427() throws {
2868+
// This issue is a nondeterministic matching failure, where this character
2869+
// set is occasionally compiled incorrectly. Multiple test runs (not just
2870+
// multiple executions of this test) are required for verification.
2871+
firstMatchTests(
2872+
"[(?:\r\n)\n\r]",
2873+
("\n", "\n"),
2874+
("\r", "\r"),
2875+
("\r\n", "\r\n")
2876+
)
2877+
}
2878+
28672879
func testNSRECompatibility() throws {
28682880
// NSRE-compatibility includes scalar matching, so `[\r\n]` should match
28692881
// either `\r` or `\n`.

0 commit comments

Comments
 (0)