swiftlang
diff --git a/Diff for: ‎Documentation/ProgrammersManual.md
+30 b/Diff for: ‎Documentation/ProgrammersManual.md
+30
diff --git a/Diff for: ‎Package.swift
+1-1 b/Diff for: ‎Package.swift
+1-1
diff --git a/Diff for: ‎Sources/RegexBenchmark/BenchmarkRunner.swift
+13-5 b/Diff for: ‎Sources/RegexBenchmark/BenchmarkRunner.swift
+13-5
diff --git a/Diff for: ‎Sources/TestSupport/TestSupport.swift
+1-1 b/Diff for: ‎Sources/TestSupport/TestSupport.swift
+1-1
diff --git a/Diff for: ‎Sources/VariadicsGenerator/VariadicsGenerator.swift
+1-1 b/Diff for: ‎Sources/VariadicsGenerator/VariadicsGenerator.swift
+1-1
diff --git a/Diff for: ‎Sources/_StringProcessing/ByteCodeGen.swift
-3 b/Diff for: ‎Sources/_StringProcessing/ByteCodeGen.swift
-3
diff --git a/Diff for: ‎Sources/_StringProcessing/CMakeLists.txt
+1 b/Diff for: ‎Sources/_StringProcessing/CMakeLists.txt
+1
diff --git a/Diff for: ‎Sources/_StringProcessing/Engine/MEBuiltins.swift
+152-99 b/Diff for: ‎Sources/_StringProcessing/Engine/MEBuiltins.swift
+152-99
@@ -0,0 +1,30 @@
+# Programmer's Manual
+
+## Programming patterns
+
+### Engine quick checks and fast paths
+
+In the engine nomenclature, a quick check results in a yes/no/maybe, while a thorough check always results in a definite answer.
+
+The nature of quick checks and fast paths is that they bifurcate testing coverage. One easy way to prevent this in simple cases is to assert that a definite quick result matches the thorough result.
+
+One example of this pattern is matching against a builtin character class. The engine has a `_matchBuiltinCC` entry point that combines a quick check with a thorough check:
+
+```swift
+  func _matchBuiltinCC(...) -> Input.Index? {
+    // Calls _quickMatchBuiltinCC, if that gives a definite result
+    // asserts that it is the same as the result of
+    // _thoroughMatchBuiltinCC and returns it. Otherwise returns the
+    // result of _thoroughMatchBuiltinCC
+  }
+
+  @inline(__always)
+  func _quickMatchBuiltinCC(...) -> QuickResult<Input.Index?>
+
+  @inline(never)
+  func _thoroughMatchBuiltinCC(...) -> Input.Index?
+```
+
+The thorough check is never inlined, as it is a lot of cold code. Note that quick and thorough functions should be pure; that is, they shouldn't update processor state.
+
+
@@ -7,7 +7,7 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
     "-Xfrontend",
     "-define-availability",
     "-Xfrontend",
-    "SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
+    "SwiftStdlib 5.7:macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0",
     "-Xfrontend",
     "-define-availability",
     "-Xfrontend",
 
@@ -1,6 +1,9 @@
 import Foundation
 @_spi(RegexBenchmark) import _StringProcessing
 
+/// The maximum number of times to re-run a benchmark whose results vary too much
+private var rerunCount: Int { 3 }
+
 struct BenchmarkRunner {
   let suiteName: String
   var suite: [any RegexBenchmark] = []
@@ -82,11 +85,16 @@ struct BenchmarkRunner {
     for b in suite {
       var result = measure(benchmark: b, samples: samples)
       if result.runtimeIsTooVariant {
-        print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
-        print(result.runtime)
-        print("Rerunning \(b.name)")
-        result = measure(benchmark: b, samples: result.runtime.samples*2)
-        print(result.runtime)
+        for _ in 0..<rerunCount {
+          print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)")
+          print(result.runtime)
+          print("Rerunning \(b.name)")
+          result = measure(benchmark: b, samples: result.runtime.samples*2)
+          print(result.runtime)
+          if !result.runtimeIsTooVariant {
+            break
+          }
+        }
         if result.runtimeIsTooVariant {
           fatalError("Benchmark \(b.name) is too variant")
         }
 
@@ -15,7 +15,7 @@ import XCTest
 // *without* `-disable-availability-checking` to ensure the #available check is
 // not compiled into a no-op.
 
-#if os(Linux)
+#if os(Linux) || os(Android)
 public func XCTExpectFailure(
   _ message: String? = nil, body: () throws -> Void
 ) rethrows {}
 
@@ -14,7 +14,7 @@
 import ArgumentParser
 #if os(macOS)
 import Darwin
-#elseif os(Linux)
+#elseif canImport(Glibc)
 import Glibc
 #elseif os(Windows)
 import CRT
 
@@ -702,9 +702,6 @@ fileprivate extension Compiler.ByteCodeGen {
       case .characterClass(let cc):
         // Custom character class that consumes a single grapheme
         let model = cc.asRuntimeModel(options)
-        guard model.consumesSingleGrapheme else {
-          return false
-        }
         builder.buildQuantify(
           model: model,
           kind,
 
@@ -47,6 +47,7 @@ add_library(_StringProcessing
   Regex/DSLTree.swift
   Regex/Match.swift
   Regex/Options.swift
+  Unicode/ASCII.swift
   Unicode/CaseConversion.swift
   Unicode/CharacterProps.swift
   Unicode/Comparison.swift
 
@@ -9,114 +9,26 @@ extension Character {
 }
 
 extension Processor {
-  mutating func matchBuiltin(
+  mutating func matchBuiltinCC(
     _ cc: _CharacterClassModel.Representation,
-    _ isInverted: Bool,
-    _ isStrictASCII: Bool,
-    _ isScalarSemantics: Bool
+    isInverted: Bool,
+    isStrictASCII: Bool,
+    isScalarSemantics: Bool
   ) -> Bool {
-    guard let next = _doMatchBuiltin(
+    guard let next = input._matchBuiltinCC(
       cc,
-      isInverted,
-      isStrictASCII,
-      isScalarSemantics
+      at: currentPosition,
+      isInverted: isInverted,
+      isStrictASCII: isStrictASCII,
+      isScalarSemantics: isScalarSemantics
     ) else {
       signalFailure()
       return false
     }
     currentPosition = next
     return true
   }
-  
-  func _doMatchBuiltin(
-    _ cc: _CharacterClassModel.Representation,
-    _ isInverted: Bool,
-    _ isStrictASCII: Bool,
-    _ isScalarSemantics: Bool
-  ) -> Input.Index? {
-    guard let char = load(), let scalar = loadScalar() else {
-      return nil
-    }
 
-    let asciiCheck = (char.isASCII && !isScalarSemantics)
-      || (scalar.isASCII && isScalarSemantics)
-      || !isStrictASCII
-
-    var matched: Bool
-    var next: Input.Index
-    switch (isScalarSemantics, cc) {
-    case (_, .anyGrapheme):
-      next = input.index(after: currentPosition)
-    case (_, .anyScalar):
-      next = input.unicodeScalars.index(after: currentPosition)
-    case (true, _):
-      next = input.unicodeScalars.index(after: currentPosition)
-    case (false, _):
-      next = input.index(after: currentPosition)
-    }
-
-    switch cc {
-    case .any, .anyGrapheme:
-      matched = true
-    case .anyScalar:
-      if isScalarSemantics {
-        matched = true
-      } else {
-        matched = input.isOnGraphemeClusterBoundary(next)
-      }
-    case .digit:
-      if isScalarSemantics {
-        matched = scalar.properties.numericType != nil && asciiCheck
-      } else {
-        matched = char.isNumber && asciiCheck
-      }
-    case .horizontalWhitespace:
-      if isScalarSemantics {
-        matched = scalar.isHorizontalWhitespace && asciiCheck
-      } else {
-        matched = char._isHorizontalWhitespace && asciiCheck
-      }
-    case .verticalWhitespace:
-      if isScalarSemantics {
-        matched = scalar.isNewline && asciiCheck
-      } else {
-        matched = char._isNewline && asciiCheck
-      }
-    case .newlineSequence:
-      if isScalarSemantics {
-        matched = scalar.isNewline && asciiCheck
-        if matched && scalar == "\r"
-            && next != input.endIndex && input.unicodeScalars[next] == "\n" {
-          // Match a full CR-LF sequence even in scalar semantics
-          input.unicodeScalars.formIndex(after: &next)
-        }
-      } else {
-        matched = char._isNewline && asciiCheck
-      }
-    case .whitespace:
-      if isScalarSemantics {
-        matched = scalar.properties.isWhitespace && asciiCheck
-      } else {
-        matched = char.isWhitespace && asciiCheck
-      }
-    case .word:
-      if isScalarSemantics {
-        matched = scalar.properties.isAlphabetic && asciiCheck
-      } else {
-        matched = char.isWordCharacter && asciiCheck
-      }
-    }
-
-    if isInverted {
-      matched.toggle()
-    }
-
-    guard matched else {
-      return nil
-    }
-    return next
-  }
-  
   func isAtStartOfLine(_ payload: AssertionPayload) -> Bool {
     if currentPosition == subjectBounds.lowerBound { return true }
     switch payload.semanticLevel {
@@ -126,7 +38,7 @@ extension Processor {
       return input.unicodeScalars[input.unicodeScalars.index(before: currentPosition)].isNewline
     }
   }
-  
+
   func isAtEndOfLine(_ payload: AssertionPayload) -> Bool {
     if currentPosition == subjectBounds.upperBound { return true }
     switch payload.semanticLevel {
@@ -169,7 +81,7 @@ extension Processor {
       return isAtStartOfLine(payload)
     case .endOfLine:
       return isAtEndOfLine(payload)
-      
+
     case .caretAnchor:
       if payload.anchorsMatchNewlines {
         return isAtStartOfLine(payload)
@@ -202,3 +114,144 @@ extension Processor {
     }
   }
 }
+
+// MARK: Built-in character class matching
+
+extension String {
+
+  /// Matches the built-in character class `cc` at `currentPosition`.
+  ///
+  /// The quick check runs first. When it gives a definite answer, that answer
+  /// is asserted to equal the thorough check's answer and is returned;
+  /// otherwise the thorough check decides.
+  ///
+  /// - Returns: The index to continue from on a match, or `nil` when
+  ///   `currentPosition` is not before `endIndex` or nothing matched.
+  ///
+  /// Mentioned in ProgrammersManual.md, update docs if redesigned
+  func _matchBuiltinCC(
+    _ cc: _CharacterClassModel.Representation,
+    at currentPosition: String.Index,
+    isInverted: Bool,
+    isStrictASCII: Bool,
+    isScalarSemantics: Bool
+  ) -> String.Index? {
+    if currentPosition >= endIndex {
+      return nil
+    }
+
+    let quickResult = _quickMatchBuiltinCC(
+      cc,
+      at: currentPosition,
+      isInverted: isInverted,
+      isStrictASCII: isStrictASCII,
+      isScalarSemantics: isScalarSemantics)
+
+    guard case .definite(let quickAnswer) = quickResult else {
+      // No definite quick answer: the thorough check decides.
+      return _thoroughMatchBuiltinCC(
+        cc,
+        at: currentPosition,
+        isInverted: isInverted,
+        isStrictASCII: isStrictASCII,
+        isScalarSemantics: isScalarSemantics)
+    }
+
+    // A definite quick answer must agree with the thorough answer.
+    assert(quickAnswer == _thoroughMatchBuiltinCC(
+      cc,
+      at: currentPosition,
+      isInverted: isInverted,
+      isStrictASCII: isStrictASCII,
+      isScalarSemantics: isScalarSemantics))
+    return quickAnswer
+  }
+
+  /// Quick check for the built-in character class `cc` at `currentPosition`,
+  /// which is asserted to be before `endIndex`.
+  ///
+  /// Delegates to `_quickMatch`. When that produces no answer the result is
+  /// `.unknown`; otherwise its answer is combined with `isInverted` into a
+  /// definite result: the next index on a match, `nil` on a mismatch.
+  ///
+  /// `isStrictASCII` is accepted for signature parity but is not read here.
+  ///
+  /// Mentioned in ProgrammersManual.md, update docs if redesigned
+  @inline(__always)
+  func _quickMatchBuiltinCC(
+    _ cc: _CharacterClassModel.Representation,
+    at currentPosition: String.Index,
+    isInverted: Bool,
+    isStrictASCII: Bool,
+    isScalarSemantics: Bool
+  ) -> QuickResult<String.Index?> {
+    assert(currentPosition < endIndex)
+    if let (nextIndex, didMatch) = _quickMatch(
+      cc, at: currentPosition, isScalarSemantics: isScalarSemantics
+    ) {
+      // The class matches only when the raw answer differs from the
+      // inversion flag.
+      return .definite(didMatch != isInverted ? nextIndex : nil)
+    }
+    return .unknown
+  }
+
+  /// Thorough check for the built-in character class `cc` at
+  /// `currentPosition`, which is asserted to be before `endIndex`.
+  ///
+  /// Reads both the character and the Unicode scalar at the position, tests
+  /// the one selected by `isScalarSemantics` against `cc`, and applies
+  /// `isInverted` to the outcome.
+  ///
+  /// - Returns: The index after the matched element (after a full CR-LF pair
+  ///   for `.newlineSequence` under scalar semantics), or `nil` on a mismatch.
+  ///
+  /// Mentioned in ProgrammersManual.md, update docs if redesigned
+  @inline(never)
+  func _thoroughMatchBuiltinCC(
+    _ cc: _CharacterClassModel.Representation,
+    at currentPosition: String.Index,
+    isInverted: Bool,
+    isStrictASCII: Bool,
+    isScalarSemantics: Bool
+  ) -> String.Index? {
+    assert(currentPosition < endIndex)
+    let character = self[currentPosition]
+    let unicodeScalar = unicodeScalars[currentPosition]
+
+    // Always true outside strict-ASCII mode. In strict-ASCII mode it holds
+    // when the scalar is ASCII (scalar semantics) or the character is ASCII.
+    let passesASCIICheck = !isStrictASCII
+      || (isScalarSemantics && unicodeScalar.isASCII)
+      || character.isASCII
+
+    // `.anyGrapheme` always advances by a whole character. Every other class
+    // advances by one scalar under scalar semantics, else by one character.
+    var nextIndex: String.Index
+    if case .anyGrapheme = cc {
+      nextIndex = index(after: currentPosition)
+    } else if isScalarSemantics {
+      nextIndex = unicodeScalars.index(after: currentPosition)
+    } else {
+      nextIndex = index(after: currentPosition)
+    }
+
+    let isMatch: Bool
+    switch cc {
+    case .any, .anyGrapheme:
+      isMatch = true
+    case .digit:
+      isMatch = (isScalarSemantics
+        ? unicodeScalar.properties.numericType != nil
+        : character.isNumber) && passesASCIICheck
+    case .horizontalWhitespace:
+      isMatch = (isScalarSemantics
+        ? unicodeScalar.isHorizontalWhitespace
+        : character._isHorizontalWhitespace) && passesASCIICheck
+    case .verticalWhitespace:
+      isMatch = (isScalarSemantics
+        ? unicodeScalar.isNewline
+        : character._isNewline) && passesASCIICheck
+    case .newlineSequence:
+      if isScalarSemantics {
+        isMatch = unicodeScalar.isNewline && passesASCIICheck
+        if isMatch, unicodeScalar == "\r",
+           nextIndex != endIndex, unicodeScalars[nextIndex] == "\n" {
+          // Match a full CR-LF sequence even in scalar semantics
+          unicodeScalars.formIndex(after: &nextIndex)
+        }
+      } else {
+        isMatch = character._isNewline && passesASCIICheck
+      }
+    case .whitespace:
+      isMatch = (isScalarSemantics
+        ? unicodeScalar.properties.isWhitespace
+        : character.isWhitespace) && passesASCIICheck
+    case .word:
+      isMatch = (isScalarSemantics
+        ? unicodeScalar.properties.isAlphabetic
+        : character.isWordCharacter) && passesASCIICheck
+    }
+
+    // Inversion flips the outcome; only a final match yields an index.
+    return isMatch != isInverted ? nextIndex : nil
+  }
+}
+}