Skip to content

Benchmarker improvements + more benchmarks #581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 15, 2022
113 changes: 89 additions & 24 deletions Sources/RegexBenchmark/Benchmark.swift
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import _StringProcessing
import Foundation

public protocol RegexBenchmark {
protocol RegexBenchmark {
var name: String { get }
func run()
func debug()
}

public struct Benchmark: RegexBenchmark {
public let name: String
struct Benchmark: RegexBenchmark {
let name: String
let regex: Regex<AnyRegexOutput>
let type: MatchType
let target: String

public enum MatchType {
enum MatchType {
case whole
case first
case allMatches
}

public func run() {
func run() {
switch type {
case .whole: blackHole(target.wholeMatch(of: regex))
case .allMatches: blackHole(target.matches(of: regex))
Expand All @@ -28,8 +28,8 @@ public struct Benchmark: RegexBenchmark {
}
}

public struct NSBenchmark: RegexBenchmark {
public let name: String
struct NSBenchmark: RegexBenchmark {
let name: String
let regex: NSRegularExpression
let type: NSMatchType
let target: String
Expand All @@ -38,19 +38,55 @@ public struct NSBenchmark: RegexBenchmark {
NSRange(target.startIndex..<target.endIndex, in: target)
}

public enum NSMatchType {
enum NSMatchType {
case allMatches
case first
}

public func run() {
func run() {
switch type {
case .allMatches: blackHole(regex.matches(in: target, range: range))
case .first: blackHole(regex.firstMatch(in: target, range: range))
}
}
}

/// Benchmarks a single Swift `Regex` by attempting a whole-string match
/// against every string in a list of inputs.
struct InputListBenchmark: RegexBenchmark {
  let name: String
  let regex: Regex<AnyRegexOutput>
  let targets: [String]

  /// Visits each target in order, performs `wholeMatch(of:)` with `regex`,
  /// and hands the result to `blackHole`.
  func run() {
    for input in targets {
      let result = input.wholeMatch(of: regex)
      blackHole(result)
    }
  }
}

/// Benchmarks an `NSRegularExpression` by attempting an anchored match
/// against every string in a list of inputs.
struct InputListNSBenchmark: RegexBenchmark {
  let name: String
  let regex: NSRegularExpression
  let targets: [String]

  /// Creates the benchmark by compiling `regex` with start and end anchors.
  ///
  /// The pattern is wrapped in a non-capturing group before the anchors are
  /// added so that `^` and `$` apply to the pattern as a whole. Without the
  /// group, a top-level alternation such as `a|b` would become `^a|b$`,
  /// which anchors each branch on one side only.
  ///
  /// Traps (`try!`) if the anchored pattern fails to compile.
  ///
  /// - Parameters:
  ///   - name: Stored as the benchmark's `name`.
  ///   - regex: The pattern source to anchor and compile.
  ///   - targets: The strings that `run()` matches against.
  init(name: String, regex: String, targets: [String]) {
    self.name = name
    self.regex = try! NSRegularExpression(pattern: "^(?:" + regex + ")$")
    self.targets = targets
  }

  /// Returns the `NSRange` spanning `target` from its start index to its
  /// end index.
  func range(in target: String) -> NSRange {
    NSRange(target.startIndex..<target.endIndex, in: target)
  }

  /// Visits each target in order, runs `firstMatch` over its full range,
  /// and hands the result to `blackHole`.
  func run() {
    for target in targets {
      // Named distinctly from the `range(in:)` method to avoid shadowing it.
      let fullRange = range(in: target)
      blackHole(regex.firstMatch(in: target, range: fullRange))
    }
  }
}

/// A benchmark meant to be run across multiple engines
struct CrossBenchmark {
/// The base name of the benchmark
Expand All @@ -69,11 +105,12 @@ struct CrossBenchmark {
/// TODO: Probably better to have a whole-line vs search anywhere, maybe
/// accommodate multi-line matching, etc.
var isWhole: Bool = false

/// Whether or not to do firstMatch as well or just allMatches
var includeFirst: Bool = false

func register(_ runner: inout BenchmarkRunner) {
let swiftRegex = try! Regex(regex)

let nsPattern = isWhole ? "^" + regex + "$" : regex
let nsRegex: NSRegularExpression
if isWhole {
nsRegex = try! NSRegularExpression(pattern: "^" + regex + "$")
Expand All @@ -95,37 +132,65 @@ struct CrossBenchmark {
type: .first,
target: input))
} else {
runner.register(
Benchmark(
name: baseName + "First",
regex: swiftRegex,
type: .first,
target: input))
runner.register(
Benchmark(
name: baseName + "All",
regex: swiftRegex,
type: .allMatches,
target: input))
runner.register(
NSBenchmark(
name: baseName + "First_NS",
regex: nsRegex,
type: .first,
target: input))
runner.register(
NSBenchmark(
name: baseName + "All_NS",
regex: nsRegex,
type: .allMatches,
target: input))
if includeFirst {
runner.register(
Benchmark(
name: baseName + "First",
regex: swiftRegex,
type: .first,
target: input))
runner.register(
NSBenchmark(
name: baseName + "First_NS",
regex: nsRegex,
type: .first,
target: input))
}
}
}
}

/// Registers the same regex, run over a list of input strings, as one Swift
/// `Regex` benchmark and one `NSRegularExpression` benchmark.
struct CrossInputListBenchmark {
  /// The base name of the benchmark
  var baseName: String

  /// The pattern source to compile in the different engines
  var regex: String

  /// The list of strings to match against
  var inputs: [String]

  /// Adds both engine variants of this benchmark to `runner`.
  ///
  /// The Swift variant is registered under `baseName`, the
  /// `NSRegularExpression` variant under `baseName + "NS"`. Traps (`try!`)
  /// if `regex` does not compile as a Swift `Regex`.
  func register(_ runner: inout BenchmarkRunner) {
    let compiled = try! Regex(regex)
    let swiftVariant = InputListBenchmark(
      name: baseName,
      regex: compiled,
      targets: inputs)
    runner.register(swiftVariant)

    // NOTE(review): the NSBenchmark registrations in CrossBenchmark use an
    // "_NS" suffix; the suffix here is kept as "NS" to leave names unchanged.
    let nsVariant = InputListNSBenchmark(
      name: baseName + "NS",
      regex: regex,
      targets: inputs)
    runner.register(nsVariant)
  }
}

// TODO: Capture-containing benchmarks

// nom nom nom, consume the argument
@inline(never)
public func blackHole<T>(_ x: T) {
func blackHole<T>(_ x: T) {
}
14 changes: 10 additions & 4 deletions Sources/RegexBenchmark/BenchmarkRegistration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,26 @@
// Do not remove the start of registration or end of registration markers

extension BenchmarkRunner {
public static func makeRunner(
static func makeRunner(
_ samples: Int,
_ outputPath: String
_ quiet: Bool
) -> BenchmarkRunner {
var benchmark = BenchmarkRunner("RegexBench", samples, outputPath)
var benchmark = BenchmarkRunner("RegexBench", samples, quiet)
// -- start of registrations --
benchmark.addReluctantQuant()
benchmark.addCSS()
benchmark.addNotFound()
benchmark.addGraphemeBreak()
benchmark.addHangulSyllable()
benchmark.addHTML()
// benchmark.addHTML() // Disabled due to \b being unusably slow
benchmark.addEmail()
benchmark.addCustomCharacterClasses()
benchmark.addBuiltinCC()
benchmark.addUnicode()
benchmark.addLiteralSearch()
benchmark.addDiceNotation()
benchmark.addErrorMessages()
benchmark.addIpAddress()
// -- end of registrations --
return benchmark
}
Expand Down
Loading