Skip to content

Commit 96fb215

Browse files
authored
Benchmarker improvements and more benchmarks (#581)
- Added some basic statistics to benchmark measurements - Added new benchmark mode, InputList - Fixed CLI flags - New benchmarks: Emojis, dice notation, swiftc error messages, IPv4, IPv6, MAC addresses
1 parent 1f2ae04 commit 96fb215

27 files changed

+13091
-1192
lines changed

Sources/RegexBenchmark/Benchmark.swift

+89-24
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
import _StringProcessing
22
import Foundation
33

4-
public protocol RegexBenchmark {
4+
protocol RegexBenchmark {
55
var name: String { get }
66
func run()
77
func debug()
88
}
99

10-
public struct Benchmark: RegexBenchmark {
11-
public let name: String
10+
struct Benchmark: RegexBenchmark {
11+
let name: String
1212
let regex: Regex<AnyRegexOutput>
1313
let type: MatchType
1414
let target: String
1515

16-
public enum MatchType {
16+
enum MatchType {
1717
case whole
1818
case first
1919
case allMatches
2020
}
2121

22-
public func run() {
22+
func run() {
2323
switch type {
2424
case .whole: blackHole(target.wholeMatch(of: regex))
2525
case .allMatches: blackHole(target.matches(of: regex))
@@ -28,8 +28,8 @@ public struct Benchmark: RegexBenchmark {
2828
}
2929
}
3030

31-
public struct NSBenchmark: RegexBenchmark {
32-
public let name: String
31+
struct NSBenchmark: RegexBenchmark {
32+
let name: String
3333
let regex: NSRegularExpression
3434
let type: NSMatchType
3535
let target: String
@@ -38,19 +38,55 @@ public struct NSBenchmark: RegexBenchmark {
3838
NSRange(target.startIndex..<target.endIndex, in: target)
3939
}
4040

41-
public enum NSMatchType {
41+
enum NSMatchType {
4242
case allMatches
4343
case first
4444
}
4545

46-
public func run() {
46+
func run() {
4747
switch type {
4848
case .allMatches: blackHole(regex.matches(in: target, range: range))
4949
case .first: blackHole(regex.firstMatch(in: target, range: range))
5050
}
5151
}
5252
}
5353

54+
/// A benchmark running a regex on strings in an input list
55+
struct InputListBenchmark: RegexBenchmark {
56+
let name: String
57+
let regex: Regex<AnyRegexOutput>
58+
let targets: [String]
59+
60+
func run() {
61+
for target in targets {
62+
blackHole(target.wholeMatch(of: regex))
63+
}
64+
}
65+
}
66+
67+
struct InputListNSBenchmark: RegexBenchmark {
68+
let name: String
69+
let regex: NSRegularExpression
70+
let targets: [String]
71+
72+
init(name: String, regex: String, targets: [String]) {
73+
self.name = name
74+
self.regex = try! NSRegularExpression(pattern: "^" + regex + "$")
75+
self.targets = targets
76+
}
77+
78+
func range(in target: String) -> NSRange {
79+
NSRange(target.startIndex..<target.endIndex, in: target)
80+
}
81+
82+
func run() {
83+
for target in targets {
84+
let range = range(in: target)
85+
blackHole(regex.firstMatch(in: target, range: range))
86+
}
87+
}
88+
}
89+
5490
/// A benchmark meant to be run across multiple engines
5591
struct CrossBenchmark {
5692
/// The base name of the benchmark
@@ -69,11 +105,12 @@ struct CrossBenchmark {
69105
/// TODO: Probably better to have a whole-line vs search anywhere, maybe
70106
/// accommodate multi-line matching, etc.
71107
var isWhole: Bool = false
108+
109+
/// Whether or not to do firstMatch as well or just allMatches
110+
var includeFirst: Bool = false
72111

73112
func register(_ runner: inout BenchmarkRunner) {
74113
let swiftRegex = try! Regex(regex)
75-
76-
let nsPattern = isWhole ? "^" + regex + "$" : regex
77114
let nsRegex: NSRegularExpression
78115
if isWhole {
79116
nsRegex = try! NSRegularExpression(pattern: "^" + regex + "$")
@@ -95,37 +132,65 @@ struct CrossBenchmark {
95132
type: .first,
96133
target: input))
97134
} else {
98-
runner.register(
99-
Benchmark(
100-
name: baseName + "First",
101-
regex: swiftRegex,
102-
type: .first,
103-
target: input))
104135
runner.register(
105136
Benchmark(
106137
name: baseName + "All",
107138
regex: swiftRegex,
108139
type: .allMatches,
109140
target: input))
110-
runner.register(
111-
NSBenchmark(
112-
name: baseName + "First_NS",
113-
regex: nsRegex,
114-
type: .first,
115-
target: input))
116141
runner.register(
117142
NSBenchmark(
118143
name: baseName + "All_NS",
119144
regex: nsRegex,
120145
type: .allMatches,
121146
target: input))
147+
if includeFirst {
148+
runner.register(
149+
Benchmark(
150+
name: baseName + "First",
151+
regex: swiftRegex,
152+
type: .first,
153+
target: input))
154+
runner.register(
155+
NSBenchmark(
156+
name: baseName + "First_NS",
157+
regex: nsRegex,
158+
type: .first,
159+
target: input))
160+
}
122161
}
123162
}
124163
}
125164

165+
/// A benchmark running a regex on strings in input list, run across multiple engines
166+
struct CrossInputListBenchmark {
167+
/// The base name of the benchmark
168+
var baseName: String
169+
170+
/// The string to compile in different engines
171+
var regex: String
172+
173+
/// The list of strings to search
174+
var inputs: [String]
175+
176+
func register(_ runner: inout BenchmarkRunner) {
177+
let swiftRegex = try! Regex(regex)
178+
runner.register(InputListBenchmark(
179+
name: baseName,
180+
regex: swiftRegex,
181+
targets: inputs
182+
))
183+
runner.register(InputListNSBenchmark(
184+
name: baseName + "NS",
185+
regex: regex,
186+
targets: inputs
187+
))
188+
}
189+
}
190+
126191
// TODO: Capture-containing benchmarks
127192

128193
// nom nom nom, consume the argument
129194
@inline(never)
130-
public func blackHole<T>(_ x: T) {
195+
func blackHole<T>(_ x: T) {
131196
}

Sources/RegexBenchmark/BenchmarkRegistration.swift

+10-4
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,26 @@
22
// Do not remove the start of registration or end of registration markers
33

44
extension BenchmarkRunner {
5-
public static func makeRunner(
5+
static func makeRunner(
66
_ samples: Int,
7-
_ outputPath: String
7+
_ quiet: Bool
88
) -> BenchmarkRunner {
9-
var benchmark = BenchmarkRunner("RegexBench", samples, outputPath)
9+
var benchmark = BenchmarkRunner("RegexBench", samples, quiet)
1010
// -- start of registrations --
1111
benchmark.addReluctantQuant()
1212
benchmark.addCSS()
1313
benchmark.addNotFound()
1414
benchmark.addGraphemeBreak()
1515
benchmark.addHangulSyllable()
16-
benchmark.addHTML()
16+
// benchmark.addHTML() // Disabled due to \b being unusably slow
1717
benchmark.addEmail()
1818
benchmark.addCustomCharacterClasses()
19+
benchmark.addBuiltinCC()
20+
benchmark.addUnicode()
21+
benchmark.addLiteralSearch()
22+
benchmark.addDiceNotation()
23+
benchmark.addErrorMessages()
24+
benchmark.addIpAddress()
1925
// -- end of registrations --
2026
return benchmark
2127
}

0 commit comments

Comments
 (0)