Skip to content

Commit 9be4826

Browse files
committed
wip: map output as dsl tree node
1 parent d9869ea commit 9be4826

File tree

8 files changed

+172
-69
lines changed

8 files changed

+172
-69
lines changed

Diff for: Sources/_RegexParser/Regex/Parse/CaptureList.swift

+9-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,15 @@ extension CaptureList.Capture: CustomStringConvertible {
158158
typeStr = "Substring"
159159
}
160160
let suffix = String(repeating: "?", count: optionalDepth)
161-
return typeStr + suffix
161+
162+
let prefix: String
163+
if let name = name {
164+
prefix = name + ": "
165+
} else {
166+
prefix = ""
167+
}
168+
169+
return prefix + typeStr + suffix
162170
}
163171
}
164172
extension CaptureList: CustomStringConvertible {

Diff for: Sources/_StringProcessing/ByteCodeGen.swift

+3
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,9 @@ extension Compiler.ByteCodeGen {
717717
case let .matcher(_, f):
718718
emitMatcher(f)
719719

720+
case let .mapOutput(retTy, fun, child):
721+
fatalError()
722+
720723
case .transform:
721724
throw Unreachable(
722725
"Transforms only directly inside captures")

Diff for: Sources/_StringProcessing/ConsumerInterface.swift

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ extension DSLTree.Node {
4949
fatalError("FIXME: Is this where we handle them?")
5050
case .characterPredicate:
5151
fatalError("FIXME: Is this where we handle them?")
52+
case .mapOutput:
53+
fatalError("FIXME: Is this where we handle them?")
5254
}
5355
}
5456
}

Diff for: Sources/_StringProcessing/PrintAsPattern.swift

+3
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ extension PrettyPrinter {
234234
case .characterPredicate:
235235
print("/* TODO: character predicates */")
236236

237+
case .mapOutput:
238+
print("/* TODO: map output */")
239+
237240
case .absentFunction:
238241
print("/* TODO: absent function */")
239242
}

Diff for: Sources/_StringProcessing/Regex/DSLTree.swift

+22-12
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ extension DSLTree {
9797

9898
// TODO: Would this just boil down to a consumer?
9999
case characterPredicate(_CharacterPredicateInterface)
100+
101+
case mapOutput(Any.Type, _MapOutputInterface, Node)
100102
}
101103
}
102104

@@ -237,9 +239,11 @@ public typealias _MatcherInterface = (
237239

238240
// Character-set (post grapheme segmentation)
239241
@_spi(RegexBuilder)
240-
public typealias _CharacterPredicateInterface = (
241-
(Character) -> Bool
242-
)
242+
public typealias _CharacterPredicateInterface = (Character) -> Bool
243+
244+
// Output mapping
245+
@_spi(RegexBuilder)
246+
public typealias _MapOutputInterface = (Any) -> Any
243247

244248
/*
245249

@@ -257,20 +261,22 @@ extension DSLTree.Node {
257261
public var children: [DSLTree.Node]? {
258262
switch self {
259263

260-
case let .orderedChoice(v): return v
264+
case let .orderedChoice(v): return v
261265
case let .concatenation(v): return v
262266

263267
case let .convertedRegexLiteral(n, _):
264268
// Treat this transparently
265269
return n.children
266270

267-
case let .capture(_, _, n): return [n]
268-
case let .nonCapturingGroup(_, n): return [n]
269-
case let .transform(_, n): return [n]
270-
case let .quantification(_, _, n): return [n]
271+
case let .capture(_, _, n): return [n]
272+
case let .nonCapturingGroup(_, n): return [n]
273+
case let .transform(_, n): return [n]
274+
case let .quantification(_, _, n): return [n]
275+
case let .mapOutput(_, _, n): return [n]
271276

272277
case let .conditional(_, t, f): return [t,f]
273278

279+
274280
case .trivia, .empty, .quotedLiteral, .regexLiteral,
275281
.consumer, .matcher, .characterPredicate,
276282
.customCharacterClass, .atom:
@@ -513,6 +519,9 @@ extension DSLTree.Node {
513519
case .matcher:
514520
break
515521

522+
case let .mapOutput(retTy, _, _):
523+
fatalError("Add retTy's contents to capture list")
524+
516525
case .transform(_, let child):
517526
child._addCaptures(to: &list, optionalNesting: nesting)
518527

@@ -549,10 +558,11 @@ extension DSLTree {
549558
// Treat this transparently
550559
return _Tree(n).children
551560

552-
case let .capture(_, _, n): return [_Tree(n)]
553-
case let .nonCapturingGroup(_, n): return [_Tree(n)]
554-
case let .transform(_, n): return [_Tree(n)]
555-
case let .quantification(_, _, n): return [_Tree(n)]
561+
case let .capture(_, _, n): return [_Tree(n)]
562+
case let .nonCapturingGroup(_, n): return [_Tree(n)]
563+
case let .transform(_, n): return [_Tree(n)]
564+
case let .quantification(_, _, n): return [_Tree(n)]
565+
case let .mapOutput(_, _, n): return [_Tree(n)]
556566

557567
case let .conditional(_, t, f): return [_Tree(t), _Tree(f)]
558568

Diff for: Sources/_StringProcessing/Regex/MapOutput.swift

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
3+
// Output mapping for `Regex`: wraps an existing regex in a `.mapOutput`
// DSL tree node.
@available(SwiftStdlib 5.7, *)
4+
extension Regex {
5+
/// Builds a `Regex<NewOutput>` whose root is a `.mapOutput` node wrapping
/// this regex's `root`.
///
/// The node records `NewOutput.self` together with a type-erased
/// `(Any) -> Any` form of `f`.
///
/// NOTE(review): work in progress. In this same change the bytecode
/// generator and the capture-list builder both call `fatalError` when they
/// encounter `.mapOutput`, so a regex produced here is presumably not yet
/// usable for matching. Confirm before relying on it.
///
/// - Parameter f: Converts a value of this regex's `Output` to `NewOutput`.
/// - Returns: A regex initialized from the new `.mapOutput` node.
public func mapOutput<NewOutput>(
6+
_ f: @escaping (Output) -> NewOutput
7+
) -> Regex<NewOutput> {
8+
.init(node: .mapOutput(
9+
NewOutput.self,
10+
// Erase `f` to `(Any) -> Any`; the forced cast traps at run time if the
// value handed to the closure is not an `Output`.
{ f($0 as! Output) },
11+
self.root))
12+
}
13+
}
}
14+

Diff for: Sources/_StringProcessing/Utility/TypeVerification.swift

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,18 @@
1414
@available(SwiftStdlib 5.7, *)
1515
extension Regex {
1616
internal func _verifyType() -> Bool {
17+
guard Output.self != AnyRegexOutput.self else { return true }
18+
1719
var tupleElements: [Any.Type] = [Substring.self]
1820
var labels = " "
1921

2022
for capture in program.tree.root._captureList.captures {
2123
var captureType: Any.Type = capture.type ?? Substring.self
22-
var i = capture.optionalDepth
23-
24-
while i != 0 {
24+
25+
for _ in 0..<capture.optionalDepth {
2526
captureType = TypeConstruction.optionalType(of: captureType)
26-
i -= 1
2727
}
28-
28+
2929
tupleElements.append(captureType)
3030

3131
if let name = capture.name {
@@ -47,7 +47,7 @@ extension Regex {
4747
// to the tuple. In that case, don't pass a label string.
4848
labels: labels.all { $0 == " " } ? nil : labels
4949
)
50-
50+
5151
return Output.self == createdType
5252
}
5353
}

Diff for: Tests/RegexBuilderTests/AnyRegexOutputTests.swift

+113-50
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ import RegexBuilder
55

66
private let enablePrinting = false
77

8+
/// Test helper that returns its argument unchanged, retyped so that the
/// third tuple element carries the label `note`.
///
/// - Parameter in: An unlabeled tuple of five `Substring` values.
/// - Returns: The same five values, with the third element labeled `note`.
func addNote(
9+
_ in: (Substring, Substring, Substring, Substring, Substring)
10+
) -> (Substring, Substring, note: Substring, Substring, Substring) {
11+
return `in`
12+
}
13+
/// Test helper that returns its argument unchanged; both the parameter and
/// the result are unlabeled tuples of five `Substring` values.
///
/// NOTE(review): presumably meant to be passed a labeled tuple so that the
/// label is dropped by the conversion to the unlabeled parameter type; the
/// only visible use is in commented-out test code. Confirm the intent.
///
/// - Parameter in: An unlabeled tuple of five `Substring` values.
/// - Returns: `in`, unchanged.
func removeName(
14+
_ in: (Substring, Substring, Substring, Substring, Substring)
15+
) -> (Substring, Substring, Substring, Substring, Substring) {
16+
return `in`
17+
}
18+
819
extension RegexDSLTests {
920

1021
func testContrivedAROExample() {
@@ -139,7 +150,7 @@ extension RegexDSLTests {
139150
let aro = Regex<AnyRegexOutput>(re)
140151

141152
// FIXME: The below fatal errors
142-
let casted = aro//try! XCTUnwrap(Regex(aro, as: Output.self))
153+
let casted = try! XCTUnwrap(Regex(aro, as: Output.self))
143154

144155
// contains(captureNamed:)
145156
checkContains(re, kind)
@@ -153,71 +164,123 @@ extension RegexDSLTests {
153164
}
154165

155166
// Literals (mocked up via explicit `as` types)
156-
check(try! Regex(#"""
167+
let noCapBody = #"""
157168
(?x)
158169
\p{hexdigit}{4} -? \p{hexdigit}{4} -?
159170
\p{hexdigit}{4} -? \p{hexdigit}{4}
160-
"""#, as: Substring.self),
161-
.none,
162-
noCapOutput
163-
)
164-
check(try! Regex(#"""
171+
"""#
172+
let noCapType = Substring.self
173+
174+
let unnamedBody = #"""
165175
(?x)
166176
(\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -?
167177
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
168-
"""#, as: (Substring, Substring, Substring, Substring, Substring).self),
169-
.unnamed,
170-
unnamedOutput
171-
)
172-
check(try! Regex(#"""
178+
"""#
179+
let unnamedType = (Substring, Substring, Substring, Substring, Substring).self
180+
181+
let salientBody = #"""
173182
(?x)
174183
(\p{hexdigit}{4}) -? (?<salient>\p{hexdigit}{4}) -?
175184
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
176-
"""#, as: (Substring, Substring, Substring, Substring, Substring).self),
177-
.salient,
178-
salientOutput
179-
)
180-
check(try! Regex(#"""
185+
"""#
186+
let salientType = (Substring, Substring, salient: Substring, Substring, Substring).self
187+
188+
let noteBody = #"""
181189
(?x)
182190
(\p{hexdigit}{4}) -? (?<note>\p{hexdigit}{4}) -?
183191
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
184-
"""#, as: (Substring, Substring, Substring, Substring, Substring).self),
192+
"""#
193+
let noteType = (Substring, Substring, note: Substring, Substring, Substring).self
194+
195+
let unknownBody = #"""
196+
(?x)
197+
(\p{hexdigit}{4}) -? (?<unknown>\p{hexdigit}{4}) -?
198+
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
199+
"""#
200+
let unknownType = (Substring, Substring, unknown: Substring, Substring, Substring).self
201+
202+
// TODO: unknown body tests?
203+
204+
205+
// Literals (mocked via exactly matching explicit types)
206+
check(
207+
try! Regex(noCapBody, as: noCapType),
208+
.none,
209+
noCapOutput
210+
)
211+
check(
212+
try! Regex(unnamedBody, as: unnamedType),
213+
.unnamed,
214+
unnamedOutput
215+
)
216+
check(
217+
try! Regex(salientBody, as: salientType),
218+
.salient,
219+
salientOutput
220+
)
221+
check(
222+
try! Regex(noteBody, as: noteType),
185223
.note,
186224
noteOutput
187225
)
226+
// An unknown capture name behaves the same as unnamed captures
227+
check(
228+
try! Regex(unknownBody, as: unknownType),
229+
.unnamed,
230+
unnamedOutput
231+
)
232+
233+
// TODO: Try regexes `as` different types to pick up different behavior
234+
235+
// TODO: A `mapOutput` variant that takes no-cap and produces captures
236+
// by matching the other regexes inside the mapping
188237

189238
// Run-time strings (ARO)
190-
check(try! Regex(#"""
191-
(?x)
192-
\p{hexdigit}{4} -? \p{hexdigit}{4} -?
193-
\p{hexdigit}{4} -? \p{hexdigit}{4}
194-
"""#),
239+
check(
240+
try! Regex(noCapBody),
195241
.none,
196242
noCapOutput)
197-
check(try! Regex(#"""
198-
(?x)
199-
(\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -?
200-
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
201-
"""#),
243+
check(
244+
try! Regex(unnamedBody),
202245
.unnamed,
203246
unnamedOutput
204247
)
205-
check(try! Regex(#"""
206-
(?x)
207-
(\p{hexdigit}{4}) -? (?<salient>\p{hexdigit}{4}) -?
208-
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
209-
"""#),
248+
check(
249+
try! Regex(salientBody),
210250
.salient,
211251
salientOutput
212252
)
213-
check(try! Regex(#"""
214-
(?x)
215-
(\p{hexdigit}{4}) -? (?<note>\p{hexdigit}{4}) -?
216-
(\p{hexdigit}{4}) -? (\p{hexdigit}{4})
217-
"""#),
253+
check(
254+
try! Regex(noteBody),
218255
.note,
219256
noteOutput
220257
)
258+
// An unknown capture name behaves the same as unnamed captures
259+
check(
260+
try! Regex(unknownBody),
261+
.unnamed,
262+
unnamedOutput
263+
)
264+
265+
// // Use `mapOutput` to add or remove capture names
266+
// check(
267+
// try! Regex(unnamedBody).mapOutput(addNote),
268+
// .note,
269+
// noteOutput
270+
// )
271+
// check(
272+
// try! Regex(salientBody).mapOutput(addNote),
273+
// .note,
274+
// noteOutput
275+
// )
276+
// check(try! Regex(#"""
277+
// (?x)
278+
// (\p{hexdigit}{4}) -? (?<salient>\p{hexdigit}{4}) -?
279+
// (\p{hexdigit}{4}) -? (\p{hexdigit}{4})
280+
// """#).mapOutput(removeName),
281+
// .unnamed,
282+
// unnamedOutput
283+
// )
221284

222285
// Builders
223286
check(
@@ -234,23 +297,23 @@ extension RegexDSLTests {
234297
.none,
235298
noCapOutput
236299
)
300+
let capDSL = Regex {
301+
let doublet = Repeat(.hexDigit, count: 4)
302+
Capture { doublet }
303+
Optionally { "-" }
304+
Capture { doublet }
305+
Optionally { "-" }
306+
Capture { doublet }
307+
Optionally { "-" }
308+
Capture { doublet }
309+
}
237310
check(
238-
Regex {
239-
let doublet = Repeat(.hexDigit, count: 4)
240-
Capture { doublet }
241-
Optionally { "-" }
242-
Capture { doublet }
243-
Optionally { "-" }
244-
Capture { doublet }
245-
Optionally { "-" }
246-
Capture { doublet }
247-
},
311+
capDSL,
248312
.unnamed,
249313
unnamedOutput
250314
)
251315

252-
// FIXME: `salient` and `note` builders using a semantically rich
253-
// `mapOutput`
316+
// TODO: add first-class capture names via `mapOutput` to DSL test
254317

255318
}
256319
}

0 commit comments

Comments
 (0)