diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift b/Sources/_RegexParser/Regex/Parse/CaptureList.swift index 2a5a47395..5af132637 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureList.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -158,7 +158,15 @@ extension CaptureList.Capture: CustomStringConvertible { typeStr = "Substring" } let suffix = String(repeating: "?", count: optionalDepth) - return typeStr + suffix + + let prefix: String + if let name = name { + prefix = name + ": " + } else { + prefix = "" + } + + return prefix + typeStr + suffix } } extension CaptureList: CustomStringConvertible { diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 3a91b6c67..c0e37f94a 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -717,6 +717,9 @@ extension Compiler.ByteCodeGen { case let .matcher(_, f): emitMatcher(f) + case let .mapOutput(retTy, fun, child): + fatalError() + case .transform: throw Unreachable( "Transforms only directly inside captures") diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 640fe3c93..fbce1422b 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -49,6 +49,8 @@ extension DSLTree.Node { fatalError("FIXME: Is this where we handle them?") case .characterPredicate: fatalError("FIXME: Is this where we handle them?") + case .mapOutput: + fatalError("FIXME: Is this where we handle them?") } } } diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 2abcf49b5..3d45e6686 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -234,6 +234,9 @@ extension PrettyPrinter { case .characterPredicate: print("/* TODO: character predicates */") + case .mapOutput: + print("/* TODO: map output */") + case .absentFunction: print("/* TODO: absent function */") } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 8ca6dce8d..75e5bb727 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -97,6 +97,8 @@ extension DSLTree { // TODO: Would this just boil down to a consumer? case characterPredicate(_CharacterPredicateInterface) + + case mapOutput(Any.Type, _MapOutputInterface, Node) } } @@ -237,9 +239,11 @@ public typealias _MatcherInterface = ( // Character-set (post grapheme segmentation) @_spi(RegexBuilder) -public typealias _CharacterPredicateInterface = ( - (Character) -> Bool -) +public typealias _CharacterPredicateInterface = (Character) -> Bool + +// Output mapping +@_spi(RegexBuilder) +public typealias _MapOutputInterface = (Any) -> Any /* @@ -257,20 +261,22 @@ extension DSLTree.Node { public var children: [DSLTree.Node]? { switch self { - case let .orderedChoice(v): return v + case let .orderedChoice(v): return v case let .concatenation(v): return v case let .convertedRegexLiteral(n, _): // Treat this transparently return n.children - case let .capture(_, _, n): return [n] - case let .nonCapturingGroup(_, n): return [n] - case let .transform(_, n): return [n] - case let .quantification(_, _, n): return [n] + case let .capture(_, _, n): return [n] + case let .nonCapturingGroup(_, n): return [n] + case let .transform(_, n): return [n] + case let .quantification(_, _, n): return [n] + case let .mapOutput(_, _, n): return [n] case let .conditional(_, t, f): return [t,f] + case .trivia, .empty, .quotedLiteral, .regexLiteral, .consumer, .matcher, .characterPredicate, .customCharacterClass, .atom: @@ -513,6 +519,9 @@ extension DSLTree.Node { case .matcher: break + case let .mapOutput(retTy, _, _): + fatalError("Add retTy's contents to capture list") + case .transform(_, let child): child._addCaptures(to: &list, optionalNesting: nesting) @@ -549,10 +558,11 @@ extension DSLTree { // Treat this transparently return _Tree(n).children - case let .capture(_, _, n): return [_Tree(n)] - case let .nonCapturingGroup(_, n): return [_Tree(n)] - case let .transform(_, n): return [_Tree(n)] - case let .quantification(_, _, n): return [_Tree(n)] + case let .capture(_, _, n): return [_Tree(n)] + case let .nonCapturingGroup(_, n): return [_Tree(n)] + case let .transform(_, n): return [_Tree(n)] + case let .quantification(_, _, n): return [_Tree(n)] + case let .mapOutput(_, _, n): return [_Tree(n)] case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] diff --git a/Sources/_StringProcessing/Regex/MapOutput.swift b/Sources/_StringProcessing/Regex/MapOutput.swift new file mode 100644 index 000000000..e13ab4e2a --- /dev/null +++ b/Sources/_StringProcessing/Regex/MapOutput.swift @@ -0,0 +1,14 @@ + + +@available(SwiftStdlib 5.7, *) +extension Regex { + public func mapOutput( + _ f: @escaping (Output) -> NewOutput + ) -> Regex { + .init(node: .mapOutput( + NewOutput.self, + { f($0 as! Output) }, + self.root)) + } +} + diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift index df0b59f2c..ba7deb302 100644 --- a/Sources/_StringProcessing/Utility/TypeVerification.swift +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -14,18 +14,18 @@ @available(SwiftStdlib 5.7, *) extension Regex { internal func _verifyType() -> Bool { + guard Output.self != AnyRegexOutput.self else { return true } + var tupleElements: [Any.Type] = [Substring.self] var labels = " " for capture in program.tree.root._captureList.captures { var captureType: Any.Type = capture.type ?? Substring.self - var i = capture.optionalDepth - - while i != 0 { + + for _ in 0.. (Substring, Substring, note: Substring, Substring, Substring) { + return `in` +} +func removeName( + _ in: (Substring, Substring, Substring, Substring, Substring) +) -> (Substring, Substring, Substring, Substring, Substring) { + return `in` +} + extension RegexDSLTests { func testContrivedAROExample() { @@ -139,7 +150,7 @@ extension RegexDSLTests { let aro = Regex(re) // FIXME: The below fatal errors - let casted = aro//try! XCTUnwrap(Regex(aro, as: Output.self)) + let casted = try! XCTUnwrap(Regex(aro, as: Output.self)) // contains(captureNamed:) checkContains(re, kind) @@ -153,71 +164,123 @@ extension RegexDSLTests { } // Literals (mocked up via explicit `as` types) - check(try! Regex(#""" + let noCapBody = #""" (?x) \p{hexdigit}{4} -? \p{hexdigit}{4} -? \p{hexdigit}{4} -? \p{hexdigit}{4} - """#, as: Substring.self), - .none, - noCapOutput - ) - check(try! Regex(#""" + """# + let noCapType = Substring.self + + let unnamedBody = #""" (?x) (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#, as: (Substring, Substring, Substring, Substring, Substring).self), - .unnamed, - unnamedOutput - ) - check(try! Regex(#""" + """# + let unnamedType = (Substring, Substring, Substring, Substring, Substring).self + + let salientBody = #""" (?x) (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#, as: (Substring, Substring, Substring, Substring, Substring).self), - .salient, - salientOutput - ) - check(try! Regex(#""" + """# + let salientType = (Substring, Substring, salient: Substring, Substring, Substring).self + + let noteBody = #""" (?x) (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#, as: (Substring, Substring, Substring, Substring, Substring).self), + """# + let noteType = (Substring, Substring, note: Substring, Substring, Substring).self + + let unknownBody = #""" + (?x) + (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? + (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) + """# + let unknownType = (Substring, Substring, unknown: Substring, Substring, Substring).self + + // TODO: unknown body tests? + + + // Literals (mocked via exactly matching explicit types) + check( + try! Regex(noCapBody, as: noCapType), + .none, + noCapOutput + ) + check( + try! Regex(unnamedBody, as: unnamedType), + .unnamed, + unnamedOutput + ) + check( + try! Regex(salientBody, as: salientType), + .salient, + salientOutput + ) + check( + try! Regex(noteBody, as: noteType), .note, noteOutput ) + // Unknown behaves same as unnamed + check( + try! Regex(unknownBody, as: unknownType), + .unnamed, + unnamedOutput + ) + + // TODO: Try regexes `as` different types to pick up different behavior + + // TODO: A `mapOutput` variant that takes no-cap and produces captures + // by matching the other regexes inside the mapping // Run-time strings (ARO) - check(try! Regex(#""" - (?x) - \p{hexdigit}{4} -? \p{hexdigit}{4} -? - \p{hexdigit}{4} -? \p{hexdigit}{4} - """#), + check( + try! Regex(noCapBody), .none, noCapOutput) - check(try! Regex(#""" - (?x) - (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) -? - (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#), + check( + try! Regex(unnamedBody), .unnamed, unnamedOutput ) - check(try! Regex(#""" - (?x) - (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? - (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#), + check( + try! Regex(salientBody), .salient, salientOutput ) - check(try! Regex(#""" - (?x) - (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? - (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) - """#), + check( + try! Regex(noteBody), .note, noteOutput ) + // Unknown behaves same as no names + check( + try! Regex(unknownBody), + .unnamed, + unnamedOutput + ) + +// // Use `mapOutput` to add or remove capture names +// check( +// try! Regex(unnamedBody).mapOutput(addNote), +// .note, +// noteOutput +// ) +// check( +// try! Regex(salientBody).mapOutput(addNote), +// .note, +// noteOutput +// ) +// check(try! Regex(#""" +// (?x) +// (\p{hexdigit}{4}) -? (?\p{hexdigit}{4}) -? +// (\p{hexdigit}{4}) -? (\p{hexdigit}{4}) +// """#).mapOutput(removeName), +// .unnamed, +// unnamedOutput +// ) // Builders check( @@ -234,23 +297,23 @@ extension RegexDSLTests { .none, noCapOutput ) + let capDSL = Regex { + let doublet = Repeat(.hexDigit, count: 4) + Capture { doublet } + Optionally { "-" } + Capture { doublet } + Optionally { "-" } + Capture { doublet } + Optionally { "-" } + Capture { doublet } + } check( - Regex { - let doublet = Repeat(.hexDigit, count: 4) - Capture { doublet } - Optionally { "-" } - Capture { doublet } - Optionally { "-" } - Capture { doublet } - Optionally { "-" } - Capture { doublet } - }, + capDSL, .unnamed, unnamedOutput ) - // FIXME: `salient` and `note` builders using a semantically rich - // `mapOutput` + // TODO: add first-class capture names via `mapOutput` to DSL test } }