Skip to content

Commit 662a8b9

Browse files
committed
cli: change --no-captures to --captures (all|implicit|none)
When we added the WhichCaptures type, we didn't update the CLI to expose the full functionality. This change does that.
1 parent 6b72eec commit 662a8b9

File tree

4 files changed: 60 additions and 20 deletions.

regex-automata/src/nfa/thompson/map.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ const INIT: u64 = 14695981039346656037;
6565
/// Specifically, one could observe the difference with std's hashmap via
6666
/// something like the following benchmark:
6767
///
68-
/// hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
68+
/// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'"
6969
///
7070
/// But to observe that difference, you'd have to modify the code to use
7171
/// std's hashmap.

regex-automata/src/nfa/thompson/range_trie.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -594,7 +594,7 @@ impl State {
594594
// Benchmarks suggest that binary search is just a bit faster than
595595
// straight linear search. Specifically when using the debug tool:
596596
//
597-
// hyperfine "regex-cli debug thompson -qr --no-captures '\w{90} ecurB'"
597+
// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'"
598598
binary_search(&self.transitions, |t| range.start <= t.range.end)
599599
}
600600

regex-cli/args/flags.rs

+52
Original file line number | Diff line number | Diff line change
@@ -152,3 +152,55 @@ impl std::str::FromStr for MatchKind {
152152
Ok(MatchKind { kind })
153153
}
154154
}
155+
156+
/// Provides an implementation of the --captures flag, for use with Thompson
157+
/// NFA configuration.
158+
#[derive(Debug)]
159+
pub struct WhichCaptures {
160+
pub which: regex_automata::nfa::thompson::WhichCaptures,
161+
}
162+
163+
impl WhichCaptures {
164+
pub const USAGE: Usage = Usage::new(
165+
"--captures <which>",
166+
"One of: all, implicit or none.",
167+
r#"
168+
Selects which capture states should be included in the Thompson NFA. The
169+
choices are 'all' (the default), 'implicit' or 'none'.
170+
171+
'all' means that both explicit and implicit capture states are included.
172+
173+
'implicit' means that only implicit capture states are included. That is, the
174+
Thompson NFA will only be able to report the overall match offsets and not the
175+
match offsets of each explicit capture group.
176+
177+
'none' means that no capture states will be included. This is useful when
178+
capture states aren't needed (like when building a DFA) or if they aren't
179+
supported (like when building a reverse NFA).
180+
"#,
181+
);
182+
}
183+
184+
impl Default for WhichCaptures {
185+
fn default() -> WhichCaptures {
186+
WhichCaptures {
187+
which: regex_automata::nfa::thompson::WhichCaptures::All,
188+
}
189+
}
190+
}
191+
192+
impl std::str::FromStr for WhichCaptures {
193+
type Err = anyhow::Error;
194+
195+
fn from_str(s: &str) -> anyhow::Result<WhichCaptures> {
196+
let which = match s {
197+
"all" => regex_automata::nfa::thompson::WhichCaptures::All,
198+
"implicit" => {
199+
regex_automata::nfa::thompson::WhichCaptures::Implicit
200+
}
201+
"none" => regex_automata::nfa::thompson::WhichCaptures::None,
202+
unk => anyhow::bail!("unrecognized captures option '{}'", unk),
203+
};
204+
Ok(WhichCaptures { which })
205+
}
206+
}

regex-cli/args/thompson.rs

+6 -18
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,11 @@ impl Configurable for Config {
7070
Arg::Long("shrink") => {
7171
self.thompson = self.thompson.clone().shrink(true);
7272
}
73-
Arg::Long("no-captures") => {
74-
self.thompson = self
75-
.thompson
76-
.clone()
77-
.which_captures(thompson::WhichCaptures::None);
73+
Arg::Long("captures") => {
74+
let which: flags::WhichCaptures =
75+
args::parse(p, "--captures")?;
76+
self.thompson =
77+
self.thompson.clone().which_captures(which.which);
7878
}
7979
Arg::Long("line-terminator") => {
8080
let byte: flags::OneByte =
@@ -136,19 +136,7 @@ spent shrinking the NFA can lead to far larger savings in the subsequent DFA
136136
determinization.
137137
"#,
138138
),
139-
Usage::new(
140-
"--no-captures",
141-
"Disable capture states.",
142-
r#"
143-
Disables capture states. By default, NFAs include special "capture" states that
144-
instruct some regex engines (like the PikeVM) to record offset positions in
145-
ancillary state.
146-
147-
It can be useful to disable capture states in order to reduce "clutter" in the
148-
automaton when debugging it. Also, at time of writing, reverse NFAs require
149-
that capture groups are disabled.
150-
"#,
151-
),
139+
flags::WhichCaptures::USAGE,
152140
Usage::new(
153141
"--line-terminator",
154142
"Set the line terminator used by line anchors.",

0 commit comments

Comments
 (0)