Skip to content

Commit 47e944c

Browse files
author
Marek Suchánek
committed
Better error reporting for regular expressions
1 parent 07489c6 commit 47e944c

File tree

3 files changed

+36
-54
lines changed

3 files changed

+36
-54
lines changed

src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ mod write;
2222

2323
pub use module::{ContentType, Input, Module};
2424

25+
/// newdoc uses many regular expressions in several places. Constructing them should never fail,
26+
/// because the pattern doesn't change at runtime, but in case it does, present a unified
27+
/// error message through `expect`.
28+
const REGEX_ERROR: &str = "Failed to construct a regular expression. Please report this as a bug";
29+
2530
/// This struct stores options based on the command-line arguments,
2631
/// and is passed to various functions across the program.
2732
#[derive(Debug, Clone)]

src/templating.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use askama::Template;
22
use regex::{Regex, RegexBuilder};
33

44
use crate::module::{ContentType, Input};
5+
use crate::REGEX_ERROR;
56

67
// A note on the structure of this file:
78
// This file repeats a lot of code when it configures the Askama templates.
@@ -121,22 +122,22 @@ impl Input {
121122
.multi_line(true)
122123
.swap_greed(true)
123124
.build()
124-
.unwrap();
125+
.expect(REGEX_ERROR);
125126
document = multi_comments.replace_all(&document, "").to_string();
126127

127128
// Delete single-line comments
128129
let single_comments: Regex = RegexBuilder::new(r"^//.*\n")
129130
.multi_line(true)
130131
.swap_greed(true)
131132
.build()
132-
.unwrap();
133+
.expect(REGEX_ERROR);
133134
document = single_comments.replace_all(&document, "").to_string();
134135

135136
// Delete leading white space left over by the deleted comments
136137
let leading_whitespace: Regex = RegexBuilder::new(r"^[\s\n]*")
137138
.multi_line(true)
138139
.build()
139-
.unwrap();
140+
.expect(REGEX_ERROR);
140141
document = leading_whitespace.replace(&document, "").to_string();
141142
}
142143

src/validation.rs

Lines changed: 27 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use color_eyre::eyre::{Context, Result};
88
use regex::{Regex, RegexBuilder};
99

1010
use crate::module::ContentType;
11+
use crate::REGEX_ERROR;
1112

1213
#[derive(Clone, Copy, Debug)]
1314
struct IssueDefinition {
@@ -43,7 +44,7 @@ impl IssueDefinition {
4344
let regex = RegexBuilder::new(self.pattern)
4445
.multi_line(true)
4546
.build()
46-
.unwrap();
47+
.expect(REGEX_ERROR);
4748
let findings = regex.find_iter(content);
4849

4950
findings
@@ -55,7 +56,7 @@ impl IssueDefinition {
5556
.collect()
5657
// If single-line:
5758
} else {
58-
let regex = Regex::new(self.pattern).unwrap();
59+
let regex = Regex::new(self.pattern).expect(REGEX_ERROR);
5960
let findings = content
6061
.lines()
6162
.enumerate()
@@ -200,13 +201,7 @@ fn check_common(content: &str) -> Vec<IssueReport> {
200201

201202
// This section groups all title requirements
202203
mod title {
203-
use crate::validation::find_first_occurrence;
204-
use crate::validation::find_mod_id;
205-
use crate::validation::perform_simple_tests;
206-
use crate::validation::IssueDefinition;
207-
use crate::validation::IssueReport;
208-
use crate::validation::IssueSeverity;
209-
use regex::Regex;
204+
use super::*;
210205

211206
const SIMPLE_TITLE_TESTS: [IssueDefinition; 1] = [
212207
// Test that there are no inline anchors in the title
@@ -237,14 +232,14 @@ mod title {
237232
/// Find the first occurrence of any heading in the file.
238233
/// Returns the line number of the occurrence and the line.
239234
fn find_first_heading(content: &str) -> Option<(usize, &str)> {
240-
let any_heading_regex = Regex::new(r"^(\.|=+\s+)\S+.*").unwrap();
235+
let any_heading_regex = Regex::new(r"^(\.|=+\s+)\S+.*").expect(REGEX_ERROR);
241236

242237
find_first_occurrence(content, &any_heading_regex)
243238
}
244239

245240
/// Check that the first heading found in the file is a title: a level-1, numbered heading
246241
fn check_title_level(content: &str) -> Option<IssueReport> {
247-
let title_regex = Regex::new(r"^=\s+\S+.*").unwrap();
242+
let title_regex = Regex::new(r"^=\s+\S+.*").expect(REGEX_ERROR);
248243

249244
if let Some((line_no, heading)) = find_first_heading(content) {
250245
if let Some(_title) = title_regex.find(heading) {
@@ -282,7 +277,7 @@ mod title {
282277
}
283278
};
284279

285-
let attribute_regex = Regex::new(r"\{((?:[[:alnum:]]|[-_])+)\}").unwrap();
280+
let attribute_regex = Regex::new(r"\{((?:[[:alnum:]]|[-_])+)\}").expect(REGEX_ERROR);
286281
let attribute = attribute_regex.captures(mod_id)?;
287282

288283
if attribute.get(1).unwrap().as_str() == "context" {
@@ -300,13 +295,7 @@ mod title {
300295

301296
// This section groups all content requirements
302297
mod content {
303-
use crate::validation::find_first_occurrence;
304-
use crate::validation::find_mod_id;
305-
use crate::validation::perform_simple_tests;
306-
use crate::validation::IssueDefinition;
307-
use crate::validation::IssueReport;
308-
use crate::validation::IssueSeverity;
309-
use regex::Regex;
298+
use super::*;
310299

311300
const SIMPLE_CONTENT_TESTS: [IssueDefinition; 2] = [
312301
IssueDefinition {
@@ -343,7 +332,7 @@ mod content {
343332
fn check_metadata_variable(content: &str) -> Vec<IssueReport> {
344333
let metadata_var_pattern =
345334
r":_content-type:\s*(?:ASSEMBLY|PROCEDURE|CONCEPT|REFERENCE|SNIPPET)";
346-
let metadata_var_regex = Regex::new(metadata_var_pattern).unwrap();
335+
let metadata_var_regex = Regex::new(metadata_var_pattern).expect(REGEX_ERROR);
347336
let metadata_var = find_first_occurrence(content, &metadata_var_regex);
348337

349338
let mod_id = find_mod_id(content);
@@ -380,7 +369,7 @@ mod content {
380369
/// Check that the abstract flag is followed by a paragraph,
381370
/// if it exists at all. The abstract flag is not required.
382371
fn check_abstract_flag(content: &str) -> Option<IssueReport> {
383-
let abstract_regex = Regex::new(r#"^\[role="_abstract"\]"#).unwrap();
372+
let abstract_regex = Regex::new(r#"^\[role="_abstract"\]"#).expect(REGEX_ERROR);
384373
let abstract_flag = find_first_occurrence(content, &abstract_regex);
385374

386375
// If the file contains an abstract flag, test for the following paragraph
@@ -400,7 +389,7 @@ mod content {
400389
// ⁠[systemitem]`firewalld` can be used to (...)
401390
// Let's just check that the line starts with a non-whitespace character
402391
// and that a letter appears at least somewhere.
403-
let paragraph_regex = Regex::new(r"^\S+[[:alpha:]].*").unwrap();
392+
let paragraph_regex = Regex::new(r"^\S+[[:alpha:]].*").expect(REGEX_ERROR);
404393
// If a line follows the flag but it doesn't appear as a paragraph, report the issue
405394
if paragraph_regex.find(next_line).is_none() {
406395
log::debug!("The non-paragraph-line: {:?}", next_line);
@@ -423,12 +412,7 @@ mod content {
423412
// This section groups all module requirements;
424413
// they depend on title and content, and additional resources requirements
425414
mod module {
426-
use crate::validation::check_common;
427-
use crate::validation::perform_simple_tests;
428-
use crate::validation::IssueDefinition;
429-
use crate::validation::IssueReport;
430-
use crate::validation::IssueSeverity;
431-
use regex::Regex;
415+
use super::*;
432416

433417
const SIMPLE_MODULE_TESTS: [IssueDefinition; 2] = [
434418
// Ensure the correct syntax for Additional resources
@@ -460,10 +444,10 @@ mod module {
460444
/// Test that modules include no other modules, except for snippets
461445
fn check_include_except_snip(content: &str) -> Vec<IssueReport> {
462446
let any_include_pattern = r"^include::.*\.adoc";
463-
let any_include_regex = Regex::new(any_include_pattern).unwrap();
447+
let any_include_regex = Regex::new(any_include_pattern).expect(REGEX_ERROR);
464448

465449
let snip_include_pattern = r"^include::((snip|.*/snip)[_-]|common-content/).*\.adoc";
466-
let snip_include_regex = Regex::new(snip_include_pattern).unwrap();
450+
let snip_include_regex = Regex::new(snip_include_pattern).expect(REGEX_ERROR);
467451

468452
let mut reports: Vec<IssueReport> = Vec::new();
469453

@@ -496,12 +480,7 @@ mod module {
496480
// This section groups all assembly requirements;
497481
// they depend on title and content, and additional resources requirements
498482
mod assembly {
499-
use crate::validation::check_common;
500-
use crate::validation::perform_simple_tests;
501-
use crate::validation::IssueDefinition;
502-
use crate::validation::IssueReport;
503-
use crate::validation::IssueSeverity;
504-
use regex::Regex;
483+
use super::*;
505484

506485
const SIMPLE_ASSEMBLY_TESTS: [IssueDefinition; 3] = [
507486
// Test that an assembly includes no other assemblies
@@ -547,10 +526,10 @@ mod assembly {
547526
/// * == Additional resources
548527
/// In addition, let's also assume that the legacy 'Related information' heading is fine. (TODO: Make sure.)
549528
fn check_headings_in_assembly(content: &str) -> Vec<IssueReport> {
550-
let heading_regex = Regex::new(r"^={2,}\s+\S.*").unwrap();
529+
let heading_regex = Regex::new(r"^={2,}\s+\S.*").expect(REGEX_ERROR);
551530
let standard_headings_pattern =
552531
r"^==\s+(?:Prerequisites|Additional resources|Related information)";
553-
let standard_headings_regex = Regex::new(standard_headings_pattern).unwrap();
532+
let standard_headings_regex = Regex::new(standard_headings_pattern).expect(REGEX_ERROR);
554533

555534
let mut lines_with_heading: Vec<usize> = Vec::new();
556535

@@ -580,12 +559,7 @@ mod assembly {
580559
}
581560

582561
mod additional_resources {
583-
use crate::validation::find_first_occurrence;
584-
use crate::validation::perform_simple_tests;
585-
use crate::validation::IssueDefinition;
586-
use crate::validation::IssueReport;
587-
use crate::validation::IssueSeverity;
588-
use regex::Regex;
562+
use super::*;
589563

590564
const SIMPLE_ADDITIONAL_RESOURCES_TESTS: [IssueDefinition; 0] = [
591565
// No simple tests at this point.
@@ -623,7 +597,7 @@ mod additional_resources {
623597
let add_res_regex = Regex::new(
624598
r"^(?:==\s+|\.)(?:Additional resources|Related information|Additional information)\s*$",
625599
)
626-
.unwrap();
600+
.expect(REGEX_ERROR);
627601

628602
find_first_occurrence(content, &add_res_regex)
629603
}
@@ -649,11 +623,13 @@ mod additional_resources {
649623
/// Check that the additional resources section is composed of list items, possibly with some ifdefs.
650624
fn check_paragraphs_in_add_res(lines: &[&str], heading_index: usize) -> Vec<IssueReport> {
651625
// This regex matches either a plain list item, or one that's embedded in an inline ifdef.
652-
let bullet_point_regex = Regex::new(r"(?:^\*+\s+\S+|^ifdef::\S+\[\*+\s+\S+.*\])").unwrap();
626+
let bullet_point_regex =
627+
Regex::new(r"(?:^\*+\s+\S+|^ifdef::\S+\[\*+\s+\S+.*\])").expect(REGEX_ERROR);
653628
// A paragraph that isn't a list item is allowed if it's an ifdef or a comment.
654-
let allowed_paragraph = Regex::new(r"^(?:ifdef::\S+\[.*]|endif::\[\]|//)").unwrap();
629+
let allowed_paragraph =
630+
Regex::new(r"^(?:ifdef::\S+\[.*]|endif::\[\]|//)").expect(REGEX_ERROR);
655631
// Let's try to use a loose definition of an empty paragraph as a whitespace paragraph.
656-
let empty_line_regex = Regex::new(r"^\s*$").unwrap();
632+
let empty_line_regex = Regex::new(r"^\s*$").expect(REGEX_ERROR);
657633

658634
let mut issues = Vec::new();
659635

@@ -694,7 +670,7 @@ mod additional_resources {
694670
/// Detect links with no labels after a certain point in the file,
695671
/// specifically after the additional resources heading.
696672
fn check_link_labels_in_add_res(lines: &[&str], heading_index: usize) -> Vec<IssueReport> {
697-
let link_regex = Regex::new(r"link:\S+\[]").unwrap();
673+
let link_regex = Regex::new(r"link:\S+\[]").expect(REGEX_ERROR);
698674

699675
let mut issues = Vec::new();
700676

@@ -716,7 +692,7 @@ mod additional_resources {
716692
fn check_additional_resource_length(lines: &[&str], heading_index: usize) -> Vec<IssueReport> {
717693
// This regex features capture groups to extract the content of the list item.
718694
let bullet_point_regex =
719-
Regex::new(r"^(?:\*+\s+(\S+.*)|ifdef::\S+\[\*+\s+(\S+.*)\])").unwrap();
695+
Regex::new(r"^(?:\*+\s+(\S+.*)|ifdef::\S+\[\*+\s+(\S+.*)\])").expect(REGEX_ERROR);
720696
// This is the number of words you need to write:
721697
// * The `program(1)` man page
722698
// Let's use that as the approximate upper limit.
@@ -752,7 +728,7 @@ mod additional_resources {
752728
/// Find the first occurrence of an ID definition in the file.
753729
/// Returns the line number of the occurrence and the line.
754730
fn find_mod_id(content: &str) -> Option<(usize, &str)> {
755-
let id_regex = Regex::new(r"^\[id=\S+\]").unwrap();
731+
let id_regex = Regex::new(r"^\[id=\S+\]").expect(REGEX_ERROR);
756732

757733
find_first_occurrence(content, &id_regex)
758734
}

0 commit comments

Comments
 (0)