From 5ba3f316e1b26c4d7845a094374814b7e1d31281 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sun, 13 Jul 2025 12:38:21 +0200 Subject: [PATCH 1/4] add support for ./x check src/tools/linkchecker --- src/bootstrap/src/core/build_steps/check.rs | 6 ++++++ src/bootstrap/src/core/builder/mod.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/src/bootstrap/src/core/build_steps/check.rs b/src/bootstrap/src/core/build_steps/check.rs index 3278b55305c8b..efdb6c993b5ac 100644 --- a/src/bootstrap/src/core/build_steps/check.rs +++ b/src/bootstrap/src/core/build_steps/check.rs @@ -628,3 +628,9 @@ tool_check_step!(CoverageDump { mode: Mode::ToolBootstrap, default: false }); + +tool_check_step!(Linkchecker { + path: "src/tools/linkchecker", + mode: Mode::ToolBootstrap, + default: false +}); diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs index 1b75d00b30e4c..504a6890906f2 100644 --- a/src/bootstrap/src/core/builder/mod.rs +++ b/src/bootstrap/src/core/builder/mod.rs @@ -1030,6 +1030,7 @@ impl<'a> Builder<'a> { check::Compiletest, check::FeaturesStatusDump, check::CoverageDump, + check::Linkchecker, // This has special staging logic, it may run on stage 1 while others run on stage 0. // It takes quite some time to build stage 1, so put this at the end. // From 3a4c287df5c157ad061c8e0f46617018128d3f8f Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sun, 13 Jul 2025 12:40:32 +0200 Subject: [PATCH 2/4] bump linkchecker to edition 2024 --- src/tools/linkchecker/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml index 7123d43eb564c..fb5bff3fe63ff 100644 --- a/src/tools/linkchecker/Cargo.toml +++ b/src/tools/linkchecker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "linkchecker" version = "0.1.0" -edition = "2021" +edition = "2024" [[bin]] name = "linkchecker" From 97e9d747a8eb30f6e2372dc6b4423c806f29c3e3 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Tue, 1 Jul 2025 10:57:52 +0200 Subject: [PATCH 3/4] add an argument parser to linkchecker --- src/tools/linkchecker/main.rs | 51 +++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 84cba3f8c4473..4b45692e8490f 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -18,11 +18,11 @@ use std::cell::{Cell, RefCell}; use std::collections::{HashMap, HashSet}; +use std::fs; use std::io::ErrorKind; use std::path::{Component, Path, PathBuf}; use std::rc::Rc; use std::time::Instant; -use std::{env, fs}; use html5ever::tendril::ByteTendril; use html5ever::tokenizer::{ @@ -110,10 +110,20 @@ macro_rules! t { }; } +struct Cli { + docs: PathBuf, +} + fn main() { - let docs = env::args_os().nth(1).expect("doc path should be first argument"); - let docs = env::current_dir().unwrap().join(docs); - let mut checker = Checker { root: docs.clone(), cache: HashMap::new() }; + let cli = match parse_cli() { + Ok(cli) => cli, + Err(err) => { + eprintln!("error: {err}"); + usage_and_exit(1); + } + }; + + let mut checker = Checker { root: cli.docs.clone(), cache: HashMap::new() }; let mut report = Report { errors: 0, start: Instant::now(), @@ -125,7 +135,7 @@ fn main() { intra_doc_exceptions: 0, has_broken_urls: false, }; - checker.walk(&docs, &mut report); + checker.walk(&cli.docs, &mut report); report.report(); if report.errors != 0 { println!("found some broken links"); @@ -133,6 +143,37 @@ fn main() { } } +fn parse_cli() -> Result { + fn to_canonical_path(arg: &str) -> Result { + PathBuf::from(arg).canonicalize().map_err(|e| format!("could not canonicalize {arg}: {e}")) + } + + let mut verbatim = false; + let mut docs = None; + + let mut args = std::env::args().skip(1); + while let Some(arg) = args.next() { + if !verbatim && arg == "--" { + verbatim = true; + } else if !verbatim && (arg == "-h" || arg == "--help") { + usage_and_exit(0) + } else if !verbatim && arg.starts_with('-') { + return Err(format!("unknown flag: {arg}")); + } else if docs.is_none() { + docs = Some(arg); + } else { + return Err("too many positional arguments".into()); + } + } + + Ok(Cli { docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)? }) +} + +fn usage_and_exit(code: i32) -> ! { + eprintln!("usage: linkchecker "); + std::process::exit(code) +} + struct Checker { root: PathBuf, cache: Cache, From 299546740b9b84bf0f5e53aceb60f25b777b8ecd Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Tue, 1 Jul 2025 12:16:35 +0200 Subject: [PATCH 4/4] add --link-targets-dir flag to linkchecker --- src/tools/linkchecker/main.rs | 41 ++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4b45692e8490f..7c2cee2e32e63 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -17,9 +17,11 @@ //! should catch the majority of "broken link" cases. use std::cell::{Cell, RefCell}; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fs; use std::io::ErrorKind; +use std::iter::once; use std::path::{Component, Path, PathBuf}; use std::rc::Rc; use std::time::Instant; @@ -112,6 +114,7 @@ macro_rules! t { struct Cli { docs: PathBuf, + link_targets_dirs: Vec, } fn main() { @@ -123,7 +126,11 @@ fn main() { } }; - let mut checker = Checker { root: cli.docs.clone(), cache: HashMap::new() }; + let mut checker = Checker { + root: cli.docs.clone(), + link_targets_dirs: cli.link_targets_dirs, + cache: HashMap::new(), + }; let mut report = Report { errors: 0, start: Instant::now(), @@ -150,6 +157,7 @@ fn parse_cli() -> Result { let mut verbatim = false; let mut docs = None; + let mut link_targets_dirs = Vec::new(); let mut args = std::env::args().skip(1); while let Some(arg) = args.next() { @@ -157,6 +165,12 @@ fn parse_cli() -> Result { verbatim = true; } else if !verbatim && (arg == "-h" || arg == "--help") { usage_and_exit(0) + } else if !verbatim && arg == "--link-targets-dir" { + link_targets_dirs.push(to_canonical_path( + &args.next().ok_or("missing value for --link-targets-dir")?, + )?); + } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") { + link_targets_dirs.push(to_canonical_path(value)?); } else if !verbatim && arg.starts_with('-') { return Err(format!("unknown flag: {arg}")); } else if docs.is_none() { @@ -166,16 +180,20 @@ fn parse_cli() -> Result { } } - Ok(Cli { docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)? }) + Ok(Cli { + docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)?, + link_targets_dirs, + }) } fn usage_and_exit(code: i32) -> ! { - eprintln!("usage: linkchecker "); + eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]"); std::process::exit(code) } struct Checker { root: PathBuf, + link_targets_dirs: Vec, cache: Cache, } @@ -468,15 +486,23 @@ impl Checker { let pretty_path = file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string(); - let entry = - self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) { + for base in once(&self.root).chain(self.link_targets_dirs.iter()) { + let entry = self.cache.entry(pretty_path.clone()); + if let Entry::Occupied(e) = &entry + && !matches!(e.get(), FileEntry::Missing) + { + break; + } + + let file = base.join(&pretty_path); + entry.insert_entry(match fs::metadata(&file) { Ok(metadata) if metadata.is_dir() => FileEntry::Dir, Ok(_) => { if file.extension().and_then(|s| s.to_str()) != Some("html") { FileEntry::OtherFile } else { report.html_files += 1; - load_html_file(file, report) + load_html_file(&file, report) } } Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing, @@ -492,6 +518,9 @@ impl Checker { panic!("unexpected read error for {}: {}", file.display(), e); } }); + } + + let entry = self.cache.get(&pretty_path).unwrap(); (pretty_path, entry) } }