Skip to content

Add --link-targets-dir argument to linkchecker #143883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/bootstrap/src/core/build_steps/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,3 +628,9 @@ tool_check_step!(CoverageDump {
mode: Mode::ToolBootstrap,
default: false
});

// Check step for the link checker tool whose sources live in `src/tools/linkchecker`.
// The `mode`/`default` values match the tool check step declared just above this one.
// NOTE(review): presumably `default: false` keeps this step out of a plain `x check` unless the
// path is requested explicitly — confirm against the `tool_check_step!` definition.
tool_check_step!(Linkchecker {
path: "src/tools/linkchecker",
mode: Mode::ToolBootstrap,
default: false
});
1 change: 1 addition & 0 deletions src/bootstrap/src/core/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,7 @@ impl<'a> Builder<'a> {
check::Compiletest,
check::FeaturesStatusDump,
check::CoverageDump,
check::Linkchecker,
// This has special staging logic, it may run on stage 1 while others run on stage 0.
// It takes quite some time to build stage 1, so put this at the end.
//
Expand Down
2 changes: 1 addition & 1 deletion src/tools/linkchecker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "linkchecker"
version = "0.1.0"
edition = "2021"
edition = "2024"

[[bin]]
name = "linkchecker"
Expand Down
86 changes: 78 additions & 8 deletions src/tools/linkchecker/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
//! should catch the majority of "broken link" cases.

use std::cell::{Cell, RefCell};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::ErrorKind;
use std::iter::once;
use std::path::{Component, Path, PathBuf};
use std::rc::Rc;
use std::time::Instant;
use std::{env, fs};

use html5ever::tendril::ByteTendril;
use html5ever::tokenizer::{
Expand Down Expand Up @@ -110,10 +112,25 @@ macro_rules! t {
};
}

/// Command-line configuration produced by `parse_cli`.
struct Cli {
/// Canonicalized path taken from the single positional argument; `main` uses it both as the
/// checker root and as the directory to walk.
docs: PathBuf,
/// Canonicalized paths collected from every `--link-targets-dir` occurrence, in the order
/// they were given on the command line. May be empty.
link_targets_dirs: Vec<PathBuf>,
}

fn main() {
let docs = env::args_os().nth(1).expect("doc path should be first argument");
let docs = env::current_dir().unwrap().join(docs);
let mut checker = Checker { root: docs.clone(), cache: HashMap::new() };
let cli = match parse_cli() {
Ok(cli) => cli,
Err(err) => {
eprintln!("error: {err}");
usage_and_exit(1);
}
};

let mut checker = Checker {
root: cli.docs.clone(),
link_targets_dirs: cli.link_targets_dirs,
cache: HashMap::new(),
};
let mut report = Report {
errors: 0,
start: Instant::now(),
Expand All @@ -125,16 +142,58 @@ fn main() {
intra_doc_exceptions: 0,
has_broken_urls: false,
};
checker.walk(&docs, &mut report);
checker.walk(&cli.docs, &mut report);
report.report();
if report.errors != 0 {
println!("found some broken links");
std::process::exit(1);
}
}

/// Parses the process arguments into a [`Cli`].
///
/// Accepted syntax:
///
/// * exactly one positional argument: the documentation directory to check;
/// * `--link-targets-dir PATH` or `--link-targets-dir=PATH`, any number of times;
/// * `-h` / `--help`: print the usage line and exit successfully;
/// * `--`: treat every following argument as positional, even if it starts with `-`.
///
/// All paths are canonicalized, so they must exist.
///
/// # Errors
///
/// Returns a human-readable message when an argument is not valid Unicode, a flag is unknown,
/// `--link-targets-dir` has no value, the positional argument is missing or given more than
/// once, or a path cannot be canonicalized.
fn parse_cli() -> Result<Cli, String> {
    fn to_canonical_path(arg: &str) -> Result<PathBuf, String> {
        PathBuf::from(arg).canonicalize().map_err(|e| format!("could not canonicalize {arg}: {e}"))
    }

    let mut verbatim = false;
    let mut docs = None;
    let mut link_targets_dirs = Vec::new();

    // `std::env::args()` panics while iterating if an argument is not valid Unicode. Go through
    // `args_os()` instead so that such an argument is reported like any other usage error.
    let mut args = std::env::args_os().skip(1).map(|arg| {
        arg.into_string().map_err(|bad| format!("argument is not valid Unicode: {bad:?}"))
    });

    while let Some(arg) = args.next() {
        let arg = arg?;

        // After `--`, or when the argument does not look like a flag, it is positional.
        if verbatim || !arg.starts_with('-') {
            if docs.is_some() {
                return Err("too many positional arguments".into());
            }
            docs = Some(arg);
            continue;
        }

        match arg.as_str() {
            "--" => verbatim = true,
            "-h" | "--help" => usage_and_exit(0),
            "--link-targets-dir" => {
                // The value is the next argument, taken as-is (even if it starts with `-`).
                let value = match args.next() {
                    Some(value) => value?,
                    None => return Err("missing value for --link-targets-dir".into()),
                };
                link_targets_dirs.push(to_canonical_path(&value)?);
            }
            flag => match flag.strip_prefix("--link-targets-dir=") {
                Some(value) => link_targets_dirs.push(to_canonical_path(value)?),
                None => return Err(format!("unknown flag: {arg}")),
            },
        }
    }

    Ok(Cli {
        docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)?,
        link_targets_dirs,
    })
}

/// Prints the one-line usage summary and terminates the process with exit status `code`.
///
/// A zero `code` means the usage was asked for (`-h` / `--help`), so it is written to stdout
/// where it can be piped or paged. Any other `code` accompanies an error and the usage goes to
/// stderr alongside the error message.
fn usage_and_exit(code: i32) -> ! {
    const USAGE: &str = "usage: linkchecker PATH [--link-targets-dir=PATH ...]";
    if code == 0 {
        println!("{USAGE}");
    } else {
        eprintln!("{USAGE}");
    }
    std::process::exit(code)
}

/// State carried through a link-checking run.
struct Checker {
/// Documentation root; `main` sets it to the docs path from the command line, and file paths
/// are reported relative to it when they lie underneath it.
root: PathBuf,
/// Directories passed via `--link-targets-dir`, stored in command-line order.
// NOTE(review): these appear to be tried after `root` when a link target is looked up —
// confirm against the lookup loop in `impl Checker`.
link_targets_dirs: Vec<PathBuf>,
/// Per-file lookup results, keyed by the root-relative ("pretty") path string; starts empty.
cache: Cache,
}

Expand Down Expand Up @@ -427,15 +486,23 @@ impl Checker {
let pretty_path =
file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();

let entry =
self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
let entry = self.cache.entry(pretty_path.clone());
if let Entry::Occupied(e) = &entry
&& !matches!(e.get(), FileEntry::Missing)
{
break;
}

let file = base.join(&pretty_path);
entry.insert_entry(match fs::metadata(&file) {
Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
Ok(_) => {
if file.extension().and_then(|s| s.to_str()) != Some("html") {
FileEntry::OtherFile
} else {
report.html_files += 1;
load_html_file(file, report)
load_html_file(&file, report)
}
}
Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
Expand All @@ -451,6 +518,9 @@ impl Checker {
panic!("unexpected read error for {}: {}", file.display(), e);
}
});
}

let entry = self.cache.get(&pretty_path).unwrap();
(pretty_path, entry)
}
}
Expand Down
Loading