Auto merge of #41992 - ollie27:linkchecker_base, r=alexcrichton
linkchecker: Add support for <base> tag

Add support for the HTML <base> tag as used by mdBook so The Unstable Book can be checked.

Also clean up a few things:
* Stop checking the name attribute. It should never have been used and mdBook has since been fixed not to use it.
* Make sure we only check HTML files.
* Remove a few unnecessary allocations.

Finally, dead links in The Unstable Book have been fixed.
This commit is contained in:
commit
75b0568123
@ -4,7 +4,7 @@ The tracking issue for this feature is: [#23121]
|
||||
|
||||
[#23121]: https://github.com/rust-lang/rust/issues/23121
|
||||
|
||||
See also [`slice_patterns`](slice-patterns.html).
|
||||
See also [`slice_patterns`](language-features/slice-patterns.html).
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -190,4 +190,4 @@ constraints, etc.
|
||||
[llvm-docs]: http://llvm.org/docs/LangRef.html#inline-assembler-expressions
|
||||
|
||||
If you need more power and don't mind losing some of the niceties of
|
||||
`asm!`, check out [global_asm](global_asm.html).
|
||||
`asm!`, check out [global_asm](language-features/global_asm.html).
|
||||
|
@ -4,7 +4,7 @@ The tracking issue for this feature is: [#29641]
|
||||
|
||||
[#29641]: https://github.com/rust-lang/rust/issues/29641
|
||||
|
||||
See also [`box_syntax`](box-syntax.html)
|
||||
See also [`box_syntax`](language-features/box-syntax.html)
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -4,7 +4,7 @@ The tracking issue for this feature is: [#27779]
|
||||
|
||||
[#27779]: https://github.com/rust-lang/rust/issues/27779
|
||||
|
||||
See also [`box_patterns`](box-patterns.html)
|
||||
See also [`box_patterns`](language-features/box-patterns.html)
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -74,5 +74,5 @@ usages and placed the larger, single usage in the crate root.
|
||||
|
||||
If you don't need quite as much power and flexibility as
|
||||
`global_asm!` provides, and you don't mind restricting your inline
|
||||
assembly to `fn` bodies only, you might try the [asm](asm.html)
|
||||
feature instead.
|
||||
assembly to `fn` bodies only, you might try the
|
||||
[asm](language-features/asm.html) feature instead.
|
||||
|
@ -8,6 +8,6 @@ This feature is part of "compiler plugins." It will often be used with the
|
||||
[`plugin`] and `rustc_private` features as well. For more details, see
|
||||
their docs.
|
||||
|
||||
[`plugin`]: plugin.html
|
||||
[`plugin`]: language-features/plugin.html
|
||||
|
||||
------------------------
|
||||
|
@ -8,7 +8,7 @@ The tracking issue for this feature is: [#29597]
|
||||
This feature is part of "compiler plugins." It will often be used with the
|
||||
[`plugin_registrar`] and `rustc_private` features.
|
||||
|
||||
[`plugin_registrar`]: plugin-registrar.html
|
||||
[`plugin_registrar`]: language-features/plugin-registrar.html
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -4,7 +4,8 @@ The tracking issue for this feature is: [#23121]
|
||||
|
||||
[#23121]: https://github.com/rust-lang/rust/issues/23121
|
||||
|
||||
See also [`advanced_slice_patterns`](advanced-slice-patterns.html).
|
||||
See also
|
||||
[`advanced_slice_patterns`](language-features/advanced-slice-patterns.html).
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
|
||||
|
||||
[#33082]: https://github.com/rust-lang/rust/issues/33082
|
||||
|
||||
See also [`alloc_system`](alloc-system.html).
|
||||
See also [`alloc_system`](library-features/alloc-system.html).
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
|
||||
|
||||
[#33082]: https://github.com/rust-lang/rust/issues/33082
|
||||
|
||||
See also [`alloc_jemalloc`](alloc-jemalloc.html).
|
||||
See also [`alloc_jemalloc`](library-features/alloc-jemalloc.html).
|
||||
|
||||
------------------------
|
||||
|
||||
|
@ -41,7 +41,7 @@ macro_rules! t {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let docs = env::args().nth(1).unwrap();
|
||||
let docs = env::args_os().nth(1).unwrap();
|
||||
let docs = env::current_dir().unwrap().join(docs);
|
||||
let mut errors = false;
|
||||
walk(&mut HashMap::new(), &docs, &docs, &mut errors);
|
||||
@ -65,7 +65,6 @@ enum Redirect {
|
||||
struct FileEntry {
|
||||
source: String,
|
||||
ids: HashSet<String>,
|
||||
names: HashSet<String>,
|
||||
}
|
||||
|
||||
type Cache = HashMap<PathBuf, FileEntry>;
|
||||
@ -73,7 +72,7 @@ type Cache = HashMap<PathBuf, FileEntry>;
|
||||
impl FileEntry {
|
||||
fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
|
||||
if self.ids.is_empty() {
|
||||
with_attrs_in_source(contents, " id", |fragment, i| {
|
||||
with_attrs_in_source(contents, " id", |fragment, i, _| {
|
||||
let frag = fragment.trim_left_matches("#").to_owned();
|
||||
if !self.ids.insert(frag) {
|
||||
*errors = true;
|
||||
@ -82,15 +81,6 @@ impl FileEntry {
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_names(&mut self, contents: &str) {
|
||||
if self.names.is_empty() {
|
||||
with_attrs_in_source(contents, " name", |fragment, _| {
|
||||
let frag = fragment.trim_left_matches("#").to_owned();
|
||||
self.names.insert(frag);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
|
||||
@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
|
||||
file: &Path,
|
||||
errors: &mut bool)
|
||||
-> Option<PathBuf> {
|
||||
// ignore js files as they are not prone to errors as the rest of the
|
||||
// documentation is and they otherwise bring up false positives.
|
||||
if file.extension().and_then(|s| s.to_str()) == Some("js") {
|
||||
return None;
|
||||
}
|
||||
|
||||
// ignore handlebars files as they use {{}} to build links, we only
|
||||
// want to test the generated files
|
||||
if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
|
||||
// Ignore non-HTML files.
|
||||
if file.extension().and_then(|s| s.to_str()) != Some("html") {
|
||||
return None;
|
||||
}
|
||||
|
||||
@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
|
||||
return None;
|
||||
}
|
||||
|
||||
// mdbook uses the HTML <base> tag to handle links for subdirectories, which
|
||||
// linkchecker doesn't support
|
||||
if file.to_str().unwrap().contains("unstable-book") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
|
||||
let res = load_file(cache, root, file, SkipRedirect);
|
||||
let (pretty_file, contents) = match res {
|
||||
Ok(res) => res,
|
||||
Err(_) => return None,
|
||||
@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
|
||||
cache.get_mut(&pretty_file)
|
||||
.unwrap()
|
||||
.parse_ids(&pretty_file, &contents, errors);
|
||||
cache.get_mut(&pretty_file)
|
||||
.unwrap()
|
||||
.parse_names(&contents);
|
||||
}
|
||||
|
||||
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
|
||||
with_attrs_in_source(&contents, " href", |url, i| {
|
||||
with_attrs_in_source(&contents, " href", |url, i, base| {
|
||||
// Ignore external URLs
|
||||
if url.starts_with("http:") || url.starts_with("https:") ||
|
||||
url.starts_with("javascript:") || url.starts_with("ftp:") ||
|
||||
@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
|
||||
// Once we've plucked out the URL, parse it using our base url and
|
||||
// then try to extract a file path.
|
||||
let mut path = file.to_path_buf();
|
||||
if !url.is_empty() {
|
||||
if !base.is_empty() || !url.is_empty() {
|
||||
path.pop();
|
||||
for part in Path::new(url).components() {
|
||||
for part in Path::new(base).join(url).components() {
|
||||
match part {
|
||||
Component::Prefix(_) |
|
||||
Component::RootDir => panic!(),
|
||||
@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(extension) = path.extension() {
|
||||
// don't check these files
|
||||
if extension == "png" {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Alright, if we've found a file name then this file had better
|
||||
// exist! If it doesn't then we register and print an error.
|
||||
if path.exists() {
|
||||
@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
|
||||
pretty_path.display());
|
||||
return;
|
||||
}
|
||||
let res = load_file(cache, root, path.clone(), FromRedirect(false));
|
||||
if let Some(extension) = path.extension() {
|
||||
// Ignore non-HTML files.
|
||||
if extension != "html" {
|
||||
return;
|
||||
}
|
||||
}
|
||||
let res = load_file(cache, root, &path, FromRedirect(false));
|
||||
let (pretty_path, contents) = match res {
|
||||
Ok(res) => res,
|
||||
Err(LoadError::IOError(err)) => {
|
||||
panic!(format!("error loading {}: {}", path.display(), err));
|
||||
panic!("error loading {}: {}", path.display(), err);
|
||||
}
|
||||
Err(LoadError::BrokenRedirect(target, _)) => {
|
||||
*errors = true;
|
||||
@ -245,11 +218,10 @@ fn check(cache: &mut Cache,
|
||||
|
||||
let entry = &mut cache.get_mut(&pretty_path).unwrap();
|
||||
entry.parse_ids(&pretty_path, &contents, errors);
|
||||
entry.parse_names(&contents);
|
||||
|
||||
if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
|
||||
if !entry.ids.contains(*fragment) {
|
||||
*errors = true;
|
||||
print!("{}:{}: broken link fragment ",
|
||||
print!("{}:{}: broken link fragment ",
|
||||
pretty_file.display(),
|
||||
i + 1);
|
||||
println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
|
||||
@ -267,7 +239,7 @@ fn check(cache: &mut Cache,
|
||||
|
||||
fn load_file(cache: &mut Cache,
|
||||
root: &Path,
|
||||
mut file: PathBuf,
|
||||
file: &Path,
|
||||
redirect: Redirect)
|
||||
-> Result<(PathBuf, String), LoadError> {
|
||||
let mut contents = String::new();
|
||||
@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
|
||||
None
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let mut fp = File::open(file.clone()).map_err(|err| {
|
||||
let mut fp = File::open(file).map_err(|err| {
|
||||
if let FromRedirect(true) = redirect {
|
||||
LoadError::BrokenRedirect(file.clone(), err)
|
||||
LoadError::BrokenRedirect(file.to_path_buf(), err)
|
||||
} else {
|
||||
LoadError::IOError(err)
|
||||
}
|
||||
@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
|
||||
entry.insert(FileEntry {
|
||||
source: contents.clone(),
|
||||
ids: HashSet::new(),
|
||||
names: HashSet::new(),
|
||||
});
|
||||
}
|
||||
maybe
|
||||
}
|
||||
};
|
||||
file.pop();
|
||||
match maybe_redirect.map(|url| file.join(url)) {
|
||||
match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
|
||||
Some(redirect_file) => {
|
||||
let path = PathBuf::from(redirect_file);
|
||||
load_file(cache, root, path, FromRedirect(true))
|
||||
load_file(cache, root, &redirect_file, FromRedirect(true))
|
||||
}
|
||||
None => Ok((pretty_file, contents)),
|
||||
}
|
||||
@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
|
||||
})
|
||||
}
|
||||
|
||||
fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
|
||||
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
|
||||
let mut base = "";
|
||||
for (i, mut line) in contents.lines().enumerate() {
|
||||
while let Some(j) = line.find(attr) {
|
||||
let rest = &line[j + attr.len()..];
|
||||
// The base tag should always be the first link in the document so
|
||||
// we can get away with using one pass.
|
||||
let is_base = line[..j].ends_with("<base");
|
||||
line = rest;
|
||||
let pos_equals = match rest.find("=") {
|
||||
Some(i) => i,
|
||||
@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
|
||||
Some(i) => &rest[..i],
|
||||
None => continue,
|
||||
};
|
||||
f(url, i)
|
||||
if is_base {
|
||||
base = url;
|
||||
continue;
|
||||
}
|
||||
f(url, i, base)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user