Auto merge of #41992 - ollie27:linkchecker_base, r=alexcrichton

linkchecker: Add support for <base> tag

Add support for the HTML <base> tag as used by mdBook so The Unstable
Book can be checked.

Also cleanup a few things:
* Stop checking the name attribute. It should never have been used and
mdBook has since been fixed not to use it.
* Make sure we only check html files.
* Remove a few unnecessary allocations.

Finally, dead links in The Unstable Book have been fixed.
This commit is contained in:
bors 2017-05-15 12:59:31 +00:00
commit 75b0568123
11 changed files with 45 additions and 67 deletions

View File

@ -4,7 +4,7 @@ The tracking issue for this feature is: [#23121]
[#23121]: https://github.com/rust-lang/rust/issues/23121
See also [`slice_patterns`](slice-patterns.html).
See also [`slice_patterns`](language-features/slice-patterns.html).
------------------------

View File

@ -190,4 +190,4 @@ constraints, etc.
[llvm-docs]: http://llvm.org/docs/LangRef.html#inline-assembler-expressions
If you need more power and don't mind losing some of the niceties of
`asm!`, check out [global_asm](global_asm.html).
`asm!`, check out [global_asm](language-features/global_asm.html).

View File

@ -4,7 +4,7 @@ The tracking issue for this feature is: [#29641]
[#29641]: https://github.com/rust-lang/rust/issues/29641
See also [`box_syntax`](box-syntax.html)
See also [`box_syntax`](language-features/box-syntax.html)
------------------------

View File

@ -4,7 +4,7 @@ The tracking issue for this feature is: [#27779]
[#27779]: https://github.com/rust-lang/rust/issues/27779
See also [`box_patterns`](box-patterns.html)
See also [`box_patterns`](language-features/box-patterns.html)
------------------------

View File

@ -74,5 +74,5 @@ usages and placed the larger, single usage in the crate root.
If you don't need quite as much power and flexibility as
`global_asm!` provides, and you don't mind restricting your inline
assembly to `fn` bodies only, you might try the [asm](asm.html)
feature instead.
assembly to `fn` bodies only, you might try the
[asm](language-features/asm.html) feature instead.

View File

@ -8,6 +8,6 @@ This feature is part of "compiler plugins." It will often be used with the
[`plugin`] and `rustc_private` features as well. For more details, see
their docs.
[`plugin`]: plugin.html
[`plugin`]: language-features/plugin.html
------------------------

View File

@ -8,7 +8,7 @@ The tracking issue for this feature is: [#29597]
This feature is part of "compiler plugins." It will often be used with the
[`plugin_registrar`] and `rustc_private` features.
[`plugin_registrar`]: plugin-registrar.html
[`plugin_registrar`]: language-features/plugin-registrar.html
------------------------

View File

@ -4,7 +4,8 @@ The tracking issue for this feature is: [#23121]
[#23121]: https://github.com/rust-lang/rust/issues/23121
See also [`advanced_slice_patterns`](advanced-slice-patterns.html).
See also
[`advanced_slice_patterns`](language-features/advanced-slice-patterns.html).
------------------------

View File

@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
[#33082]: https://github.com/rust-lang/rust/issues/33082
See also [`alloc_system`](alloc-system.html).
See also [`alloc_system`](library-features/alloc-system.html).
------------------------

View File

@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
[#33082]: https://github.com/rust-lang/rust/issues/33082
See also [`alloc_jemalloc`](alloc-jemalloc.html).
See also [`alloc_jemalloc`](library-features/alloc-jemalloc.html).
------------------------

View File

@ -41,7 +41,7 @@ macro_rules! t {
}
fn main() {
let docs = env::args().nth(1).unwrap();
let docs = env::args_os().nth(1).unwrap();
let docs = env::current_dir().unwrap().join(docs);
let mut errors = false;
walk(&mut HashMap::new(), &docs, &docs, &mut errors);
@ -65,7 +65,6 @@ enum Redirect {
struct FileEntry {
source: String,
ids: HashSet<String>,
names: HashSet<String>,
}
type Cache = HashMap<PathBuf, FileEntry>;
@ -73,7 +72,7 @@ type Cache = HashMap<PathBuf, FileEntry>;
impl FileEntry {
fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
if self.ids.is_empty() {
with_attrs_in_source(contents, " id", |fragment, i| {
with_attrs_in_source(contents, " id", |fragment, i, _| {
let frag = fragment.trim_left_matches("#").to_owned();
if !self.ids.insert(frag) {
*errors = true;
@ -82,15 +81,6 @@ impl FileEntry {
});
}
}
fn parse_names(&mut self, contents: &str) {
if self.names.is_empty() {
with_attrs_in_source(contents, " name", |fragment, _| {
let frag = fragment.trim_left_matches("#").to_owned();
self.names.insert(frag);
});
}
}
}
fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
file: &Path,
errors: &mut bool)
-> Option<PathBuf> {
// ignore js files as they are not prone to errors as the rest of the
// documentation is and they otherwise bring up false positives.
if file.extension().and_then(|s| s.to_str()) == Some("js") {
return None;
}
// ignore handlebars files as they use {{}} to build links, we only
// want to test the generated files
if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
// Ignore none HTML files.
if file.extension().and_then(|s| s.to_str()) != Some("html") {
return None;
}
@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
return None;
}
// mdbook uses the HTML <base> tag to handle links for subdirectories, which
// linkchecker doesn't support
if file.to_str().unwrap().contains("unstable-book") {
return None;
}
let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
let res = load_file(cache, root, file, SkipRedirect);
let (pretty_file, contents) = match res {
Ok(res) => res,
Err(_) => return None,
@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
cache.get_mut(&pretty_file)
.unwrap()
.parse_ids(&pretty_file, &contents, errors);
cache.get_mut(&pretty_file)
.unwrap()
.parse_names(&contents);
}
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
with_attrs_in_source(&contents, " href", |url, i| {
with_attrs_in_source(&contents, " href", |url, i, base| {
// Ignore external URLs
if url.starts_with("http:") || url.starts_with("https:") ||
url.starts_with("javascript:") || url.starts_with("ftp:") ||
@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
// Once we've plucked out the URL, parse it using our base url and
// then try to extract a file path.
let mut path = file.to_path_buf();
if !url.is_empty() {
if !base.is_empty() || !url.is_empty() {
path.pop();
for part in Path::new(url).components() {
for part in Path::new(base).join(url).components() {
match part {
Component::Prefix(_) |
Component::RootDir => panic!(),
@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
}
}
if let Some(extension) = path.extension() {
// don't check these files
if extension == "png" {
return;
}
}
// Alright, if we've found a file name then this file had better
// exist! If it doesn't then we register and print an error.
if path.exists() {
@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
pretty_path.display());
return;
}
let res = load_file(cache, root, path.clone(), FromRedirect(false));
if let Some(extension) = path.extension() {
// Ignore none HTML files.
if extension != "html" {
return;
}
}
let res = load_file(cache, root, &path, FromRedirect(false));
let (pretty_path, contents) = match res {
Ok(res) => res,
Err(LoadError::IOError(err)) => {
panic!(format!("error loading {}: {}", path.display(), err));
panic!("error loading {}: {}", path.display(), err);
}
Err(LoadError::BrokenRedirect(target, _)) => {
*errors = true;
@ -245,11 +218,10 @@ fn check(cache: &mut Cache,
let entry = &mut cache.get_mut(&pretty_path).unwrap();
entry.parse_ids(&pretty_path, &contents, errors);
entry.parse_names(&contents);
if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
if !entry.ids.contains(*fragment) {
*errors = true;
print!("{}:{}: broken link fragment ",
print!("{}:{}: broken link fragment ",
pretty_file.display(),
i + 1);
println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
@ -267,7 +239,7 @@ fn check(cache: &mut Cache,
fn load_file(cache: &mut Cache,
root: &Path,
mut file: PathBuf,
file: &Path,
redirect: Redirect)
-> Result<(PathBuf, String), LoadError> {
let mut contents = String::new();
@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
None
}
Entry::Vacant(entry) => {
let mut fp = File::open(file.clone()).map_err(|err| {
let mut fp = File::open(file).map_err(|err| {
if let FromRedirect(true) = redirect {
LoadError::BrokenRedirect(file.clone(), err)
LoadError::BrokenRedirect(file.to_path_buf(), err)
} else {
LoadError::IOError(err)
}
@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
entry.insert(FileEntry {
source: contents.clone(),
ids: HashSet::new(),
names: HashSet::new(),
});
}
maybe
}
};
file.pop();
match maybe_redirect.map(|url| file.join(url)) {
match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
Some(redirect_file) => {
let path = PathBuf::from(redirect_file);
load_file(cache, root, path, FromRedirect(true))
load_file(cache, root, &redirect_file, FromRedirect(true))
}
None => Ok((pretty_file, contents)),
}
@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
})
}
fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
let mut base = "";
for (i, mut line) in contents.lines().enumerate() {
while let Some(j) = line.find(attr) {
let rest = &line[j + attr.len()..];
// The base tag should always be the first link in the document so
// we can get away with using one pass.
let is_base = line[..j].ends_with("<base");
line = rest;
let pos_equals = match rest.find("=") {
Some(i) => i,
@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
Some(i) => &rest[..i],
None => continue,
};
f(url, i)
if is_base {
base = url;
continue;
}
f(url, i, base)
}
}
}