tidy: exempt URLs from the line length restriction

The length of a URL is usually not under our control, and Markdown
provides no way to split a URL in the middle.  Therefore, comment
lines consisting _solely_ of a URL (possibly with a Markdown link
label in front) should be exempt from the line-length restriction.

Inline hyperlink destinations ( `[foo](http://...)` notation ) are
_not_ exempt, because it is my arrogant opinion that long lines of
that type make the source text illegible.

The patch adds dependencies on the `regex` and `lazy_static` crates
to the tidy utility.  This _appears_ to Just Work, but if you would
rather not have that dependency I am willing to provide a hand-written
parser instead.
This commit is contained in:
Zack Weinberg 2017-02-13 12:33:35 -05:00
parent 10f6a5c443
commit 5817351048
3 changed files with 36 additions and 2 deletions

View File

@ -4,3 +4,5 @@ version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[dependencies]
regex = "*"
lazy_static = "*"

View File

@ -14,6 +14,9 @@
//! etc. This is run by default on `make check` and as part of the auto
//! builders.
extern crate regex;
#[macro_use] extern crate lazy_static;
use std::fs;
use std::path::{PathBuf, Path};
use std::env;

View File

@ -26,6 +26,8 @@ use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use regex::Regex;
const COLS: usize = 100;
const LICENSE: &'static str = "\
Copyright <year> The Rust Project Developers. See the COPYRIGHT
@ -38,6 +40,32 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
option. This file may not be copied, modified, or distributed
except according to those terms.";
/// True if LINE is allowed to be longer than the normal limit.
///
/// Currently there is only one exception: if the line is within a
/// comment, and its entire text is one URL (possibly with a Markdown
/// link label in front), then it's allowed to be overlength. This is
/// because Markdown offers no way to split a line in the middle of a
/// URL, and the length of URLs for external references is beyond our
/// control.
fn long_line_is_ok(line: &str) -> bool {
lazy_static! {
static ref URL_RE: Regex = Regex::new(
// This regexp uses the CommonMark definition of link
// label. It thinks any sequence of nonwhitespace
// characters beginning with "http://" or "https://" is a
// URL. Add more schemas as necessary.
r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
).unwrap();
}
if URL_RE.is_match(line) {
return true;
}
false
}
pub fn check(path: &Path, bad: &mut bool) {
let mut contents = String::new();
super::walk(path, &mut super::filter_dirs, &mut |file| {
@ -61,8 +89,9 @@ pub fn check(path: &Path, bad: &mut bool) {
println!("{}:{}: {}", file.display(), i + 1, msg);
*bad = true;
};
if line.chars().count() > COLS && !skip_length {
err(&format!("line longer than {} chars", COLS));
if !skip_length && line.chars().count() > COLS
&& !long_line_is_ok(line) {
err(&format!("line longer than {} chars", COLS));
}
if line.contains("\t") && !skip_tab {
err("tab character");