tidy: exempt URLs from the line length restriction

The length of a URL is usually not under our control, and Markdown provides no way to split a URL in the middle. Therefore, comment lines consisting _solely_ of a URL (possibly with a Markdown link label in front) should be exempt from the line-length restriction. Inline hyperlink destinations (the `[foo](http://...)` notation) are _not_ exempt, because it is my arrogant opinion that long lines of that type make the source text illegible. The patch adds dependencies on the `regex` and `lazy_static` crates to the tidy utility. This _appears_ to Just Work, but if you would rather not have that dependency I am willing to provide a hand-written parser instead.
2017-02-13 12:33:35 -05:00 · 2017-02-13 12:33:35 -05:00 · 5817351048
commit 5817351048
parent 10f6a5c443
3 changed files with 36 additions and 2 deletions
--- a/src/tools/tidy/Cargo.toml
+++ b/src/tools/tidy/Cargo.toml
@ -4,3 +4,5 @@ version = "0.1.0"
 authors = ["Alex Crichton <alex@alexcrichton.com>"]

 [dependencies]
+regex = "*"
+lazy_static = "*"
--- a/src/tools/tidy/src/main.rs
+++ b/src/tools/tidy/src/main.rs
@ -14,6 +14,9 @@
 //! etc. This is run by default on `make check` and as part of the auto
 //! builders.

+extern crate regex;
+#[macro_use] extern crate lazy_static;
+
 use std::fs;
 use std::path::{PathBuf, Path};
 use std::env;
--- a/src/tools/tidy/src/style.rs
+++ b/src/tools/tidy/src/style.rs
@ -26,6 +26,8 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;

+use regex::Regex;
+
 const COLS: usize = 100;
 const LICENSE: &'static str = "\
 Copyright <year> The Rust Project Developers. See the COPYRIGHT
@ -38,6 +40,32 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 option. This file may not be copied, modified, or distributed
 except according to those terms.";

+/// Decides whether LINE may exceed the usual column limit.
+///
+/// At present exactly one case qualifies: a comment line whose whole
+/// text is a single URL, optionally preceded by a Markdown link
+/// label.  Markdown cannot break a line in the middle of a URL, and
+/// we do not control how long external URLs are, so such lines are
+/// let through.
+///
+/// Returns `true` when the line is exempt and `false` otherwise.
+fn long_line_is_ok(line: &str) -> bool {
+    lazy_static! {
+        // Matches a `//`, `///` or `//!` comment holding nothing but
+        // an optional link label (per CommonMark) with its trailing
+        // colon, followed by a URL.  A "URL" here is any run of
+        // non-whitespace characters starting with "http://" or
+        // "https://"; extend the scheme list as needed.
+        static ref URL_ONLY_COMMENT: Regex = Regex::new(
+            r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
+        ).unwrap();
+    }
+
+    // The URL-only comment is the sole exemption today; any further
+    // exemptions can be or-ed into this expression.
+    URL_ONLY_COMMENT.is_match(line)
+}
+
 pub fn check(path: &Path, bad: &mut bool) {
    let mut contents = String::new();
    super::walk(path, &mut super::filter_dirs, &mut |file| {
@ -61,8 +89,9 @@ pub fn check(path: &Path, bad: &mut bool) {
                println!("{}:{}: {}", file.display(), i + 1, msg);
                *bad = true;
            };
-            if line.chars().count() > COLS && !skip_length {
-                err(&format!("line longer than {} chars", COLS));
+            if !skip_length && line.chars().count() > COLS
+                && !long_line_is_ok(line) {
+                    err(&format!("line longer than {} chars", COLS));
            }
            if line.contains("\t") && !skip_tab {
                err("tab character");