From 4f11f84dee891e80680f47d48cc33a0cb0229080 Mon Sep 17 00:00:00 2001
From: mcarton <cartonmartin+git@gmail.com>
Date: Wed, 25 May 2016 17:15:19 +0200
Subject: [PATCH 1/2] Lint binary regexes

---
 CHANGELOG.md                |   6 +-
 src/regex.rs                | 108 ++++++++++++++++++++++--------------
 src/utils/paths.rs          |   6 +-
 tests/compile-fail/regex.rs |  33 ++++++++++-
 4 files changed, 109 insertions(+), 44 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a1e7322e76..f17b1dd0837 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,13 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## 0.0.70 — TBD
+* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new` and
+  byte regexes
+
 ## 0.0.69 — 2016-05-20
 * Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
-* `used_underscore_binding` has been made `Allow` temporarily
+* [`used_underscore_binding`] has been made `Allow` temporarily
 
 ## 0.0.68 — 2016-05-17
 * Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)*
diff --git a/src/regex.rs b/src/regex.rs
index e1b4237b9b2..8876a649e5d 100644
--- a/src/regex.rs
+++ b/src/regex.rs
@@ -9,7 +9,7 @@ use std::error::Error;
 use syntax::ast::{LitKind, NodeId};
 use syntax::codemap::{Span, BytePos};
 use syntax::parse::token::InternedString;
-use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint};
+use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint};
 
 /// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax.
 ///
@@ -81,7 +81,7 @@ impl LateLintPass for RegexPass {
                           span,
                           "`regex!(_)` found. \
                           Please use `Regex::new(_)`, which is faster for now.");
-                self.spans.insert(span);                    
+                self.spans.insert(span);
             }
             self.last = Some(block.id);
         }}
@@ -96,46 +96,18 @@ impl LateLintPass for RegexPass {
     fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
         if_let_chain!{[
             let ExprCall(ref fun, ref args) = expr.node,
-            let ExprPath(_, ref path) = fun.node,
-            match_path(path, &paths::REGEX_NEW) && args.len() == 1
+            args.len() == 1,
+            let Some(def) = cx.tcx.def_map.borrow().get(&fun.id),
         ], {
-            if let ExprLit(ref lit) = args[0].node {
-                if let LitKind::Str(ref r, _) = lit.node {
-                    match regex_syntax::Expr::parse(r) {
-                        Ok(r) => {
-                            if let Some(repl) = is_trivial_regex(&r) {
-                                span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
-                                                   "trivial regex",
-                                                   &format!("consider using {}", repl));
-                            }
-                        }
-                        Err(e) => {
-                            span_lint(cx,
-                                      INVALID_REGEX,
-                                      str_span(args[0].span, &r, e.position()),
-                                      &format!("regex syntax error: {}",
-                                               e.description()));
-                        }
-                    }
-                }
-            } else if let Some(r) = const_str(cx, &*args[0]) {
-                match regex_syntax::Expr::parse(&r) {
-                    Ok(r) => {
-                        if let Some(repl) = is_trivial_regex(&r) {
-                            span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
-                                               "trivial regex",
-                                               &format!("consider using {}", repl));
-                        }
-                    }
-                    Err(e) => {
-                        span_lint(cx,
-                                  INVALID_REGEX,
-                                  args[0].span,
-                                  &format!("regex syntax error on position {}: {}",
-                                           e.position(),
-                                           e.description()));
-                    }
-                }
+            let def_id = def.def_id();
+            if match_def_path(cx, def_id, &paths::REGEX_NEW) {
+                check_regex(cx, &args[0], true);
+            } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
+                check_regex(cx, &args[0], false);
+            } else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
+                check_set(cx, &args[0], true);
+            } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
+                check_set(cx, &args[0], false);
             }
         }}
     }
@@ -193,3 +165,57 @@ fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
         _ => None,
     }
 }
+
+fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) {
+    if_let_chain! {[
+        let ExprAddrOf(_, ref expr) = expr.node,
+        let ExprVec(ref exprs) = expr.node,
+    ], {
+        for expr in exprs {
+            check_regex(cx, expr, utf8);
+        }
+    }}
+}
+
+fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) {
+    let builder = regex_syntax::ExprBuilder::new().unicode(utf8);
+
+    if let ExprLit(ref lit) = expr.node {
+        if let LitKind::Str(ref r, _) = lit.node {
+            match builder.parse(r) {
+                Ok(r) => {
+                    if let Some(repl) = is_trivial_regex(&r) {
+                        span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
+                                           "trivial regex",
+                                           &format!("consider using {}", repl));
+                    }
+                }
+                Err(e) => {
+                    span_lint(cx,
+                              INVALID_REGEX,
+                              str_span(expr.span, r, e.position()),
+                              &format!("regex syntax error: {}",
+                                       e.description()));
+                }
+            }
+        }
+    } else if let Some(r) = const_str(cx, expr) {
+        match builder.parse(&r) {
+            Ok(r) => {
+                if let Some(repl) = is_trivial_regex(&r) {
+                    span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
+                                       "trivial regex",
+                                       &format!("consider using {}", repl));
+                }
+            }
+            Err(e) => {
+                span_lint(cx,
+                          INVALID_REGEX,
+                          expr.span,
+                          &format!("regex syntax error on position {}: {}",
+                                   e.position(),
+                                   e.description()));
+            }
+        }
+    }
+}
diff --git a/src/utils/paths.rs b/src/utils/paths.rs
index 3db1e1c5572..b0ce8b4a233 100644
--- a/src/utils/paths.rs
+++ b/src/utils/paths.rs
@@ -46,7 +46,11 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
 pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
 pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
 pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
-pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"];
+pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
+pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
+pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
+pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
+pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"];
 pub const RESULT: [&'static str; 3] = ["core", "result", "Result"];
 pub const STRING: [&'static str; 3] = ["collections", "string", "String"];
 pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"];
diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs
index 9cd2bc8098e..d9f262fb045 100644
--- a/tests/compile-fail/regex.rs
+++ b/tests/compile-fail/regex.rs
@@ -6,7 +6,8 @@
 
 extern crate regex;
 
-use regex::Regex;
+use regex::{Regex, RegexSet};
+use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet};
 
 const OPENING_PAREN : &'static str = "(";
 const NOT_A_REAL_REGEX : &'static str = "foobar";
@@ -22,8 +23,33 @@ fn syntax_error() {
     let some_regex = Regex::new(OPENING_PAREN);
     //~^ERROR: regex syntax error on position 0: unclosed
 
+    let binary_pipe_in_wrong_position = BRegex::new("|");
+    //~^ERROR: regex syntax error: empty alternate
+    let some_binary_regex = BRegex::new(OPENING_PAREN);
+    //~^ERROR: regex syntax error on position 0: unclosed
+
     let closing_paren = ")";
     let not_linted = Regex::new(closing_paren);
+
+    let set = RegexSet::new(&[
+        r"[a-z]+@[a-z]+\.(com|org|net)",
+        r"[a-z]+\.(com|org|net)",
+    ]);
+    let bset = BRegexSet::new(&[
+        r"[a-z]+@[a-z]+\.(com|org|net)",
+        r"[a-z]+\.(com|org|net)",
+    ]);
+
+    let set_error = RegexSet::new(&[
+        OPENING_PAREN,
+        //~^ERROR: regex syntax error on position 0: unclosed
+        r"[a-z]+\.(com|org|net)",
+    ]);
+    let bset_error = BRegexSet::new(&[
+        OPENING_PAREN,
+        //~^ERROR: regex syntax error on position 0: unclosed
+        r"[a-z]+\.(com|org|net)",
+    ]);
 }
 
 fn trivial_regex() {
@@ -64,12 +90,17 @@ fn trivial_regex() {
     //~^ERROR: trivial regex
     //~|HELP consider using `str::is_empty`
 
+    let binary_trivial_empty = BRegex::new("^$");
+    //~^ERROR: trivial regex
+    //~|HELP consider using `str::is_empty`
+
     // non-trivial regexes
     let non_trivial_dot = Regex::new("a.b");
     let non_trivial_eq = Regex::new("^foo|bar$");
     let non_trivial_starts_with = Regex::new("^foo|bar");
     let non_trivial_ends_with = Regex::new("^foo|bar");
     let non_trivial_ends_with = Regex::new("foo|bar");
+    let non_trivial_binary = BRegex::new("foo|bar");
 }
 
 fn main() {

From 51d166f17aa2dd73226152ff7ca6c69f2db5947c Mon Sep 17 00:00:00 2001
From: mcarton <cartonmartin+git@gmail.com>
Date: Wed, 25 May 2016 21:36:51 +0200
Subject: [PATCH 2/2] Support `RegexBuilder`

---
 CHANGELOG.md                |  4 ++--
 src/regex.rs                |  4 ++++
 src/utils/paths.rs          |  2 ++
 tests/compile-fail/regex.rs | 14 ++++++++++++--
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f17b1dd0837..928fac7412e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,8 @@
 All notable changes to this project will be documented in this file.
 
 ## 0.0.70 — TBD
-* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new` and
-  byte regexes
+* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new`,
+  `RegexBuilder::new` and byte regexes
 
 ## 0.0.69 — 2016-05-20
 * Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
diff --git a/src/regex.rs b/src/regex.rs
index 8876a649e5d..b335bf993b5 100644
--- a/src/regex.rs
+++ b/src/regex.rs
@@ -104,6 +104,10 @@ impl LateLintPass for RegexPass {
                 check_regex(cx, &args[0], true);
             } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
                 check_regex(cx, &args[0], false);
+            } else if match_def_path(cx, def_id, &paths::REGEX_BUILDER_NEW) {
+                check_regex(cx, &args[0], true);
+            } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_BUILDER_NEW) {
+                check_regex(cx, &args[0], false);
             } else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
                 check_set(cx, &args[0], true);
             } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
diff --git a/src/utils/paths.rs b/src/utils/paths.rs
index b0ce8b4a233..3c91578abd0 100644
--- a/src/utils/paths.rs
+++ b/src/utils/paths.rs
@@ -46,7 +46,9 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
 pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
 pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
 pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
+pub const REGEX_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "unicode", "RegexBuilder", "new"];
 pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
+pub const REGEX_BYTES_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "bytes", "RegexBuilder", "new"];
 pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
 pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
 pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs
index d9f262fb045..d6287b881a1 100644
--- a/tests/compile-fail/regex.rs
+++ b/tests/compile-fail/regex.rs
@@ -6,8 +6,8 @@
 
 extern crate regex;
 
-use regex::{Regex, RegexSet};
-use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet};
+use regex::{Regex, RegexSet, RegexBuilder};
+use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet, RegexBuilder as BRegexBuilder};
 
 const OPENING_PAREN : &'static str = "(";
 const NOT_A_REAL_REGEX : &'static str = "foobar";
@@ -15,6 +15,8 @@ const NOT_A_REAL_REGEX : &'static str = "foobar";
 fn syntax_error() {
     let pipe_in_wrong_position = Regex::new("|");
     //~^ERROR: regex syntax error: empty alternate
+    let pipe_in_wrong_position_builder = RegexBuilder::new("|");
+    //~^ERROR: regex syntax error: empty alternate
     let wrong_char_ranice = Regex::new("[z-a]");
     //~^ERROR: regex syntax error: invalid character class range
     let some_unicode = Regex::new("[é-è]");
@@ -27,6 +29,8 @@ fn syntax_error() {
     //~^ERROR: regex syntax error: empty alternate
     let some_binary_regex = BRegex::new(OPENING_PAREN);
     //~^ERROR: regex syntax error on position 0: unclosed
+    let some_binary_regex_builder = BRegexBuilder::new(OPENING_PAREN);
+    //~^ERROR: regex syntax error on position 0: unclosed
 
     let closing_paren = ")";
     let not_linted = Regex::new(closing_paren);
@@ -57,6 +61,10 @@ fn trivial_regex() {
     //~^ERROR: trivial regex
     //~|HELP consider using `==` on `str`s
 
+    let trivial_eq_builder = RegexBuilder::new("^foobar$");
+    //~^ERROR: trivial regex
+    //~|HELP consider using `==` on `str`s
+
     let trivial_starts_with = Regex::new("^foobar");
     //~^ERROR: trivial regex
     //~|HELP consider using `str::starts_with`
@@ -96,11 +104,13 @@ fn trivial_regex() {
 
     // non-trivial regexes
     let non_trivial_dot = Regex::new("a.b");
+    let non_trivial_dot_builder = RegexBuilder::new("a.b");
     let non_trivial_eq = Regex::new("^foo|bar$");
     let non_trivial_starts_with = Regex::new("^foo|bar");
     let non_trivial_ends_with = Regex::new("^foo|bar");
     let non_trivial_ends_with = Regex::new("foo|bar");
     let non_trivial_binary = BRegex::new("foo|bar");
+    let non_trivial_binary_builder = BRegexBuilder::new("foo|bar");
 }
 
 fn main() {