From 76ba11994e3b94890ec61bd75c2d2b9a428d151d Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 7 Jul 2024 14:22:06 +0200 Subject: [PATCH 1/2] Revert "remove regexes" This reverts commit 8d8504300fe7ad9b20a7690cce8025290847f155. The regexes are important for performance. --- src/tools/tidy/src/style.rs | 36 +++++++++++++------------------ src/tools/tidy/src/style/tests.rs | 19 ++++++++++------ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index f65001e8de8..e4d54d2a2b5 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -18,8 +18,9 @@ // ignore-tidy-dbg use crate::walk::{filter_dirs, walk}; +use regex::RegexSet; use rustc_hash::FxHashMap; -use std::{ffi::OsStr, path::Path, sync::LazyLock}; +use std::{ffi::OsStr, path::Path}; #[cfg(test)] mod tests; @@ -109,32 +110,15 @@ fn generate_problems<'a>( 173390526, 721077, ]; -#[cfg(not(test))] -const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3')]; - -#[cfg(test)] -const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')]; // use "futile" F intentionally - fn generate_problematic_strings( consts: &[u32], letter_digit: &FxHashMap, ) -> Vec { generate_problems(consts, letter_digit) - .flat_map(|v| vec![v.to_string(), format!("{:X}", v)]) + .flat_map(|v| vec![v.to_string(), format!("{:x}", v), format!("{:X}", v)]) .collect() } -static PROBLEMATIC_CONSTS_STRINGS: LazyLock> = LazyLock::new(|| { - generate_problematic_strings( - ROOT_PROBLEMATIC_CONSTS, - &FxHashMap::from_iter(LETTER_DIGIT.iter().copied()), - ) -}); - -fn contains_problematic_const(trimmed: &str) -> bool { - PROBLEMATIC_CONSTS_STRINGS.iter().any(|s| trimmed.to_uppercase().contains(s)) -} - const INTERNAL_COMPILER_DOCS_LINE: &str = "#### This error code is internal to the compiler and will not be emitted with normal Rust code."; /// Parser states for `line_is_url`. @@ -331,6 +315,11 @@ fn skip(path: &Path, is_dir: bool) -> bool { // We only check CSS files in rustdoc. path.extension().map_or(false, |e| e == "css") && !is_in(path, "src", "librustdoc") } + let problematic_consts_strings = generate_problematic_strings( + ROOT_PROBLEMATIC_CONSTS, + &[('A', '4'), ('B', '8'), ('E', '3')].iter().cloned().collect(), + ); + let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap(); walk(path, skip, &mut |entry, contents| { let file = entry.path(); @@ -400,6 +389,7 @@ fn skip(path: &Path, is_dir: bool) -> bool { let is_test = file.components().any(|c| c.as_os_str() == "tests"); // scanning the whole file for multiple needles at once is more efficient than // executing lines times needles separate searches. + let any_problematic_line = problematic_regex.is_match(contents); for (i, line) in contents.split('\n').enumerate() { if line.is_empty() { if i == 0 { @@ -469,8 +459,12 @@ fn skip(path: &Path, is_dir: bool) -> bool { if trimmed.contains("//") && trimmed.contains(" XXX") { err("Instead of XXX use FIXME") } - if contains_problematic_const(trimmed) { - err("Don't use magic numbers that spell things (consider 0x12345678)"); + if any_problematic_line { + for s in problematic_consts_strings.iter() { + if trimmed.contains(s) { + err("Don't use magic numbers that spell things (consider 0x12345678)"); + } + } } } // for now we just check libcore diff --git a/src/tools/tidy/src/style/tests.rs b/src/tools/tidy/src/style/tests.rs index 47fec890432..292e23916d2 100644 --- a/src/tools/tidy/src/style/tests.rs +++ b/src/tools/tidy/src/style/tests.rs @@ -1,10 +1,17 @@ use super::*; #[test] -fn test_contains_problematic_const() { - assert!(contains_problematic_const("786357")); // check with no "decimal" hex digits - converted to integer - assert!(contains_problematic_const("589701")); // check with "decimal" replacements - converted to integer - assert!(contains_problematic_const("8FF85")); // check for hex display - assert!(contains_problematic_const("8fF85")); // check for case-alternating hex display - assert!(!contains_problematic_const("1193046")); // check for non-matching value +fn test_generate_problematic_strings() { + let problematic_regex = RegexSet::new( + generate_problematic_strings( + ROOT_PROBLEMATIC_CONSTS, + &[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')].iter().cloned().collect(), // use "futile" F intentionally + ) + .as_slice(), + ) + .unwrap(); + assert!(problematic_regex.is_match("786357")); // check with no "decimal" hex digits - converted to integer + assert!(problematic_regex.is_match("589701")); // check with "decimal" replacements - converted to integer + assert!(problematic_regex.is_match("8FF85")); // check for hex display + assert!(!problematic_regex.is_match("1193046")); // check for non-matching value } From 1cfc89ad691f807ff157ed99104aee9118fdc3fe Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 7 Jul 2024 14:23:36 +0200 Subject: [PATCH 2/2] Add note about performance of tidy problematic consts --- src/tools/tidy/src/style.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index e4d54d2a2b5..8e693c35adc 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -110,6 +110,7 @@ fn generate_problems<'a>( 173390526, 721077, ]; +// Returns all permutations of problematic consts, over 2000 elements. fn generate_problematic_strings( consts: &[u32], letter_digit: &FxHashMap, @@ -319,6 +320,8 @@ fn skip(path: &Path, is_dir: bool) -> bool { ROOT_PROBLEMATIC_CONSTS, &[('A', '4'), ('B', '8'), ('E', '3')].iter().cloned().collect(), ); + // This creates a RegexSet as regex contains performance optimizations to be able to deal with these over + // 2000 needles efficiently. This runs over the entire source code, so performance matters. let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap(); walk(path, skip, &mut |entry, contents| {