From 1cccf2dd4cb342542a0c7d2e59445f6eedd32a85 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 4 Jan 2023 02:16:24 +0000 Subject: [PATCH] Ignore things in .gitignore in tidy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch from `walkdir` to `ignore`. This required various changes to make `skip` thread-safe. - Ignore `build` anywhere in the source tree, not just at the top-level. We support this in bootstrap, we should support it in tidy too. As a nice side benefit, this also makes tidy a bit faster. Before: ``` ; hyperfine -i '"/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"' Benchmark 1: "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32" Time (mean ± σ): 1.080 s ± 0.008 s [User: 2.616 s, System: 3.243 s] Range (min … max): 1.069 s … 1.099 s 10 runs ``` After: ``` ; hyperfine '"/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"' Benchmark 1: "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32" Time (mean ± σ): 705.0 ms ± 1.4 ms [User: 3179.1 ms, System: 1517.5 ms] Range (min … max): 702.3 ms … 706.9 ms 10 runs ``` --- .gitignore | 2 +- .../replace-version-placeholder/src/main.rs | 2 +- src/tools/tidy/src/alphabetical.rs | 2 +- src/tools/tidy/src/bins.rs | 74 ++++++++----------- src/tools/tidy/src/debug_artifacts.rs | 2 +- src/tools/tidy/src/edition.rs | 2 +- src/tools/tidy/src/error_codes.rs | 4 +- src/tools/tidy/src/features.rs | 6 +- src/tools/tidy/src/pal.rs | 2 +- src/tools/tidy/src/rustdoc_gui_tests.rs | 2 +- src/tools/tidy/src/style.rs | 2 +- src/tools/tidy/src/target_specific_tests.rs | 2 +- src/tools/tidy/src/ui_tests.rs | 2 +- src/tools/tidy/src/unit_tests.rs | 18 +++-- src/tools/tidy/src/walk.rs | 38 +++++----- 15 files changed, 75 insertions(+), 85 deletions(-) diff --git a/.gitignore b/.gitignore index e5ca3e50313..ce797a7a837 100644 --- a/.gitignore +++ b/.gitignore @@ -41,7 +41,7 @@ no_llvm_build /inst/ /llvm/ /mingw-build/ -/build/ +build/ /build-rust-analyzer/ /dist/ /unicode-downloads diff --git a/src/tools/replace-version-placeholder/src/main.rs b/src/tools/replace-version-placeholder/src/main.rs index 33b35d05415..864e68de55d 100644 --- a/src/tools/replace-version-placeholder/src/main.rs +++ b/src/tools/replace-version-placeholder/src/main.rs @@ -10,7 +10,7 @@ fn main() { let version_str = version_str.trim(); walk::walk( &root_path, - &mut |path| { + |path| { walk::filter_dirs(path) // We exempt these as they require the placeholder // for their operation diff --git a/src/tools/tidy/src/alphabetical.rs b/src/tools/tidy/src/alphabetical.rs index f913f6cde38..9bfee1efc0b 100644 --- a/src/tools/tidy/src/alphabetical.rs +++ b/src/tools/tidy/src/alphabetical.rs @@ -95,7 +95,7 @@ fn check_section<'a>( } pub fn check(path: &Path, bad: &mut bool) { - walk(path, &mut filter_dirs, &mut |entry, contents| { + walk(path, filter_dirs, &mut |entry, contents| { let file = &entry.path().display(); let mut lines = contents.lines().enumerate().peekable(); diff --git a/src/tools/tidy/src/bins.rs b/src/tools/tidy/src/bins.rs index b898f20a5d0..2d6abe59343 100644 --- a/src/tools/tidy/src/bins.rs +++ b/src/tools/tidy/src/bins.rs @@ -101,54 +101,38 @@ pub fn check(path: &Path, bad: &mut bool) { const ALLOWED: &[&str] = &["configure", "x"]; - walk_no_read( - path, - &mut |path| { - filter_dirs(path) - || path.ends_with("src/etc") - // This is a list of directories that we almost certainly - // don't need to walk. A future PR will likely want to - // remove these in favor of crate::walk_no_read using git - // ls-files to discover the paths we should check, which - // would naturally ignore all of these directories. It's - // also likely faster than walking the directory tree - // directly (since git is just reading from a couple files - // to produce the results). - || path.ends_with("target") - || path.ends_with("build") - || path.ends_with(".git") - }, - &mut |entry| { - let file = entry.path(); - let extension = file.extension(); - let scripts = ["py", "sh", "ps1"]; - if scripts.into_iter().any(|e| extension == Some(OsStr::new(e))) { + // FIXME: we don't need to look at all binaries, only files that have been modified in this branch + // (e.g. using `git ls-files`). + walk_no_read(path, |path| filter_dirs(path) || path.ends_with("src/etc"), &mut |entry| { + let file = entry.path(); + let extension = file.extension(); + let scripts = ["py", "sh", "ps1"]; + if scripts.into_iter().any(|e| extension == Some(OsStr::new(e))) { + return; + } + + if t!(is_executable(&file), file) { + let rel_path = file.strip_prefix(path).unwrap(); + let git_friendly_path = rel_path.to_str().unwrap().replace("\\", "/"); + + if ALLOWED.contains(&git_friendly_path.as_str()) { return; } - if t!(is_executable(&file), file) { - let rel_path = file.strip_prefix(path).unwrap(); - let git_friendly_path = rel_path.to_str().unwrap().replace("\\", "/"); - - if ALLOWED.contains(&git_friendly_path.as_str()) { - return; - } - - let output = Command::new("git") - .arg("ls-files") - .arg(&git_friendly_path) - .current_dir(path) - .stderr(Stdio::null()) - .output() - .unwrap_or_else(|e| { - panic!("could not run git ls-files: {e}"); - }); - let path_bytes = rel_path.as_os_str().as_bytes(); - if output.status.success() && output.stdout.starts_with(path_bytes) { - tidy_error!(bad, "binary checked into source: {}", file.display()); - } + let output = Command::new("git") + .arg("ls-files") + .arg(&git_friendly_path) + .current_dir(path) + .stderr(Stdio::null()) + .output() + .unwrap_or_else(|e| { + panic!("could not run git ls-files: {e}"); + }); + let path_bytes = rel_path.as_os_str().as_bytes(); + if output.status.success() && output.stdout.starts_with(path_bytes) { + tidy_error!(bad, "binary checked into source: {}", file.display()); } - }, - ) + } + }) } } diff --git a/src/tools/tidy/src/debug_artifacts.rs b/src/tools/tidy/src/debug_artifacts.rs index 0dd9c1e160c..2241375eaae 100644 --- a/src/tools/tidy/src/debug_artifacts.rs +++ b/src/tools/tidy/src/debug_artifacts.rs @@ -6,7 +6,7 @@ const GRAPHVIZ_POSTFLOW_MSG: &str = "`borrowck_graphviz_postflow` attribute in test"; pub fn check(test_dir: &Path, bad: &mut bool) { - walk(test_dir, &mut filter_dirs, &mut |entry, contents| { + walk(test_dir, filter_dirs, &mut |entry, contents| { let filename = entry.path(); let is_rust = filename.extension().map_or(false, |ext| ext == "rs"); if !is_rust { diff --git a/src/tools/tidy/src/edition.rs b/src/tools/tidy/src/edition.rs index 8172e3d292b..ae8233aa910 100644 --- a/src/tools/tidy/src/edition.rs +++ b/src/tools/tidy/src/edition.rs @@ -11,7 +11,7 @@ fn is_edition_2021(mut line: &str) -> bool { pub fn check(path: &Path, bad: &mut bool) { walk( path, - &mut |path| { + |path| { filter_dirs(path) || (path.ends_with("tests") && path.join("COMPILER_TESTS.md").exists()) }, diff --git a/src/tools/tidy/src/error_codes.rs b/src/tools/tidy/src/error_codes.rs index c60caa0d49c..d90ad5abbf9 100644 --- a/src/tools/tidy/src/error_codes.rs +++ b/src/tools/tidy/src/error_codes.rs @@ -127,7 +127,7 @@ fn check_error_codes_docs( let mut no_longer_emitted_codes = Vec::new(); - walk(&docs_path, &mut |_| false, &mut |entry, contents| { + walk(&docs_path, |_| false, &mut |entry, contents| { let path = entry.path(); // Error if the file isn't markdown. @@ -319,7 +319,7 @@ fn check_error_codes_used( let mut found_codes = Vec::new(); - walk_many(search_paths, &mut filter_dirs, &mut |entry, contents| { + walk_many(search_paths, filter_dirs, &mut |entry, contents| { let path = entry.path(); // Return early if we aren't looking at a source file. diff --git a/src/tools/tidy/src/features.rs b/src/tools/tidy/src/features.rs index af92e6eb863..f0f13628dc7 100644 --- a/src/tools/tidy/src/features.rs +++ b/src/tools/tidy/src/features.rs @@ -101,7 +101,7 @@ pub fn check( &tests_path.join("rustdoc-ui"), &tests_path.join("rustdoc"), ], - &mut filter_dirs, + filter_dirs, &mut |entry, contents| { let file = entry.path(); let filename = file.file_name().unwrap().to_string_lossy(); @@ -477,11 +477,11 @@ fn get_and_check_lib_features( fn map_lib_features( base_src_path: &Path, - mf: &mut dyn FnMut(Result<(&str, Feature), &str>, &Path, usize), + mf: &mut (dyn Send + Sync + FnMut(Result<(&str, Feature), &str>, &Path, usize)), ) { walk( base_src_path, - &mut |path| filter_dirs(path) || path.ends_with("tests"), + |path| filter_dirs(path) || path.ends_with("tests"), &mut |entry, contents| { let file = entry.path(); let filename = file.file_name().unwrap().to_string_lossy(); diff --git a/src/tools/tidy/src/pal.rs b/src/tools/tidy/src/pal.rs index f4592fdcff9..33938ac9a0a 100644 --- a/src/tools/tidy/src/pal.rs +++ b/src/tools/tidy/src/pal.rs @@ -68,7 +68,7 @@ pub fn check(path: &Path, bad: &mut bool) { // Sanity check that the complex parsing here works. let mut saw_target_arch = false; let mut saw_cfg_bang = false; - walk(path, &mut filter_dirs, &mut |entry, contents| { + walk(path, filter_dirs, &mut |entry, contents| { let file = entry.path(); let filestr = file.to_string_lossy().replace("\\", "/"); if !filestr.ends_with(".rs") { diff --git a/src/tools/tidy/src/rustdoc_gui_tests.rs b/src/tools/tidy/src/rustdoc_gui_tests.rs index feb513df34b..d7db5c02297 100644 --- a/src/tools/tidy/src/rustdoc_gui_tests.rs +++ b/src/tools/tidy/src/rustdoc_gui_tests.rs @@ -5,7 +5,7 @@ pub fn check(path: &Path, bad: &mut bool) { crate::walk::walk( &path.join("rustdoc-gui"), - &mut |p| { + |p| { // If there is no extension, it's very likely a folder and we want to go into it. p.extension().map(|e| e != "goml").unwrap_or(false) }, diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index 6a0855405ec..9ecb30529cc 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -235,7 +235,7 @@ fn skip(path: &Path) -> bool { .chain(PROBLEMATIC_CONSTS.iter().map(|v| format!("{:X}", v))) .collect(); let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap(); - walk(path, &mut skip, &mut |entry, contents| { + walk(path, skip, &mut |entry, contents| { let file = entry.path(); let filename = file.file_name().unwrap().to_string_lossy(); let extensions = diff --git a/src/tools/tidy/src/target_specific_tests.rs b/src/tools/tidy/src/target_specific_tests.rs index d7a157672cf..f41fa4fcce1 100644 --- a/src/tools/tidy/src/target_specific_tests.rs +++ b/src/tools/tidy/src/target_specific_tests.rs @@ -37,7 +37,7 @@ struct RevisionInfo<'a> { pub fn check(path: &Path, bad: &mut bool) { crate::walk::walk( path, - &mut |path| path.extension().map(|p| p == "rs") == Some(false), + |path| path.extension().map(|p| p == "rs") == Some(false), &mut |entry, content| { let file = entry.path().display(); let mut header_map = BTreeMap::new(); diff --git a/src/tools/tidy/src/ui_tests.rs b/src/tools/tidy/src/ui_tests.rs index 409f7563184..15c36923e88 100644 --- a/src/tools/tidy/src/ui_tests.rs +++ b/src/tools/tidy/src/ui_tests.rs @@ -54,7 +54,7 @@ fn check_entries(path: &Path, bad: &mut bool) { pub fn check(path: &Path, bad: &mut bool) { check_entries(&path, bad); for path in &[&path.join("ui"), &path.join("ui-fulldeps")] { - crate::walk::walk_no_read(path, &mut |_| false, &mut |entry| { + crate::walk::walk_no_read(path, |_| false, &mut |entry| { let file_path = entry.path(); if let Some(ext) = file_path.extension() { if ext == "stderr" || ext == "stdout" { diff --git a/src/tools/tidy/src/unit_tests.rs b/src/tools/tidy/src/unit_tests.rs index 27f36c85561..24ab81587db 100644 --- a/src/tools/tidy/src/unit_tests.rs +++ b/src/tools/tidy/src/unit_tests.rs @@ -11,14 +11,16 @@ use std::path::Path; pub fn check(root_path: &Path, bad: &mut bool) { - let core = &root_path.join("core"); - let core_tests = &core.join("tests"); - let core_benches = &core.join("benches"); - let is_core = |path: &Path| { - path.starts_with(core) && !(path.starts_with(core_tests) || path.starts_with(core_benches)) + let core = root_path.join("core"); + let core_copy = core.clone(); + let core_tests = core.join("tests"); + let core_benches = core.join("benches"); + let is_core = move |path: &Path| { + path.starts_with(&core) + && !(path.starts_with(&core_tests) || path.starts_with(&core_benches)) }; - let mut skip = |path: &Path| { + let skip = move |path: &Path| { let file_name = path.file_name().unwrap_or_default(); if path.is_dir() { filter_dirs(path) @@ -35,9 +37,9 @@ pub fn check(root_path: &Path, bad: &mut bool) { } }; - walk(root_path, &mut skip, &mut |entry, contents| { + walk(root_path, skip, &mut |entry, contents| { let path = entry.path(); - let is_core = path.starts_with(core); + let is_core = path.starts_with(&core_copy); for (i, line) in contents.lines().enumerate() { let line = line.trim(); let is_test = || line.contains("#[test]") && !line.contains("`#[test]"); diff --git a/src/tools/tidy/src/walk.rs b/src/tools/tidy/src/walk.rs index 4cfb70fa31c..f7f2c647eb4 100644 --- a/src/tools/tidy/src/walk.rs +++ b/src/tools/tidy/src/walk.rs @@ -1,9 +1,8 @@ -use std::fs::File; -use std::io::Read; -use walkdir::{DirEntry, WalkDir}; +use ignore::DirEntry; -use std::path::Path; +use std::{fs, path::Path}; +/// The default directory filter. pub fn filter_dirs(path: &Path) -> bool { let skip = [ "tidy-test-file", @@ -36,34 +35,39 @@ pub fn filter_dirs(path: &Path) -> bool { pub fn walk_many( paths: &[&Path], - skip: &mut dyn FnMut(&Path) -> bool, + skip: impl Clone + Send + Sync + 'static + Fn(&Path) -> bool, f: &mut dyn FnMut(&DirEntry, &str), ) { for path in paths { - walk(path, skip, f); + walk(path, skip.clone(), f); } } -pub fn walk(path: &Path, skip: &mut dyn FnMut(&Path) -> bool, f: &mut dyn FnMut(&DirEntry, &str)) { - let mut contents = String::new(); +pub fn walk( + path: &Path, + skip: impl Send + Sync + 'static + Fn(&Path) -> bool, + f: &mut dyn FnMut(&DirEntry, &str), +) { walk_no_read(path, skip, &mut |entry| { - contents.clear(); - if t!(File::open(entry.path()), entry.path()).read_to_string(&mut contents).is_err() { - contents.clear(); - } - f(&entry, &contents); + let contents = t!(fs::read(entry.path()), entry.path()); + let contents_str = match String::from_utf8(contents) { + Ok(s) => s, + Err(_) => return, // skip this file + }; + f(&entry, &contents_str); }); } pub(crate) fn walk_no_read( path: &Path, - skip: &mut dyn FnMut(&Path) -> bool, + skip: impl Send + Sync + 'static + Fn(&Path) -> bool, f: &mut dyn FnMut(&DirEntry), ) { - let walker = WalkDir::new(path).into_iter().filter_entry(|e| !skip(e.path())); - for entry in walker { + let mut walker = ignore::WalkBuilder::new(path); + let walker = walker.filter_entry(move |e| !skip(e.path())); + for entry in walker.build() { if let Ok(entry) = entry { - if entry.file_type().is_dir() { + if entry.file_type().map_or(true, |kind| kind.is_dir() || kind.is_symlink()) { continue; } f(&entry);