From 5b0ec1ebe4da106c18ce1ceec76e4adc627bddd1 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Fri, 29 Jul 2022 23:26:00 -0400 Subject: [PATCH] parallelize HTML checking tool --- Cargo.lock | 1 + src/tools/html-checker/Cargo.toml | 1 + src/tools/html-checker/main.rs | 46 +++++++++++++++++-------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 58c3982de23..ccffbb01518 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1788,6 +1788,7 @@ dependencies = [ name = "html-checker" version = "0.1.0" dependencies = [ + "rayon", "walkdir", ] diff --git a/src/tools/html-checker/Cargo.toml b/src/tools/html-checker/Cargo.toml index 34d3954db28..72d61d9bd26 100644 --- a/src/tools/html-checker/Cargo.toml +++ b/src/tools/html-checker/Cargo.toml @@ -9,3 +9,4 @@ path = "main.rs" [dependencies] walkdir = "2" +rayon = "1.5" diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs index f52fbdfe2d7..9b4d2c52598 100644 --- a/src/tools/html-checker/main.rs +++ b/src/tools/html-checker/main.rs @@ -1,3 +1,4 @@ +use rayon::iter::{ParallelBridge, ParallelIterator}; use std::env; use std::path::Path; use std::process::{Command, Output}; @@ -56,27 +57,30 @@ fn check_html_file(file: &Path) -> usize { // Returns the number of files read and the number of errors. fn find_all_html_files(dir: &Path) -> (usize, usize) { - let mut files_read = 0; - let mut errors = 0; - - for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| { - e.depth() != 1 - || e.file_name() - .to_str() - .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s)) - .unwrap_or(false) - }) { - let entry = entry.expect("failed to read file"); - if !entry.file_type().is_file() { - continue; - } - let entry = entry.path(); - if entry.extension().and_then(|s| s.to_str()) == Some("html") { - errors += check_html_file(&entry); - files_read += 1; - } - } - (files_read, errors) + walkdir::WalkDir::new(dir) + .into_iter() + .filter_entry(|e| { + e.depth() != 1 + || e.file_name() + .to_str() + .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s)) + .unwrap_or(false) + }) + .par_bridge() + .map(|entry| { + let entry = entry.expect("failed to read file"); + if !entry.file_type().is_file() { + return (0, 0); + } + let entry = entry.path(); + // (Number of files processed, number of errors) + if entry.extension().and_then(|s| s.to_str()) == Some("html") { + (1, check_html_file(&entry)) + } else { + (0, 0) + } + }) + .reduce(|| (0, 0), |a, b| (a.0 + b.0, a.1 + b.1)) } /// Default `tidy` command for macOS is too old that it does not have `mute-id` and `mute` options.