From d21aa86c652c523f6b39e53866690a1d89d97f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= <39484203+jieyouxu@users.noreply.github.com> Date: Sun, 13 Oct 2024 19:11:58 +0800 Subject: [PATCH 1/4] unicode-table-generator: match bin name with tool name Bootstrap assumes that the binary name is the same as tool name, just makes everyone's lives easier. --- Cargo.lock | 14 +++++++------- src/tools/unicode-table-generator/Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c62c379f70d..33e855bccc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5584,13 +5584,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "unicode-bdd" -version = "0.1.0" -dependencies = [ - "ucd-parse", -] - [[package]] name = "unicode-bidi" version = "0.3.15" @@ -5640,6 +5633,13 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-table-generator" +version = "0.1.0" +dependencies = [ + "ucd-parse", +] + [[package]] name = "unicode-width" version = "0.1.14" diff --git a/src/tools/unicode-table-generator/Cargo.toml b/src/tools/unicode-table-generator/Cargo.toml index ef01877c0b9..f8a500922d0 100644 --- a/src/tools/unicode-table-generator/Cargo.toml +++ b/src/tools/unicode-table-generator/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "unicode-bdd" +name = "unicode-table-generator" version = "0.1.0" edition = "2021" From 3748c5eecf5e2884f469132e2ba46f782c671a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= <39484203+jieyouxu@users.noreply.github.com> Date: Sun, 13 Oct 2024 19:13:38 +0800 Subject: [PATCH 2/4] bootstrap: register `src/tools/unicode-table-generator` as a runnable tool --- src/bootstrap/src/core/build_steps/run.rs | 22 ++++++++++++++++++++++ src/bootstrap/src/core/build_steps/tool.rs | 1 + src/bootstrap/src/core/builder.rs | 1 + 3 files changed, 24 insertions(+) diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index c7bcd76cadd..a6dff7fde80 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -283,3 +283,25 @@ fn make_run(run: RunConfig<'_>) { run.builder.ensure(GenerateCompletions); } } + +#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)] +pub struct UnicodeTableGenerator; + +impl Step for UnicodeTableGenerator { + type Output = (); + const ONLY_HOSTS: bool = true; + + fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> { + run.path("src/tools/unicode-table-generator") + } + + fn make_run(run: RunConfig<'_>) { + run.builder.ensure(UnicodeTableGenerator); + } + + fn run(self, builder: &Builder<'_>) { + let mut cmd = builder.tool_cmd(Tool::UnicodeTableGenerator); + cmd.arg(builder.src.join("library/core/src/unicode/unicode_data.rs")); + cmd.run(builder); + } +} diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs index a01497c2bb9..0d2ffd1c08d 100644 --- a/src/bootstrap/src/core/build_steps/tool.rs +++ b/src/bootstrap/src/core/build_steps/tool.rs @@ -360,6 +360,7 @@ fn run(self, builder: &Builder<'_>) -> PathBuf { CoverageDump, "src/tools/coverage-dump", "coverage-dump"; RustcPerfWrapper, "src/tools/rustc-perf-wrapper", "rustc-perf-wrapper"; WasmComponentLd, "src/tools/wasm-component-ld", "wasm-component-ld", is_unstable_tool = true, allow_features = "min_specialization"; + UnicodeTableGenerator, "src/tools/unicode-table-generator", "unicode-table-generator"; ); /// These are the submodules that are required for rustbook to work due to diff --git a/src/bootstrap/src/core/builder.rs b/src/bootstrap/src/core/builder.rs index 9ac0b0a01f7..2b82e6c569c 100644 --- a/src/bootstrap/src/core/builder.rs +++ b/src/bootstrap/src/core/builder.rs @@ -1010,6 +1010,7 @@ macro_rules! describe { run::GenerateCopyright, run::GenerateWindowsSys, run::GenerateCompletions, + run::UnicodeTableGenerator, ), Kind::Setup => { describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor) From 75a9c86a7786e72dd432bbef1e3bf99f25654496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= <39484203+jieyouxu@users.noreply.github.com> Date: Sun, 13 Oct 2024 19:33:10 +0800 Subject: [PATCH 3/4] unicode-table-generator: sync comments These comments were updated on master but not through this tool, so the comments in the tool became outdated. Sync the comments to stay consistent. --- src/tools/unicode-table-generator/src/range_search.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/src/tools/unicode-table-generator/src/range_search.rs index 3a5b869f72f..221e5d75ef5 100644 --- a/src/tools/unicode-table-generator/src/range_search.rs +++ b/src/tools/unicode-table-generator/src/range_search.rs @@ -16,16 +16,14 @@ const fn bitset_search< let bucket_idx = (needle / 64) as usize; let chunk_map_idx = bucket_idx / CHUNK_SIZE; let chunk_piece = bucket_idx % CHUNK_SIZE; - // FIXME: const-hack: Revert to `slice::get` after `const_slice_index` - // feature stabilizes. + // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { chunk_idx_map[chunk_map_idx] } else { return false; }; let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; - // FIXME: const-hack: Revert to `slice::get` after `const_slice_index` - // feature stabilizes. + // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. let word = if idx < bitset_canonical.len() { bitset_canonical[idx] } else { From be89da5ab5b4e01cead1219ac7d68ac93751e69b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= <39484203+jieyouxu@users.noreply.github.com> Date: Sun, 13 Oct 2024 19:45:25 +0800 Subject: [PATCH 4/4] triagebot: add reminder for `library/core/src/unicode/unicode_data.rs` that it needs to be generated by tool And should not be directly edited. --- triagebot.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/triagebot.toml b/triagebot.toml index 60ec1043aad..b3f6a94d7f4 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -674,6 +674,15 @@ instead. """ cc = ["@calebzulawski", "@programmerjake"] +[mentions."library/core/src/unicode/unicode_data.rs"] +message = """ +`library/core/src/unicode/unicode_data.rs` is generated by +`src/tools/unicode-table-generator` via `./x run +src/tools/unicode-table-generator`. If you want to modify `unicode_data.rs`, +please modify the tool then regenerate the library source file with the tool +instead of editing the library source file manually. +""" + [mentions."src/librustdoc/clean/types.rs"] cc = ["@camelid"]