Rollup merge of #131647 - jieyouxu:unicode-table-generator, r=Mark-Simulacrum

Register `src/tools/unicode-table-generator` as a runnable tool

It seems like `src/tools/unicode-table-generator` is not currently managed by bootstrap. This PR wires it up with bootstrap as a runnable tool.

This tool seems to take two possible args:

1. (Mandatory) path to `library/core/src/unicode/unicode_data.rs`, and
2. (Optional) path to generate a test file.

I only passed the mandatory path to `unicode_data.rs` in bootstrap and didn't do anything about (2). I'm not sure about how this tool is supposed to be run.

`Cargo.lock` is modified because I renamed `unicode-table-generator`'s bin name to match the tool name, as bootstrap's tool running logic expects the bin name to be derived from the tool name.

I also added a triagebot message reminding contributors not to edit the library source file manually, but to modify the tool and regenerate the file instead. This should probably be a tidy check (if that's desirable, it can be done in a follow-up PR, though it may be overkill).

Helps with #131640 but does not close it, because there are still no docs.

r? `@Mark-Simulacrum` (since I think you authored this tool?)
This commit is contained in:
Matthias Krüger 2024-10-20 16:54:09 +02:00 committed by GitHub
commit fb42a4581b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 43 additions and 12 deletions

View File

@ -5570,13 +5570,6 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "unicode-bdd"
version = "0.1.0"
dependencies = [
"ucd-parse",
]
[[package]] [[package]]
name = "unicode-bidi" name = "unicode-bidi"
version = "0.3.15" version = "0.3.15"
@ -5626,6 +5619,13 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-table-generator"
version = "0.1.0"
dependencies = [
"ucd-parse",
]
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.14" version = "0.1.14"

View File

@ -283,3 +283,25 @@ fn make_run(run: RunConfig<'_>) {
run.builder.ensure(GenerateCompletions); run.builder.ensure(GenerateCompletions);
} }
} }
/// Bootstrap step that runs the `src/tools/unicode-table-generator` tool,
/// passing it the path to `library/core/src/unicode/unicode_data.rs`.
#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
pub struct UnicodeTableGenerator;
impl Step for UnicodeTableGenerator {
    /// The step produces no value.
    type Output = ();

    // NOTE(review): presumably limits this step to host targets; confirm
    // against the `Step` trait documentation.
    const ONLY_HOSTS: bool = true;

    /// Selects this step when the tool's source directory is requested.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/unicode-table-generator")
    }

    /// Schedules the step on the builder.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(UnicodeTableGenerator);
    }

    /// Invokes the generator tool with a single argument: the path of
    /// `unicode_data.rs` inside the source checkout. No second argument is
    /// passed.
    fn run(self, builder: &Builder<'_>) {
        let mut generator = builder.tool_cmd(Tool::UnicodeTableGenerator);
        let unicode_data = builder.src.join("library/core/src/unicode/unicode_data.rs");
        generator.arg(unicode_data);
        generator.run(builder);
    }
}

View File

@ -360,6 +360,7 @@ fn run(self, builder: &Builder<'_>) -> PathBuf {
CoverageDump, "src/tools/coverage-dump", "coverage-dump"; CoverageDump, "src/tools/coverage-dump", "coverage-dump";
RustcPerfWrapper, "src/tools/rustc-perf-wrapper", "rustc-perf-wrapper"; RustcPerfWrapper, "src/tools/rustc-perf-wrapper", "rustc-perf-wrapper";
WasmComponentLd, "src/tools/wasm-component-ld", "wasm-component-ld", is_unstable_tool = true, allow_features = "min_specialization"; WasmComponentLd, "src/tools/wasm-component-ld", "wasm-component-ld", is_unstable_tool = true, allow_features = "min_specialization";
UnicodeTableGenerator, "src/tools/unicode-table-generator", "unicode-table-generator";
); );
/// These are the submodules that are required for rustbook to work due to /// These are the submodules that are required for rustbook to work due to

View File

@ -1010,6 +1010,7 @@ macro_rules! describe {
run::GenerateCopyright, run::GenerateCopyright,
run::GenerateWindowsSys, run::GenerateWindowsSys,
run::GenerateCompletions, run::GenerateCompletions,
run::UnicodeTableGenerator,
), ),
Kind::Setup => { Kind::Setup => {
describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor) describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor)

View File

@ -1,5 +1,5 @@
[package] [package]
name = "unicode-bdd" name = "unicode-table-generator"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"

View File

@ -16,16 +16,14 @@ const fn bitset_search<
let bucket_idx = (needle / 64) as usize; let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE; let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE; let chunk_piece = bucket_idx % CHUNK_SIZE;
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index` // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
// feature stabilizes.
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
chunk_idx_map[chunk_map_idx] chunk_idx_map[chunk_map_idx]
} else { } else {
return false; return false;
}; };
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index` // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const.
// feature stabilizes.
let word = if idx < bitset_canonical.len() { let word = if idx < bitset_canonical.len() {
bitset_canonical[idx] bitset_canonical[idx]
} else { } else {

View File

@ -679,6 +679,15 @@ instead.
""" """
cc = ["@calebzulawski", "@programmerjake"] cc = ["@calebzulawski", "@programmerjake"]
[mentions."library/core/src/unicode/unicode_data.rs"]
message = """
`library/core/src/unicode/unicode_data.rs` is generated by
`src/tools/unicode-table-generator` via `./x run
src/tools/unicode-table-generator`. If you want to modify `unicode_data.rs`,
please modify the tool then regenerate the library source file with the tool
instead of editing the library source file manually.
"""
[mentions."src/librustdoc/clean/types.rs"] [mentions."src/librustdoc/clean/types.rs"]
cc = ["@camelid"] cc = ["@camelid"]