diff --git a/src/tools/rust-demangler/main.rs b/src/tools/rust-demangler/main.rs index e1e49230ad1..fd031ccb252 100644 --- a/src/tools/rust-demangler/main.rs +++ b/src/tools/rust-demangler/main.rs @@ -21,6 +21,41 @@ //! $ "${TARGET}"/llvm/bin/llvm-cov show --Xdemangler="${TARGET}"/stage0-tools-bin/rust-demangler \ //! --instr-profile=main.profdata ./main --show-line-counts-or-regions //! ``` +//! +//! Note regarding crate disambiguators: +//! +//! Some demangled symbol paths can include "crate disambiguator" suffixes, represented as a large +//! hexadecimal value enclosed in square brackets, and appended as a suffix to the +//! original crate name. For example, the `core` crate, here, includes a disambiguator: +//! +//! ```rust +//! <generics::Firework<f64> as core[a7a74cee373f048]::ops::drop::Drop>::drop +//! ``` +//! +//! These disambiguators are known to vary depending on environmental circumstances. As a result, +//! tests that compare results including demangled names can fail across development environments, +//! particularly with cross-platform testing. Also, the resulting crate paths are not syntactically +//! valid, and don't match the original source symbol paths, which can impact development tools. +//! +//! For these reasons, by default, `rust-demangler` uses a heuristic to remove crate disambiguators +//! from their original demangled representation before printing them to standard output. If crate +//! disambiguators are required, add the `-d` (or `--disambiguators`) flag, and the disambiguators +//! will not be removed. +//! +//! Also note that the disambiguators are stripped by a Regex pattern that is tolerant of some +//! variation in the number of hexadecimal digits. The disambiguators come from a hash value, which +//! typically generates a 16-digit hex representation on a 64-bit architecture; however, leading +//! zeros are not included, which can shorten the hex digit length, and a different hash algorithm +//! 
that might also be dependent on the architecture, might shorten the length even further. A +//! minimum length of 5 digits is assumed, which should be more than sufficient to support hex +//! representations that generate only 8 digits of precision with an extremely rare (but not +//! impossible) result with up to 3 leading zeros. +//! +//! Using a minimum number of digits less than 5 risks the possibility of stripping demangled name +//! components with a similar pattern. For example, some closures instantiated multiple times +//! include their own disambiguators, demangled as non-hashed zero-based indexes in square brackets. +//! These disambiguators seem to have more analytical value (for instance, in coverage analysis), so +//! they are not removed. use regex::Regex; use rustc_demangle::demangle; @@ -29,7 +64,25 @@ use std::io::{self, Read, Write}; const REPLACE_COLONS: &str = "::"; fn main() -> io::Result<()> { - let mut strip_crate_disambiguators = Some(Regex::new(r"\[[a-f0-9]{16}\]::").unwrap()); + // FIXME(richkadel): Issue #77615 discussed updating the `rustc-demangle` library, to provide + // an option to generate demangled names without including crate disambiguators. If that + // happens, update this tool to use that option (if the `-d` flag is not set) instead of stripping + // them via the Regex heuristic. Then update the doc comments and help. + + // Strip hashed hexadecimal crate disambiguators. Leading zeros are not enforced, and can be + // different across different platform/architecture types, so while 16 hex digits are common, + // they can also be shorter. + // + // Also note that a demangled symbol path may include the `[<digits>]` pattern, with zero-based + // indexes (such as for closures, and possibly for types defined in anonymous scopes). Preferably + // these should not be stripped. 
+ // + // The minimum length of 5 digits supports the possibility that some target architecture (maybe + // a 32-bit or smaller architecture) could generate a hash value with a maximum of 8 digits, + // and more than three leading zeros should be extremely unlikely. Conversely, it should be + // sufficient to assume the zero-based indexes for closures and anonymous scopes will never + // exceed the value 9999. + let mut strip_crate_disambiguators = Some(Regex::new(r"\[[a-f0-9]{5,16}\]::").unwrap()); let mut args = std::env::args(); let progname = args.next().unwrap(); @@ -41,14 +94,19 @@ fn main() -> io::Result<()> { eprintln!("Usage: {} [-d|--disambiguators]", progname); eprintln!(); eprintln!( - "This tool converts a list of Rust mangled symbols (one per line) into a\n + "This tool converts a list of Rust mangled symbols (one per line) into a\n\ corresponding list of demangled symbols." ); eprintln!(); eprintln!( "With -d (--disambiguators), Rust symbols mangled with the v0 symbol mangler may\n\ - include crate disambiguators (a 16 character hex value in square brackets).\n\ - Crate disambiguators are removed by default." + include crate disambiguators (a hexadecimal hash value, typically up to 16 digits\n\ + long, enclosed in square brackets)." + ); + eprintln!(); + eprintln!( + "By default, crate disambiguators are removed, using a heuristics-based regular\n\ + expression. (See the `rust-demangler` doc comments for more information.)" ); eprintln!(); std::process::exit(1)