From 487bdeb519a821298b76a35d8c437ef2e733da0d Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 22 Jun 2023 09:19:58 +1000
Subject: [PATCH 1/2] Improve ordering and naming of CGUs for non-incremental
 builds.

Currently there are two problems.

First, the CGUS don't end up in size order. The merging loop does sort
by size on each iteration, but we don't sort after the final merge, so
typically there is one CGU out of place. (And sometimes we don't enter
the merging loop at all, in which case they end up in random order.)

Second, we then assign names that differ only by a numeric suffix, and
then we sort them lexicographically by name, giving us an order like
this:

regex.f10ba03eb5ec7975-cgu.1
regex.f10ba03eb5ec7975-cgu.10
regex.f10ba03eb5ec7975-cgu.11
regex.f10ba03eb5ec7975-cgu.12
regex.f10ba03eb5ec7975-cgu.13
regex.f10ba03eb5ec7975-cgu.14
regex.f10ba03eb5ec7975-cgu.15
regex.f10ba03eb5ec7975-cgu.2
regex.f10ba03eb5ec7975-cgu.3
regex.f10ba03eb5ec7975-cgu.4
regex.f10ba03eb5ec7975-cgu.5
regex.f10ba03eb5ec7975-cgu.6
regex.f10ba03eb5ec7975-cgu.7
regex.f10ba03eb5ec7975-cgu.8
regex.f10ba03eb5ec7975-cgu.9

These two problems are really annoying when debugging and profiling the
CGUs.

This commit ensures CGUs are sorted by name *and* reverse sorted by
size. This involves (a) one extra sort by size operation, and (b)
padding the numeric indices with zeroes, e.g.
`regex.f10ba03eb5ec7975-cgu.01`.

(Note that none of this applies for incremental builds, where a
different hash-based CGU naming scheme is used.)
---
 .../rustc_monomorphize/src/partitioning.rs    | 33 +++++++++++++++----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs
index 97e3748b8b8..10a496cf993 100644
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@@ -368,6 +368,7 @@ fn merge_codegen_units<'tcx>(
 
     let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
 
+    // Rename the newly merged CGUs.
     if cx.tcx.sess.opts.incremental.is_some() {
         // If we are doing incremental compilation, we want CGU names to
         // reflect the path of the source level module they correspond to.
@@ -404,18 +405,38 @@ fn merge_codegen_units<'tcx>(
                 }
             }
         }
+
+        // A sorted order here ensures what follows can be deterministic.
+        codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
     } else {
-        // If we are compiling non-incrementally we just generate simple CGU
-        // names containing an index.
+        // When compiling non-incrementally, we rename the CGUS so they have
+        // identical names except for the numeric suffix, something like
+        // `regex.f10ba03eb5ec7975-cgu.N`, where `N` varies.
+        //
+        // It is useful for debugging and profiling purposes if the resulting
+        // CGUs are sorted by name *and* reverse sorted by size. (CGU 0 is the
+        // biggest, CGU 1 is the second biggest, etc.)
+        //
+        // So first we reverse sort by size. Then we generate the names with
+        // zero-padded suffixes, which means they are automatically sorted by
+        // names. The numeric suffix width depends on the number of CGUs, which
+        // is always greater than zero:
+        // - [1,9]     CGUS: `0`, `1`, `2`, ...
+        // - [10,99]   CGUS: `00`, `01`, `02`, ...
+        // - [100,999] CGUS: `000`, `001`, `002`, ...
+        // - etc.
+        //
+        // If we didn't zero-pad the sorted-by-name order would be `XYZ-cgu.0`,
+        // `XYZ-cgu.1`, `XYZ-cgu.10`, `XYZ-cgu.11`, ..., `XYZ-cgu.2`, etc.
+        codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
+        let num_digits = codegen_units.len().ilog10() as usize + 1;
         for (index, cgu) in codegen_units.iter_mut().enumerate() {
+            let suffix = format!("{index:0num_digits$}");
             let numbered_codegen_unit_name =
-                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix));
             cgu.set_name(numbered_codegen_unit_name);
         }
     }
-
-    // A sorted order here ensures what follows can be deterministic.
-    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
 }
 
 fn internalize_symbols<'tcx>(

From 666b1b68a7200513872303f1bc8136088a09d4ad Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 22 Jun 2023 08:58:48 +1000
Subject: [PATCH 2/2] Tweak thread names for CGU processing.

For non-incremental builds on Unix, currently all the thread names look
like `opt regex.f10ba03eb5ec7975-cgu.0`. But they are truncated by
`pthread_setname` to `opt regex.f10ba`, hiding the numeric suffix that
distinguishes them. This is really annoying when using a profiler like
Samply.

This commit changes these thread names to a form like `opt cgu.0`, which
is much better.
---
 compiler/rustc_codegen_ssa/src/back/write.rs  | 63 ++++++++++++-------
 .../rustc_monomorphize/src/partitioning.rs    |  3 +
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 51ac441a7a4..dbe6f466a2a 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -698,28 +698,49 @@ impl<B: WriteBackendMethods> WorkItem<B> {
 
     /// Generate a short description of this work item suitable for use as a thread name.
     fn short_description(&self) -> String {
-        // `pthread_setname()` on *nix is limited to 15 characters and longer names are ignored.
-        // Use very short descriptions in this case to maximize the space available for the module name.
-        // Windows does not have that limitation so use slightly more descriptive names there.
+        // `pthread_setname()` on *nix ignores anything beyond the first 15
+        // bytes. Use short descriptions to maximize the space available for
+        // the module name.
+        #[cfg(not(windows))]
+        fn desc(short: &str, _long: &str, name: &str) -> String {
+            // The short label is three bytes, and is followed by a space. That
+            // leaves 11 bytes for the CGU name. How we obtain those 11 bytes
+            // depends on the the CGU name form.
+            //
+            // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
+            //   before the `-cgu.0` is the same for every CGU, so use the
+            //   `cgu.0` part. The number suffix will be different for each
+            //   CGU.
+            //
+            // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
+            //   name because each CGU will have a unique ASCII hash, and the
+            //   first 11 bytes will be enough to identify it.
+            //
+            // - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
+            //   `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
+            //   name. The first 11 bytes won't be enough to uniquely identify
+            //   it, but no obvious substring will, and this is a rarely used
+            //   option so it doesn't matter much.
+            //
+            assert_eq!(short.len(), 3);
+            let name = if let Some(index) = name.find("-cgu.") {
+                &name[index + 1..] // +1 skips the leading '-'.
+            } else {
+                name
+            };
+            format!("{short} {name}")
+        }
+
+        // Windows has no thread name length limit, so use more descriptive names.
+        #[cfg(windows)]
+        fn desc(_short: &str, long: &str, name: &str) -> String {
+            format!("{long} {name}")
+        }
+
         match self {
-            WorkItem::Optimize(m) => {
-                #[cfg(windows)]
-                return format!("optimize module {}", m.name);
-                #[cfg(not(windows))]
-                return format!("opt {}", m.name);
-            }
-            WorkItem::CopyPostLtoArtifacts(m) => {
-                #[cfg(windows)]
-                return format!("copy LTO artifacts for {}", m.name);
-                #[cfg(not(windows))]
-                return format!("copy {}", m.name);
-            }
-            WorkItem::LTO(m) => {
-                #[cfg(windows)]
-                return format!("LTO module {}", m.name());
-                #[cfg(not(windows))]
-                return format!("LTO {}", m.name());
-            }
+            WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name),
+            WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name),
+            WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()),
         }
     }
 }
diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs
index 10a496cf993..e663f4486f7 100644
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@@ -431,6 +431,9 @@ fn merge_codegen_units<'tcx>(
         codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
         let num_digits = codegen_units.len().ilog10() as usize + 1;
         for (index, cgu) in codegen_units.iter_mut().enumerate() {
+            // Note: `WorkItem::short_description` depends on this name ending
+            // with `-cgu.` followed by a numeric suffix. Please keep it in
+            // sync with this code.
             let suffix = format!("{index:0num_digits$}");
             let numbered_codegen_unit_name =
                 cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix));