Remove the merging module.

Three of the four methods in `DefaultPartitioning` are defined in `default.rs`. But `merge_codegen_units` is defined in a separate module, `merging`, even though it's less than 100 lines of code and roughly the same size as the other three methods. (Also, the `merging` module currently sits alongside `default`, when it should be a submodule of `default`, adding to the confusion.) In #74275 this explanation was given: > I pulled this out into a separate module since it seemed like we might > want a few different merge algorithms to choose from. But in the three years since there have been no additional merging algorithms, and there is no mechanism for choosing between different merging algorithms. (There is a mechanism, `-Zcgu-partitioning-strategy`, for choosing between different partitioning strategies, but the merging algorithm is just one piece of a partitioning strategy.) This commit merges `merging` into `default`, making the code easier to navigate and read.
2023-05-24 10:08:40 +10:00 · 2023-05-24 10:08:40 +10:00 · b39b7098ea
commit b39b7098ea
parent e26c0c92bd
3 changed files with 94 additions and 109 deletions
--- a/compiler/rustc_monomorphize/src/partitioning/default.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/default.rs
@ -1,3 +1,4 @@
+use std::cmp;
 use std::collections::hash_map::Entry;

 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@ -14,7 +15,6 @@ use rustc_span::symbol::Symbol;

 use super::PartitioningCx;
 use crate::collector::InliningMap;
-use crate::partitioning::merging;
 use crate::partitioning::{
    MonoItemPlacement, Partition, PostInliningPartitioning, PreInliningPartitioning,
 };
@ -103,7 +103,99 @@ impl<'tcx> Partition<'tcx> for DefaultPartitioning {
        cx: &PartitioningCx<'_, 'tcx>,
        initial_partitioning: &mut PreInliningPartitioning<'tcx>,
    ) {
-        merging::merge_codegen_units(cx, initial_partitioning);
+        assert!(cx.target_cgu_count >= 1);
+        let codegen_units = &mut initial_partitioning.codegen_units;
+
+        // Note that at this point in time the `codegen_units` here may not be
+        // in a deterministic order (but we know they're deterministically the
+        // same set). We want this merging to produce a deterministic ordering
+        // of codegen units from the input.
+        //
+        // Due to basically how we've implemented the merging below (merge the
+        // two smallest into each other) we're sure to start off with a
+        // deterministic order (sorted by name). This'll mean that if two cgus
+        // have the same size the stable sort below will keep everything nice
+        // and deterministic.
+        codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
+
+        // This map keeps track of what got merged into what.
+        let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
+            codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
+
+        // Merge the two smallest codegen units until the target size is
+        // reached.
+        while codegen_units.len() > cx.target_cgu_count {
+            // Sort small cgus to the back
+            codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
+            let mut smallest = codegen_units.pop().unwrap();
+            let second_smallest = codegen_units.last_mut().unwrap();
+
+            // Move the mono-items from `smallest` to `second_smallest`
+            second_smallest.modify_size_estimate(smallest.size_estimate());
+            for (k, v) in smallest.items_mut().drain() {
+                second_smallest.items_mut().insert(k, v);
+            }
+
+            // Record that `second_smallest` now contains all the stuff that was
+            // in `smallest` before.
+            let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
+            cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
+
+            debug!(
+                "CodegenUnit {} merged into CodegenUnit {}",
+                smallest.name(),
+                second_smallest.name()
+            );
+        }
+
+        let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
+
+        if cx.tcx.sess.opts.incremental.is_some() {
+            // If we are doing incremental compilation, we want CGU names to
+            // reflect the path of the source level module they correspond to.
+            // For CGUs that contain the code of multiple modules because of the
+            // merging done above, we use a concatenation of the names of all
+            // contained CGUs.
+            let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
+                .into_iter()
+                // This `filter` makes sure we only update the name of CGUs that
+                // were actually modified by merging.
+                .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
+                .map(|(current_cgu_name, cgu_contents)| {
+                    let mut cgu_contents: Vec<&str> =
+                        cgu_contents.iter().map(|s| s.as_str()).collect();
+
+                    // Sort the names, so things are deterministic and easy to
+                    // predict. We are sorting primitive `&str`s here so we can
+                    // use unstable sort.
+                    cgu_contents.sort_unstable();
+
+                    (current_cgu_name, cgu_contents.join("--"))
+                })
+                .collect();
+
+            for cgu in codegen_units.iter_mut() {
+                if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
+                    if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    } else {
+                        // If we don't require CGU names to be human-readable,
+                        // we use a fixed length hash of the composite CGU name
+                        // instead.
+                        let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
+                        cgu.set_name(Symbol::intern(&new_cgu_name));
+                    }
+                }
+            }
+        } else {
+            // If we are compiling non-incrementally we just generate simple CGU
+            // names containing an index.
+            for (index, cgu) in codegen_units.iter_mut().enumerate() {
+                let numbered_codegen_unit_name =
+                    cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
+                cgu.set_name(numbered_codegen_unit_name);
+            }
+        }
    }

    fn place_inlined_mono_items(
--- a/compiler/rustc_monomorphize/src/partitioning/merging.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/merging.rs
@ -1,106 +0,0 @@
-use std::cmp;
-
-use rustc_data_structures::fx::FxHashMap;
-use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder};
-use rustc_span::symbol::Symbol;
-
-use super::PartitioningCx;
-use crate::partitioning::PreInliningPartitioning;
-
-pub fn merge_codegen_units<'tcx>(
-    cx: &PartitioningCx<'_, 'tcx>,
-    initial_partitioning: &mut PreInliningPartitioning<'tcx>,
-) {
-    assert!(cx.target_cgu_count >= 1);
-    let codegen_units = &mut initial_partitioning.codegen_units;
-
-    // Note that at this point in time the `codegen_units` here may not be in a
-    // deterministic order (but we know they're deterministically the same set).
-    // We want this merging to produce a deterministic ordering of codegen units
-    // from the input.
-    //
-    // Due to basically how we've implemented the merging below (merge the two
-    // smallest into each other) we're sure to start off with a deterministic
-    // order (sorted by name). This'll mean that if two cgus have the same size
-    // the stable sort below will keep everything nice and deterministic.
-    codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
-
-    // This map keeps track of what got merged into what.
-    let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
-        codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
-
-    // Merge the two smallest codegen units until the target size is reached.
-    while codegen_units.len() > cx.target_cgu_count {
-        // Sort small cgus to the back
-        codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));
-        let mut smallest = codegen_units.pop().unwrap();
-        let second_smallest = codegen_units.last_mut().unwrap();
-
-        // Move the mono-items from `smallest` to `second_smallest`
-        second_smallest.modify_size_estimate(smallest.size_estimate());
-        for (k, v) in smallest.items_mut().drain() {
-            second_smallest.items_mut().insert(k, v);
-        }
-
-        // Record that `second_smallest` now contains all the stuff that was in
-        // `smallest` before.
-        let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
-        cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);
-
-        debug!(
-            "CodegenUnit {} merged into CodegenUnit {}",
-            smallest.name(),
-            second_smallest.name()
-        );
-    }
-
-    let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
-
-    if cx.tcx.sess.opts.incremental.is_some() {
-        // If we are doing incremental compilation, we want CGU names to
-        // reflect the path of the source level module they correspond to.
-        // For CGUs that contain the code of multiple modules because of the
-        // merging done above, we use a concatenation of the names of
-        // all contained CGUs.
-        let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
-            .into_iter()
-            // This `filter` makes sure we only update the name of CGUs that
-            // were actually modified by merging.
-            .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
-            .map(|(current_cgu_name, cgu_contents)| {
-                let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
-
-                // Sort the names, so things are deterministic and easy to
-                // predict.
-
-                // We are sorting primitive &strs here so we can use unstable sort
-                cgu_contents.sort_unstable();
-
-                (current_cgu_name, cgu_contents.join("--"))
-            })
-            .collect();
-
-        for cgu in codegen_units.iter_mut() {
-            if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
-                if cx.tcx.sess.opts.unstable_opts.human_readable_cgu_names {
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                } else {
-                    // If we don't require CGU names to be human-readable, we
-                    // use a fixed length hash of the composite CGU name
-                    // instead.
-                    let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name);
-                    cgu.set_name(Symbol::intern(&new_cgu_name));
-                }
-            }
-        }
-    } else {
-        // If we are compiling non-incrementally we just generate simple CGU
-        // names containing an index.
-        for (index, cgu) in codegen_units.iter_mut().enumerate() {
-            let numbered_codegen_unit_name =
-                cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
-            cgu.set_name(numbered_codegen_unit_name);
-        }
-    }
-}
--- a/compiler/rustc_monomorphize/src/partitioning/mod.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/mod.rs
@ -93,7 +93,6 @@
 //! inlining, even when they are not marked `#[inline]`.

 mod default;
-mod merging;

 use std::cmp;
 use std::fs::{self, File};