Rollup merge of #47415 - varkor:cgu-partition-heuristic, r=michaelwoerister

Add CGU size heuristic for partitioning This addresses the concern of #47316 by estimating CGU size based on the size of its MIR. Looking at the size estimate differences for a small selection of crates, this heuristic produces different orderings, which should more accurately reflect optimisation time. (Fixes #47316.) r? @michaelwoerister
2018-01-25 12:48:44 -06:00 · 2018-01-25 12:48:44 -06:00 · 3b2403306f
commit 3b2403306f
parent 9fd7da904b 62703cfd26
8 changed files with 81 additions and 9 deletions
--- a/src/librustc/dep_graph/dep_node.rs
+++ b/src/librustc/dep_graph/dep_node.rs
@ -638,6 +638,7 @@ define_dep_nodes!( <'tcx>
    [input] TargetFeaturesWhitelist,
    [] TargetFeaturesEnabled(DefId),

+    [] InstanceDefSizeEstimate { instance_def: InstanceDef<'tcx> },
 );

 trait DepNodeParams<'a, 'gcx: 'tcx + 'a, 'tcx: 'a> : fmt::Debug {
--- a/src/librustc/mir/mono.rs
+++ b/src/librustc/mir/mono.rs
@ -10,7 +10,7 @@

 use syntax::ast::NodeId;
 use syntax::symbol::InternedString;
-use ty::Instance;
+use ty::{Instance, TyCtxt};
 use util::nodemap::FxHashMap;
 use rustc_data_structures::base_n;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
@ -25,6 +25,21 @@ pub enum MonoItem<'tcx> {
    GlobalAsm(NodeId),
 }

+impl<'tcx> MonoItem<'tcx> {
+    pub fn size_estimate<'a>(&self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) -> usize {
+        match *self {
+            MonoItem::Fn(instance) => {
+                // Estimate the size of a function based on how many statements
+                // it contains.
+                tcx.instance_def_size_estimate(instance.def)
+            },
+            // Conservatively estimate the size of a static declaration
+            // or assembly to be 1.
+            MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
+        }
+    }
+}
+
 impl<'tcx> HashStable<StableHashingContext<'tcx>> for MonoItem<'tcx> {
    fn hash_stable<W: StableHasherResult>(&self,
                                           hcx: &mut StableHashingContext<'tcx>,
@ -52,6 +67,7 @@ pub struct CodegenUnit<'tcx> {
    /// as well as the crate name and disambiguator.
    name: InternedString,
    items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
+    size_estimate: Option<usize>,
 }

 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
@ -101,6 +117,7 @@ impl<'tcx> CodegenUnit<'tcx> {
        CodegenUnit {
            name: name,
            items: FxHashMap(),
+            size_estimate: None,
        }
    }

@ -131,6 +148,24 @@ impl<'tcx> CodegenUnit<'tcx> {
        let hash = hash & ((1u128 << 80) - 1);
        base_n::encode(hash, base_n::CASE_INSENSITIVE)
    }
+
+    pub fn estimate_size<'a>(&mut self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) {
+        // Estimate the size of a codegen unit as (approximately) the number of MIR
+        // statements it corresponds to.
+        self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum());
+    }
+
+    pub fn size_estimate(&self) -> usize {
+        // Should only be called if `estimate_size` has previously been called.
+        self.size_estimate.expect("estimate_size must be called before getting a size_estimate")
+    }
+
+    pub fn modify_size_estimate(&mut self, delta: usize) {
+        assert!(self.size_estimate.is_some());
+        if let Some(size_estimate) = self.size_estimate {
+            self.size_estimate = Some(size_estimate + delta);
+        }
+    }
 }

 impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
@ -140,6 +175,8 @@ impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
        let CodegenUnit {
            ref items,
            name,
+            // The size estimate is not relevant to the hash
+            size_estimate: _,
        } = *self;

        name.hash_stable(hcx, hasher);
--- a/src/librustc/ty/maps/config.rs
+++ b/src/librustc/ty/maps/config.rs
@ -637,6 +637,12 @@ impl<'tcx> QueryDescription<'tcx> for queries::target_features_whitelist<'tcx> {
    }
 }

+impl<'tcx> QueryDescription<'tcx> for queries::instance_def_size_estimate<'tcx> {
+    fn describe(tcx: TyCtxt, def: ty::InstanceDef<'tcx>) -> String {
+        format!("estimating size for `{}`", tcx.item_path_str(def.def_id()))
+    }
+}
+
 macro_rules! impl_disk_cacheable_query(
    ($query_name:ident, |$key:tt| $cond:expr) => {
        impl<'tcx> QueryDescription<'tcx> for queries::$query_name<'tcx> {
--- a/src/librustc/ty/maps/mod.rs
+++ b/src/librustc/ty/maps/mod.rs
@ -365,6 +365,9 @@ define_maps! { <'tcx>
        target_features_whitelist_node(CrateNum) -> Rc<FxHashSet<String>>,
    [] fn target_features_enabled: TargetFeaturesEnabled(DefId) -> Rc<Vec<String>>,

+    // Get an estimate of the size of an InstanceDef based on its MIR for CGU partitioning.
+    [] fn instance_def_size_estimate: instance_def_size_estimate_dep_node(ty::InstanceDef<'tcx>)
+        -> usize,
 }

 //////////////////////////////////////////////////////////////////////
@ -514,3 +517,10 @@ fn substitute_normalize_and_test_predicates_node<'tcx>(key: (DefId, &'tcx Substs
 fn target_features_whitelist_node<'tcx>(_: CrateNum) -> DepConstructor<'tcx> {
    DepConstructor::TargetFeaturesWhitelist
 }
+
+fn instance_def_size_estimate_dep_node<'tcx>(instance_def: ty::InstanceDef<'tcx>)
+                                              -> DepConstructor<'tcx> {
+    DepConstructor::InstanceDefSizeEstimate {
+        instance_def
+    }
+}
--- a/src/librustc/ty/maps/plumbing.rs
+++ b/src/librustc/ty/maps/plumbing.rs
@ -761,6 +761,7 @@ pub fn force_from_dep_node<'a, 'gcx, 'lcx>(tcx: TyCtxt<'a, 'gcx, 'lcx>,
        DepKind::EraseRegionsTy |
        DepKind::NormalizeTy |
        DepKind::SubstituteNormalizeAndTestPredicates |
+        DepKind::InstanceDefSizeEstimate |

        // This one should never occur in this context
        DepKind::Null => {
--- a/src/librustc/ty/mod.rs
+++ b/src/librustc/ty/mod.rs
@ -2695,6 +2695,20 @@ fn crate_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
    tcx.hir.crate_hash
 }

+fn instance_def_size_estimate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
+                                        instance_def: InstanceDef<'tcx>)
+                                        -> usize {
+    match instance_def {
+        InstanceDef::Item(..) |
+        InstanceDef::DropGlue(..) => {
+            let mir = tcx.instance_mir(instance_def);
+            mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum()
+        },
+        // Estimate the size of other compiler-generated shims to be 1.
+        _ => 1
+    }
+}
+
 pub fn provide(providers: &mut ty::maps::Providers) {
    context::provide(providers);
    erase_regions::provide(providers);
@ -2712,6 +2726,7 @@ pub fn provide(providers: &mut ty::maps::Providers) {
        original_crate_name,
        crate_hash,
        trait_impls_of: trait_def::trait_impls_of_provider,
+        instance_def_size_estimate,
        ..*providers
    };
 }
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@ -115,6 +115,7 @@ use syntax::ast::NodeId;
 use syntax::symbol::{Symbol, InternedString};
 use rustc::mir::mono::MonoItem;
 use monomorphize::item::{MonoItemExt, InstantiationMode};
+use core::usize;

 pub use rustc::mir::mono::CodegenUnit;

@ -224,6 +225,8 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
    let mut initial_partitioning = place_root_translation_items(tcx,
                                                                trans_items);

+    initial_partitioning.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
+
    debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());

    // If the partitioning should produce a fixed count of codegen units, merge
@ -241,6 +244,8 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
    let mut post_inlining = place_inlined_translation_items(initial_partitioning,
                                                            inlining_map);

+    post_inlining.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
+
    debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter());

    // Next we try to make as many symbols "internal" as possible, so LLVM has
@ -422,14 +427,13 @@ fn merge_codegen_units<'tcx>(initial_partitioning: &mut PreInliningPartitioning<
    codegen_units.sort_by_key(|cgu| cgu.name().clone());

    // Merge the two smallest codegen units until the target size is reached.
-    // Note that "size" is estimated here rather inaccurately as the number of
-    // translation items in a given unit. This could be improved on.
    while codegen_units.len() > target_cgu_count {
        // Sort small cgus to the back
-        codegen_units.sort_by_key(|cgu| -(cgu.items().len() as i64));
+        codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
        let mut smallest = codegen_units.pop().unwrap();
        let second_smallest = codegen_units.last_mut().unwrap();

+        second_smallest.modify_size_estimate(smallest.size_estimate());
        for (k, v) in smallest.items_mut().drain() {
            second_smallest.items_mut().insert(k, v);
        }
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@ -78,7 +78,7 @@ use std::ffi::CString;
 use std::str;
 use std::sync::Arc;
 use std::time::{Instant, Duration};
-use std::i32;
+use std::{i32, usize};
 use std::iter;
 use std::sync::mpsc;
 use syntax_pos::Span;
@ -823,12 +823,10 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
    ongoing_translation.submit_pre_translated_module_to_llvm(tcx, metadata_module);

    // We sort the codegen units by size. This way we can schedule work for LLVM
-    // a bit more efficiently. Note that "size" is defined rather crudely at the
-    // moment as it is just the number of TransItems in the CGU, not taking into
-    // account the size of each TransItem.
+    // a bit more efficiently.
    let codegen_units = {
        let mut codegen_units = codegen_units;
-        codegen_units.sort_by_key(|cgu| -(cgu.items().len() as isize));
+        codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
        codegen_units
    };