Rollup merge of #126985 - Mrmaxmeier:dwarf-embed-source, r=davidtwco

Implement `-Z embed-source` (DWARFv5 source code embedding extension)

Implements the MCP at https://github.com/rust-lang/compiler-team/issues/764, which adds an unstable flag exposing LLVM's [DWARFv5 source code embedding](https://dwarfstd.org/issues/180201.1.html) support.
This commit is contained in:
Trevor Gross 2024-08-27 01:46:49 -05:00 committed by GitHub
commit 9c26ebe32e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 135 additions and 4 deletions

View File

@ -629,6 +629,9 @@ fn alloc_new_file_metadata<'ll>(
};
let hash_value = hex_encode(source_file.src_hash.hash_bytes());
let source =
cx.sess().opts.unstable_opts.embed_source.then_some(()).and(source_file.src.as_ref());
unsafe {
llvm::LLVMRustDIBuilderCreateFile(
DIB(cx),
@ -639,6 +642,8 @@ fn alloc_new_file_metadata<'ll>(
hash_kind,
hash_value.as_ptr().cast(),
hash_value.len(),
source.map_or(ptr::null(), |x| x.as_ptr().cast()),
source.map_or(0, |x| x.len()),
)
}
}
@ -659,6 +664,8 @@ fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
llvm::ChecksumKind::None,
hash_value.as_ptr().cast(),
hash_value.len(),
ptr::null(),
0,
)
})
}
@ -943,6 +950,8 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
llvm::ChecksumKind::None,
ptr::null(),
0,
ptr::null(),
0,
);
let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(

View File

@ -1860,6 +1860,8 @@ pub fn LLVMRustDIBuilderCreateFile<'a>(
CSKind: ChecksumKind,
Checksum: *const c_char,
ChecksumLen: size_t,
Source: *const c_char,
SourceLen: size_t,
) -> &'a DIFile;
pub fn LLVMRustDIBuilderCreateSubroutineType<'a>(

View File

@ -774,6 +774,7 @@ macro_rules! tracked {
tracked!(direct_access_external_data, Some(true));
tracked!(dual_proc_macros, true);
tracked!(dwarf_version, Some(5));
tracked!(embed_source, true);
tracked!(emit_thin_lto, false);
tracked!(export_executable_symbols, true);
tracked!(fewer_names, Some(true));

View File

@ -913,14 +913,19 @@ extern "C" LLVMMetadataRef
LLVMRustDIBuilderCreateFile(LLVMRustDIBuilderRef Builder, const char *Filename,
size_t FilenameLen, const char *Directory,
size_t DirectoryLen, LLVMRustChecksumKind CSKind,
const char *Checksum, size_t ChecksumLen) {
const char *Checksum, size_t ChecksumLen,
const char *Source, size_t SourceLen) {
std::optional<DIFile::ChecksumKind> llvmCSKind = fromRust(CSKind);
std::optional<DIFile::ChecksumInfo<StringRef>> CSInfo{};
if (llvmCSKind)
CSInfo.emplace(*llvmCSKind, StringRef{Checksum, ChecksumLen});
std::optional<StringRef> oSource{};
if (Source)
oSource = StringRef(Source, SourceLen);
return wrap(Builder->createFile(StringRef(Filename, FilenameLen),
StringRef(Directory, DirectoryLen), CSInfo));
StringRef(Directory, DirectoryLen), CSInfo,
oSource));
}
extern "C" LLVMMetadataRef

View File

@ -14,6 +14,10 @@ session_crate_name_empty = crate name must not be empty
session_crate_name_invalid = crate names cannot start with a `-`, but `{$s}` has a leading hyphen
session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at least `-Z dwarf-version=5` but DWARF version is {$dwarf_version}
session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
session_expr_parentheses_needed = parentheses are required to parse this as an expression
session_failed_to_create_profiler = failed to create profiler: {$err}

View File

@ -165,6 +165,16 @@ pub(crate) struct UnsupportedDwarfVersion {
pub(crate) dwarf_version: u32,
}
/// Diagnostic for `-Z embed-source` being enabled while the DWARF version in
/// effect is lower than 5.
///
/// Rendered with the `session_embed_source_insufficient_dwarf_version`
/// message, which interpolates `dwarf_version`.
#[derive(Diagnostic)]
#[diag(session_embed_source_insufficient_dwarf_version)]
pub(crate) struct EmbedSourceInsufficientDwarfVersion {
/// The DWARF version that was found to be too low (shown to the user as
/// `{$dwarf_version}`).
pub(crate) dwarf_version: u32,
}
/// Diagnostic for `-Z embed-source` being enabled while debug information is
/// disabled.
///
/// Rendered with the `session_embed_source_requires_debug_info` message; it
/// carries no arguments.
#[derive(Diagnostic)]
#[diag(session_embed_source_requires_debug_info)]
pub(crate) struct EmbedSourceRequiresDebugInfo;
#[derive(Diagnostic)]
#[diag(session_target_stack_protector_not_supported)]
pub(crate) struct StackProtectorNotSupportedForTarget<'a> {

View File

@ -1701,6 +1701,8 @@ pub(crate) fn parse_wasm_c_abi(slot: &mut WasmCAbi, v: Option<&str>) -> bool {
them only if an error has not been emitted"),
ehcont_guard: bool = (false, parse_bool, [TRACKED],
"generate Windows EHCont Guard tables"),
embed_source: bool = (false, parse_bool, [TRACKED],
"embed source text in DWARF debug sections (default: no)"),
emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],
"emit a section containing stack size metadata (default: no)"),
emit_thin_lto: bool = (true, parse_bool, [TRACKED],

View File

@ -37,8 +37,9 @@
use crate::code_stats::CodeStats;
pub use crate::code_stats::{DataTypeKind, FieldInfo, FieldKind, SizeKind, VariantInfo};
use crate::config::{
self, CoverageLevel, CrateType, ErrorOutputType, FunctionReturn, Input, InstrumentCoverage,
OptLevel, OutFileName, OutputType, RemapPathScopeComponents, SwitchWithOptPath,
self, CoverageLevel, CrateType, DebugInfo, ErrorOutputType, FunctionReturn, Input,
InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents,
SwitchWithOptPath,
};
use crate::parse::{add_feature_diagnostics, ParseSess};
use crate::search_paths::{PathKind, SearchPath};
@ -1306,6 +1307,19 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
.emit_err(errors::SplitDebugInfoUnstablePlatform { debuginfo: sess.split_debuginfo() });
}
if sess.opts.unstable_opts.embed_source {
let dwarf_version =
sess.opts.unstable_opts.dwarf_version.unwrap_or(sess.target.default_dwarf_version);
if dwarf_version < 5 {
sess.dcx().emit_warn(errors::EmbedSourceInsufficientDwarfVersion { dwarf_version });
}
if sess.opts.debuginfo == DebugInfo::None {
sess.dcx().emit_warn(errors::EmbedSourceRequiresDebugInfo);
}
}
if sess.opts.unstable_opts.instrument_xray.is_some() && !sess.target.options.supports_xray {
sess.dcx().emit_err(errors::InstrumentationNotSupported { us: "XRay".to_string() });
}

View File

@ -0,0 +1,12 @@
# `embed-source`
This flag controls whether the compiler embeds the program source code text into
the object debug information section. It takes one of the following values:
* `y`, `yes`, `on` or `true`: put source code in debug info.
* `n`, `no`, `off`, `false` or no value: omit source code from debug info (the default).
This flag is ignored in configurations that don't emit DWARF debug information
and on non-LLVM backends. `-Z embed-source` requires DWARFv5: pass
`-Z dwarf-version=5` to select the compiler's DWARF target version and `-g` to
enable debug info generation. The compiler emits a warning if the flag is used
with a lower DWARF version or with debug info disabled.

View File

@ -0,0 +1,2 @@
// hello
fn main() {}

View File

@ -0,0 +1,70 @@
//@ ignore-windows
//@ ignore-apple
// LLVM 17's embed-source implementation requires that source code is attached
// for all files in the output DWARF debug info. This restriction was lifted in
// LLVM 18 (87e22bdd2bd6d77d782f9d64b3e3ae5bdcd5080d).
//@ min-llvm-version: 18
// This test should be replaced with one in tests/debuginfo once we can easily
// tell via GDB or LLDB if debuginfo contains source code. Cheap tricks in LLDB
// like setting an invalid source map path don't appear to work, maybe this'll
// become easier once GDB supports DWARFv6?
use std::collections::HashMap;
use std::path::PathBuf;
use std::rc::Rc;
use gimli::{AttributeValue, EndianRcSlice, Reader, RunTimeEndian};
use object::{Object, ObjectSection};
use run_make_support::{gimli, object, rfs, rustc};
/// Compiles `main.rs` with `-Zembed-source=yes` (DWARF 5, debug info on), then
/// scans every line-program file entry in the produced binary and checks that
/// exactly one entry — `main.rs` — carries non-empty embedded source text equal
/// to the input file.
fn main() {
    let binary_path = PathBuf::from("embed-source-main");
    rustc()
        .input("main.rs")
        .output(&binary_path)
        .arg("-g")
        .arg("-Zembed-source=yes")
        .arg("-Zdwarf-version=5")
        .run();

    let binary = rfs::read(binary_path);
    let obj = object::File::parse(binary.as_slice()).unwrap();
    let endian = if obj.is_little_endian() {
        RunTimeEndian::Little
    } else {
        RunTimeEndian::Big
    };

    // Hand gimli each DWARF section's (decompressed) bytes; a section that is
    // absent from the object is loaded as an empty slice.
    let dwarf = gimli::Dwarf::load(|id| -> Result<_, ()> {
        let bytes = obj
            .section_by_name(id.name())
            .map(|found| found.uncompressed_data().unwrap())
            .unwrap_or_default();
        Ok(EndianRcSlice::new(Rc::from(bytes.as_ref()), endian))
    })
    .unwrap();

    // Maps file path -> embedded source text, for entries with non-empty text.
    let mut sources = HashMap::new();
    let mut unit_headers = dwarf.units();
    while let Some(unit_header) = unit_headers.next().unwrap() {
        let unit = dwarf.unit(unit_header).unwrap();
        let unit = unit.unit_ref(&dwarf);

        // Units without a line program have no file table to inspect.
        let Some(program) = &unit.line_program else {
            continue;
        };

        for file in program.header().file_names() {
            // Only file entries that actually carry a source attribute matter.
            let Some(source_attr) = file.source() else {
                continue;
            };

            let path =
                unit.attr_string(file.path_name()).unwrap().to_string_lossy().unwrap().to_string();
            let text =
                unit.attr_string(source_attr).unwrap().to_string_lossy().unwrap().to_string();

            if text.is_empty() {
                continue;
            }
            sources.insert(path, text);
        }
    }

    dbg!(&sources);
    assert_eq!(sources.len(), 1);
    assert_eq!(sources.get("main.rs").unwrap(), "// hello\nfn main() {}\n");
}