[dump_syms/Mac] New -x option to prefer extern names when there's a mismatch

When built with -gmlt, .dSYMs are (by design) missing the
`DW_AT_linkage_name` which Breakpad uses to fill out the
(name-mangled) function names.

Thankfully, the .dSYM contains both the old-school LC_SYMTAB command
containing the STABS-format symbols (which include the fully-qualified
C++ symbol names we want, but no actual compilation unit data), as
well as the LC_SEGMENT_64 containing the __DWARF segment with the
minimal -gmlt debug information (which excludes the name-mangled C++
symbols).

Unfortunately, since the .dSYM's STABS does not define compilation
units, the usual path in `StabsReader` ignores all the fully-qualified
C++ symbol names for the functions:

bd9d94c708/src/common/stabs_reader.cc (100)

Fortunately, when built for macOS platforms (`HAVE_MACH_O_NLIST_H`),
`StabsReader` supports storing all the STABS-format symbols as
`Extern`s, regardless of whether or not they're in a compilation unit:

bd9d94c708/src/common/stabs_reader.cc (119)

Currently, when there's both a `Function` and an `Extern` with the same address, `Module` discards the `Extern`:

bd9d94c708/src/common/module.cc (161)

This CL adds a new `-x` option to the Mac `dump_syms` which prefers
the Extern function name if there's a mismatch.

Bug: https://bugs.chromium.org/p/google-breakpad/issues/detail?id=883
Change-Id: I0d32adc64fbf567600b0a5ca63c71c422b7f0f8c
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4453650
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
This commit is contained in:
Ben Hamilton 2023-04-21 12:02:42 -06:00 committed by Joshua Peraza
parent 16cee17997
commit f548d75c9f
8 changed files with 123 additions and 18 deletions

View file

@ -330,7 +330,10 @@ class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
// Use this from EndAttributes member functions, not ProcessAttribute*
// functions; only the former can be sure that all the DIE's attributes
// have been seen.
StringView ComputeQualifiedName();
//
// On return, if has_qualified_name is non-NULL, *has_qualified_name is set to
// true if the DIE includes a fully-qualified name, false otherwise.
StringView ComputeQualifiedName(bool* has_qualified_name);
CUContext* cu_context_;
DIEContext* parent_context_;
@ -466,7 +469,8 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
}
}
StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName(
bool* has_qualified_name) {
// Use the demangled name, if one is available. Demangled names are
// preferable to those inferred from the DWARF structure because they
// include argument types.
@ -482,6 +486,15 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
StringView* unqualified_name = nullptr;
StringView* enclosing_name = nullptr;
if (!qualified_name) {
if (has_qualified_name) {
// dSYMs built with -gmlt do not include the DW_AT_linkage_name
// with the unmangled symbol, but rather include it in the
// LC_SYMTAB STABS, which end up in the externs of the module.
//
// Remember this so the Module can copy over the extern name later.
*has_qualified_name = false;
}
// Find the unqualified name. If the DIE has its own DW_AT_name
// attribute, then use that; otherwise, check the specification.
if (!name_attribute_.empty()) {
@ -500,6 +513,10 @@ StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
} else if (parent_context_) {
enclosing_name = &parent_context_->name;
}
} else {
if (has_qualified_name) {
*has_qualified_name = true;
}
}
// Prepare the return value before upcoming mutations possibly invalidate the
@ -722,7 +739,8 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
ranges_form_(DW_FORM_sec_offset),
ranges_data_(0),
inline_(false),
handle_inline_(handle_inline) {}
handle_inline_(handle_inline),
has_qualified_name_(false) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
@ -745,6 +763,7 @@ class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
bool inline_;
vector<unique_ptr<Module::Inline>> child_inlines_;
bool handle_inline_;
bool has_qualified_name_;
DIEContext child_context_; // A context for our children.
};
@ -808,7 +827,7 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler(
bool DwarfCUToModule::FuncHandler::EndAttributes() {
// Compute our name, and record a specification, if appropriate.
name_ = ComputeQualifiedName();
name_ = ComputeQualifiedName(&has_qualified_name_);
if (name_.empty() && abstract_origin_) {
name_ = abstract_origin_->name;
}
@ -881,6 +900,9 @@ void DwarfCUToModule::FuncHandler::Finish() {
scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
func->ranges = ranges;
func->parameter_size = 0;
// If the name was unqualified, prefer the Extern name if there's a mismatch
// (the Extern name will be fully-qualified in that case).
func->prefer_extern_name = !has_qualified_name_;
if (func->address) {
// If the function address is zero this is a sign that this function
// description is just empty debug data and should just be discarded.
@ -915,7 +937,7 @@ void DwarfCUToModule::FuncHandler::Finish() {
}
bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
child_context_.name = ComputeQualifiedName();
child_context_.name = ComputeQualifiedName(NULL);
if (child_context_.name.empty() && no_specification) {
cu_context_->reporter->UnknownSpecification(offset_, specification_offset_);
}

View file

@ -267,6 +267,10 @@ class CUFixtureBase {
void TestFunction(int i, const string& name,
Module::Address address, Module::Address size);
// Test that the I'th function (ordered by address) in the module
// this.module_ has the given prefer_extern_name.
void TestFunctionPreferExternName(int i, bool prefer_extern_name);
// Test that the number of source lines owned by the I'th function
// in the module this.module_ is equal to EXPECTED.
void TestLineCount(int i, size_t expected);
@ -615,6 +619,15 @@ void CUFixtureBase::TestFunction(int i, const string& name,
EXPECT_EQ(0U, function->parameter_size);
}
void CUFixtureBase::TestFunctionPreferExternName(int i,
bool prefer_extern_name) {
FillFunctions();
ASSERT_LT((size_t)i, functions_.size());
Module::Function* function = functions_[i];
EXPECT_EQ(prefer_extern_name, function->prefer_extern_name);
}
void CUFixtureBase::TestLineCount(int i, size_t expected) {
FillFunctions();
ASSERT_LT((size_t) i, functions_.size());

View file

@ -420,7 +420,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
// Create a module to hold the debugging information.
module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
"", enable_multiple_));
"", enable_multiple_, prefer_extern_name_));
return true;
}

View file

@ -57,7 +57,8 @@ class DumpSymbols {
DumpSymbols(SymbolData symbol_data,
bool handle_inter_cu_refs,
bool enable_multiple = false,
const std::string& module_name = "")
const std::string& module_name = "",
bool prefer_extern_name = false)
: symbol_data_(symbol_data),
handle_inter_cu_refs_(handle_inter_cu_refs),
object_filename_(),
@ -68,7 +69,8 @@ class DumpSymbols {
selected_object_file_(),
selected_object_name_(),
enable_multiple_(enable_multiple),
module_name_(module_name) {}
module_name_(module_name),
prefer_extern_name_(prefer_extern_name) {}
~DumpSymbols() = default;
// Prepare to read debugging information from |filename|. |filename| may be
@ -205,6 +207,15 @@ class DumpSymbols {
// If non-empty, used as the module name. Otherwise, the basename of
// |object_filename_| is used as the module name.
const std::string module_name_;
// If a Function and an Extern share the same address but have a different
// name, prefer the name of the Extern.
//
// Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
// as the Function's fully-qualified name will only be present in the STABS
// (which are placed in the Extern), not in the DWARF symbols (which are
// placed in the Function).
bool prefer_extern_name_;
};
} // namespace google_breakpad

View file

@ -105,14 +105,16 @@ Module::Module(const string& name,
const string& architecture,
const string& id,
const string& code_id /* = "" */,
bool enable_multiple_field /* = false*/)
bool enable_multiple_field /* = false*/,
bool prefer_extern_name /* = false*/)
: name_(name),
os_(os),
architecture_(architecture),
id_(id),
code_id_(code_id),
load_address_(0),
enable_multiple_field_(enable_multiple_field) {}
enable_multiple_field_(enable_multiple_field),
prefer_extern_name_(prefer_extern_name) {}
Module::~Module() {
for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
@ -152,11 +154,14 @@ bool Module::AddFunction(Function* function) {
it_ext = externs_.find(&arm_thumb_ext);
}
if (it_ext != externs_.end()) {
Extern* found_ext = it_ext->get();
bool name_mismatch = found_ext->name != function->name;
if (enable_multiple_field_) {
Extern* found_ext = it_ext->get();
// If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
function->is_multiple |=
found_ext->name != function->name || found_ext->is_multiple;
function->is_multiple |= name_mismatch || found_ext->is_multiple;
}
if (name_mismatch && prefer_extern_name_) {
function->name = AddStringToPool(it_ext->get()->name);
}
externs_.erase(it_ext);
}

View file

@ -131,6 +131,10 @@ class Module {
// If this symbol has been folded with other symbols in the linked binary.
bool is_multiple = false;
// If the function's name should be filled out from a matching Extern,
// should they not match.
bool prefer_extern_name = false;
};
struct InlineOrigin {
@ -317,7 +321,8 @@ class Module {
const string& architecture,
const string& id,
const string& code_id = "",
bool enable_multiple_field = false);
bool enable_multiple_field = false,
bool prefer_extern_name = false);
~Module();
// Set the module's load address to LOAD_ADDRESS; addresses given
@ -502,6 +507,15 @@ class Module {
// at
// https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
bool enable_multiple_field_;
// If a Function and an Extern share the same address but have a different
// name, prefer the name of the Extern.
//
// Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
// as the Function's fully-qualified name will only be present in the STABS
// (which are placed in the Extern), not in the DWARF symbols (which are
// placed in the Function).
bool prefer_extern_name_;
};
} // namespace google_breakpad

View file

@ -640,6 +640,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) {
contents.c_str());
}
// If there exists an extern and a function at the same address, only write
// out the FUNC entry.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressPreferExternName) {
stringstream s;
Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", false, true);
// Two externs.
auto extern1 = std::make_unique<Module::Extern>(0xabc0);
extern1->name = "extern1";
auto extern2 = std::make_unique<Module::Extern>(0xfff0);
extern2->name = "extern2";
m.AddExtern(std::move(extern1));
m.AddExtern(std::move(extern2));
Module::Function* function = new Module::Function("function2", 0xfff0);
Module::Range range(0xfff0, 0x10);
function->ranges.push_back(range);
function->parameter_size = 0;
m.AddFunction(function);
m.Write(s, ALL_SYMBOL_DATA);
string contents = s.str();
EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME
"\n"
"FUNC fff0 10 0 extern2\n"
"PUBLIC abc0 0 extern1\n",
contents.c_str());
}
// If there exists an extern and a function at the same address, only write
// out the FUNC entry, and mark it with `m` if the multiple field is enabled.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) {

View file

@ -64,7 +64,8 @@ struct Options {
handle_inter_cu_refs(true),
handle_inlines(false),
enable_multiple(false),
module_name() {}
module_name(),
prefer_extern_name(false) {}
string srcPath;
string dsymPath;
@ -75,6 +76,7 @@ struct Options {
bool handle_inlines;
bool enable_multiple;
string module_name;
bool prefer_extern_name;
};
static bool StackFrameEntryComparator(const Module::StackFrameEntry* a,
@ -151,7 +153,8 @@ static bool Start(const Options& options) {
(options.handle_inlines ? INLINES : NO_DATA) |
(options.cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES;
DumpSymbols dump_symbols(symbol_data, options.handle_inter_cu_refs,
options.enable_multiple, options.module_name);
options.enable_multiple, options.module_name,
options.prefer_extern_name);
// For x86_64 binaries, the CFI data is in the __TEXT,__eh_frame of the
// Mach-O file, which is not copied into the dSYM. Whereas in i386, the CFI
@ -244,7 +247,7 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr, "Output a Breakpad symbol file from a Mach-o file.\n");
fprintf(stderr,
"Usage: %s [-a ARCHITECTURE] [-c] [-g dSYM path] "
"[-n MODULE] <Mach-o file>\n",
"[-n MODULE] [-x] <Mach-o file>\n",
argv[0]);
fprintf(stderr, "\t-i: Output module header information only.\n");
fprintf(stderr, "\t-a: Architecture type [default: native, or whatever is\n");
@ -260,6 +263,9 @@ static void Usage(int argc, const char *argv[]) {
fprintf(stderr,
"\t-n: Use MODULE as the name of the module rather than \n"
"the basename of the Mach-O file/dSYM.\n");
fprintf(stderr,
"\t-x: Prefer the PUBLIC (extern) name over the FUNC if\n"
"they do not match.\n");
fprintf(stderr, "\t-h: Usage\n");
fprintf(stderr, "\t-?: Usage\n");
}
@ -269,7 +275,7 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
extern int optind;
signed char ch;
while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:")) != -1) {
while ((ch = getopt(argc, (char* const*)argv, "ia:g:crdm?hn:x")) != -1) {
switch (ch) {
case 'i':
options->header_only = true;
@ -302,6 +308,9 @@ static void SetupOptions(int argc, const char *argv[], Options *options) {
case 'n':
options->module_name = optarg;
break;
case 'x':
options->prefer_extern_name = true;
break;
case '?':
case 'h':
Usage(argc, argv);