From 989f86213449f653ddf83309c1644b70f9e16628 Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Fri, 23 Sep 2022 20:46:59 +0000 Subject: [PATCH] Support marking folded symbols on Posix This is similar to the Windows change at https://chromium-review.googlesource.com/c/breakpad/breakpad/+/773418/ When a `Module` is created with `enable_multiple_field_` = true, all FUNCs and PUBLICs that share the same address will be collapsed into a single entry, and that entry will be marked with `m` for multiple in the final output. `enable_multiple_field_` is temporary just in case people are depending on the current behavior. Support for `dump_syms` executables will be added in a follow-up. Bug: google-breakpad:751 Change-Id: I631a148ed00138924c7bcb5ad6db8b9a6610dd03 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3905122 Reviewed-by: Mark Mentovai --- src/common/module.cc | 72 ++++++++++++++++++++---------- src/common/module.h | 22 +++++++++- src/common/module_unittest.cc | 82 +++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 26 deletions(-) diff --git a/src/common/module.cc b/src/common/module.cc index 73c5f723..4c427da1 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -96,15 +96,19 @@ void Module::InlineOriginMap::SetReference(uint64_t offset, references_[offset] = specification_offset; } -Module::Module(const string& name, const string& os, - const string& architecture, const string& id, - const string& code_id /* = "" */) : - name_(name), - os_(os), - architecture_(architecture), - id_(id), - code_id_(code_id), - load_address_(0) { } +Module::Module(const string& name, + const string& os, + const string& architecture, + const string& id, + const string& code_id /* = "" */, + bool enable_multiple_field /* = false*/) + : name_(name), + os_(os), + architecture_(architecture), + id_(id), + code_id_(code_id), + load_address_(0), + enable_multiple_field_(enable_multiple_field) {} Module::~Module() { for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it) @@ -150,6 +154,12 @@ bool Module::AddFunction(Function* function) { it_ext = externs_.find(&arm_thumb_ext); } if (it_ext != externs_.end()) { + if (enable_multiple_field_) { + Extern* found_ext = *it_ext; + // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple. + function->is_multiple |= + found_ext->name != function->name || found_ext->is_multiple; + } delete *it_ext; externs_.erase(it_ext); } @@ -164,12 +174,26 @@ bool Module::AddFunction(Function* function) { } } #endif - - std::pair ret = functions_.insert(function); - if (!ret.second && (*ret.first != function)) { - // Free the duplicate that was not inserted because this Module - // now owns it. - return false; + if (enable_multiple_field_) { + FunctionSet::iterator existing_function = std::find_if( + functions_.begin(), functions_.end(), + [&](Function* other) { return other->address == function->address; }); + if (existing_function != functions_.end()) { + (*existing_function)->is_multiple = true; + // Free the duplicate that was not inserted because this Module + // now owns it. + return false; + } + std::pair ret = functions_.insert(function); + // We just checked! + assert(ret.second); + } else { + std::pair ret = functions_.insert(function); + if (!ret.second && (*ret.first != function)) { + // Free the duplicate that was not inserted because this Module + // now owns it. + return false; + } } return true; } @@ -188,7 +212,8 @@ void Module::AddExtern(Extern* ext) { } std::pair ret = externs_.insert(ext); - if (!ret.second) { + if (!ret.second && enable_multiple_field_) { + (*ret.first)->is_multiple = true; // Free the duplicate that was not inserted because this Module // now owns it. delete ext; @@ -378,11 +403,10 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { vector::iterator line_it = func->lines.begin(); for (auto range_it = func->ranges.cbegin(); range_it != func->ranges.cend(); ++range_it) { - stream << "FUNC " << hex - << (range_it->address - load_address_) << " " - << range_it->size << " " - << func->parameter_size << " " - << func->name << dec << "\n"; + stream << "FUNC " << (func->is_multiple ? "m " : "") << hex + << (range_it->address - load_address_) << " " << range_it->size + << " " << func->parameter_size << " " << func->name << dec + << "\n"; if (!stream.good()) return ReportError(); @@ -422,9 +446,9 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { for (ExternSet::const_iterator extern_it = externs_.begin(); extern_it != externs_.end(); ++extern_it) { Extern* ext = *extern_it; - stream << "PUBLIC " << hex - << (ext->address - load_address_) << " 0 " - << ext->name << dec << "\n"; + stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex + << (ext->address - load_address_) << " 0 " << ext->name << dec + << "\n"; } } diff --git a/src/common/module.h b/src/common/module.h index 05bdbfb7..f5031ea6 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -128,6 +128,9 @@ class Module { // Inlined call sites belonging to this functions. vector> inlines; + + // If this symbol has been folded with other symbols in the linked binary. + bool is_multiple = false; }; struct InlineOrigin { @@ -241,6 +244,8 @@ class Module { explicit Extern(const Address& address_input) : address(address_input) {} const Address address; string name; + // If this symbol has been folded with other symbols in the linked binary. + bool is_multiple = false; }; // A map from register names to postfix expressions that recover @@ -294,8 +299,14 @@ class Module { // Create a new module with the given name, operating system, // architecture, and ID string. - Module(const string& name, const string& os, const string& architecture, - const string& id, const string& code_id = ""); + // NB: `enable_multiple_field` is temporary while transitioning to enabling + // writing the multiple field permanently. + Module(const string& name, + const string& os, + const string& architecture, + const string& id, + const string& code_id = "", + bool enable_multiple_field = false); ~Module(); // Set the module's load address to LOAD_ADDRESS; addresses given @@ -471,6 +482,13 @@ class Module { ExternSet externs_; unordered_set common_strings_; + + // Whether symbols sharing an address should be collapsed into a single entry + // and marked with an `m` in the output. See + // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs + // at + // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3 + bool enable_multiple_field_; }; } // namespace google_breakpad diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc index 5b44f97c..220add03 100644 --- a/src/common/module_unittest.cc +++ b/src/common/module_unittest.cc @@ -506,6 +506,34 @@ TEST(Module, ConstructFunctionsWithSameAddress) { contents.c_str()); } +// If multiple fields are enabled, only one function is included per address. +// The entry will be tagged with `m` to show that there are multiple symbols +// at that address. +// TODO(lgrey): Remove the non-multiple versions of these tests and remove the +// suffixes from the suffxed ones when removing `enable_multiple_field_`. +TEST(Module, ConstructFunctionsWithSameAddressMultiple) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true); + + // Two functions. + Module::Function* function1 = generate_duplicate_function("_without_form"); + Module::Function* function2 = generate_duplicate_function("_and_void"); + + m.AddFunction(function1); + // If this succeeds, we'll have a double-free with the `delete` below. Avoid + // that. + ASSERT_FALSE(m.AddFunction(function2)); + delete function2; + + m.Write(s, ALL_SYMBOL_DATA); + string contents = s.str(); + EXPECT_STREQ( + "MODULE os-name architecture id-string name with spaces\n" + "FUNC m d35402aac7a7ad5c 200b26e605f99071 f14ac4fed48c4a99" + " _without_form\n", + contents.c_str()); +} + // Externs should be written out as PUBLIC records, sorted by // address. TEST(Module, ConstructExterns) { @@ -554,6 +582,29 @@ TEST(Module, ConstructDuplicateExterns) { "PUBLIC ffff 0 _xyz\n", contents.c_str()); } +// Externs with the same address have the `m` tag if the multiple field are +// enabled. +TEST(Module, ConstructDuplicateExternsMultiple) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true); + + // Two externs. + Module::Extern* extern1 = new Module::Extern(0xffff); + extern1->name = "_xyz"; + Module::Extern* extern2 = new Module::Extern(0xffff); + extern2->name = "_abc"; + + m.AddExtern(extern1); + m.AddExtern(extern2); + + m.Write(s, ALL_SYMBOL_DATA); + string contents = s.str(); + + EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME + "\n" + "PUBLIC m ffff 0 _xyz\n", + contents.c_str()); +} // If there exists an extern and a function at the same address, only write // out the FUNC entry. @@ -586,6 +637,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) { contents.c_str()); } +// If there exists an extern and a function at the same address, only write +// out the FUNC entry, and mark it with `m` if the multiple field is enabled. +TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true); + + // Two externs. + Module::Extern* extern1 = new Module::Extern(0xabc0); + extern1->name = "abc"; + Module::Extern* extern2 = new Module::Extern(0xfff0); + extern2->name = "xyz"; + + m.AddExtern(extern1); + m.AddExtern(extern2); + + Module::Function* function = new Module::Function("_xyz", 0xfff0); + Module::Range range(0xfff0, 0x10); + function->ranges.push_back(range); + function->parameter_size = 0; + m.AddFunction(function); + + m.Write(s, ALL_SYMBOL_DATA); + string contents = s.str(); + + EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME + "\n" + "FUNC m fff0 10 0 _xyz\n" + "PUBLIC abc0 0 abc\n", + contents.c_str()); +} + // If there exists an extern and a function at the same address, only write // out the FUNC entry. For ARM THUMB, the extern that comes from the ELF // symbol section has bit 0 set.