Support marking folded symbols on Posix

This is similar to the Windows change at
https://chromium-review.googlesource.com/c/breakpad/breakpad/+/773418/

When a `Module` is created with `enable_multiple_field_` = true,
all FUNCs and PUBLICs that share the same address will be collapsed
into a single entry, and that entry will be marked with `m` for
multiple in the final output.

`enable_multiple_field_` is temporary just in case people are
depending on the current behavior.

Support for `dump_syms` executables will be added in a follow-up.

Bug: google-breakpad:751
Change-Id: I631a148ed00138924c7bcb5ad6db8b9a6610dd03
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3905122
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
Leonard Grey 2022-09-23 20:46:59 +00:00
parent 1f9903c161
commit 989f862134
3 changed files with 150 additions and 26 deletions

View file

@ -96,15 +96,19 @@ void Module::InlineOriginMap::SetReference(uint64_t offset,
references_[offset] = specification_offset;
}
Module::Module(const string& name, const string& os,
const string& architecture, const string& id,
const string& code_id /* = "" */) :
name_(name),
os_(os),
architecture_(architecture),
id_(id),
code_id_(code_id),
load_address_(0) { }
Module::Module(const string& name,
const string& os,
const string& architecture,
const string& id,
const string& code_id /* = "" */,
bool enable_multiple_field /* = false*/)
: name_(name),
os_(os),
architecture_(architecture),
id_(id),
code_id_(code_id),
load_address_(0),
enable_multiple_field_(enable_multiple_field) {}
Module::~Module() {
for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
@ -150,6 +154,12 @@ bool Module::AddFunction(Function* function) {
it_ext = externs_.find(&arm_thumb_ext);
}
if (it_ext != externs_.end()) {
if (enable_multiple_field_) {
Extern* found_ext = *it_ext;
// If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
function->is_multiple |=
found_ext->name != function->name || found_ext->is_multiple;
}
delete *it_ext;
externs_.erase(it_ext);
}
@ -164,12 +174,26 @@ bool Module::AddFunction(Function* function) {
}
}
#endif
std::pair<FunctionSet::iterator,bool> ret = functions_.insert(function);
if (!ret.second && (*ret.first != function)) {
// Free the duplicate that was not inserted because this Module
// now owns it.
return false;
if (enable_multiple_field_) {
FunctionSet::iterator existing_function = std::find_if(
functions_.begin(), functions_.end(),
[&](Function* other) { return other->address == function->address; });
if (existing_function != functions_.end()) {
(*existing_function)->is_multiple = true;
// Free the duplicate that was not inserted because this Module
// now owns it.
return false;
}
std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function);
// We just checked!
assert(ret.second);
} else {
std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function);
if (!ret.second && (*ret.first != function)) {
// Free the duplicate that was not inserted because this Module
// now owns it.
return false;
}
}
return true;
}
@ -188,7 +212,8 @@ void Module::AddExtern(Extern* ext) {
}
std::pair<ExternSet::iterator,bool> ret = externs_.insert(ext);
if (!ret.second) {
if (!ret.second && enable_multiple_field_) {
(*ret.first)->is_multiple = true;
// Free the duplicate that was not inserted because this Module
// now owns it.
delete ext;
@ -378,11 +403,10 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) {
vector<Line>::iterator line_it = func->lines.begin();
for (auto range_it = func->ranges.cbegin();
range_it != func->ranges.cend(); ++range_it) {
stream << "FUNC " << hex
<< (range_it->address - load_address_) << " "
<< range_it->size << " "
<< func->parameter_size << " "
<< func->name << dec << "\n";
stream << "FUNC " << (func->is_multiple ? "m " : "") << hex
<< (range_it->address - load_address_) << " " << range_it->size
<< " " << func->parameter_size << " " << func->name << dec
<< "\n";
if (!stream.good())
return ReportError();
@ -422,9 +446,9 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) {
for (ExternSet::const_iterator extern_it = externs_.begin();
extern_it != externs_.end(); ++extern_it) {
Extern* ext = *extern_it;
stream << "PUBLIC " << hex
<< (ext->address - load_address_) << " 0 "
<< ext->name << dec << "\n";
stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex
<< (ext->address - load_address_) << " 0 " << ext->name << dec
<< "\n";
}
}

View file

@ -128,6 +128,9 @@ class Module {
// Inlined call sites belonging to this functions.
vector<std::unique_ptr<Inline>> inlines;
// If this symbol has been folded with other symbols in the linked binary.
bool is_multiple = false;
};
struct InlineOrigin {
@ -241,6 +244,8 @@ class Module {
explicit Extern(const Address& address_input) : address(address_input) {}
const Address address;
string name;
// If this symbol has been folded with other symbols in the linked binary.
bool is_multiple = false;
};
// A map from register names to postfix expressions that recover
@ -294,8 +299,14 @@ class Module {
// Create a new module with the given name, operating system,
// architecture, and ID string.
Module(const string& name, const string& os, const string& architecture,
const string& id, const string& code_id = "");
// NB: `enable_multiple_field` is temporary while transitioning to enabling
// writing the multiple field permanently.
Module(const string& name,
const string& os,
const string& architecture,
const string& id,
const string& code_id = "",
bool enable_multiple_field = false);
~Module();
// Set the module's load address to LOAD_ADDRESS; addresses given
@ -471,6 +482,13 @@ class Module {
ExternSet externs_;
unordered_set<string> common_strings_;
// Whether symbols sharing an address should be collapsed into a single entry
// and marked with an `m` in the output. See
// https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs
// at
// https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
bool enable_multiple_field_;
};
} // namespace google_breakpad

View file

@ -506,6 +506,34 @@ TEST(Module, ConstructFunctionsWithSameAddress) {
contents.c_str());
}
// If multiple fields are enabled, only one function is included per address.
// The entry will be tagged with `m` to show that there are multiple symbols
// at that address.
// TODO(lgrey): Remove the non-multiple versions of these tests and remove the
// suffixes from the suffxed ones when removing `enable_multiple_field_`.
TEST(Module, ConstructFunctionsWithSameAddressMultiple) {
stringstream s;
Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true);
// Two functions.
Module::Function* function1 = generate_duplicate_function("_without_form");
Module::Function* function2 = generate_duplicate_function("_and_void");
m.AddFunction(function1);
// If this succeeds, we'll have a double-free with the `delete` below. Avoid
// that.
ASSERT_FALSE(m.AddFunction(function2));
delete function2;
m.Write(s, ALL_SYMBOL_DATA);
string contents = s.str();
EXPECT_STREQ(
"MODULE os-name architecture id-string name with spaces\n"
"FUNC m d35402aac7a7ad5c 200b26e605f99071 f14ac4fed48c4a99"
" _without_form\n",
contents.c_str());
}
// Externs should be written out as PUBLIC records, sorted by
// address.
TEST(Module, ConstructExterns) {
@ -554,6 +582,29 @@ TEST(Module, ConstructDuplicateExterns) {
"PUBLIC ffff 0 _xyz\n",
contents.c_str());
}
// Externs with the same address have the `m` tag if the multiple field are
// enabled.
TEST(Module, ConstructDuplicateExternsMultiple) {
stringstream s;
Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true);
// Two externs.
Module::Extern* extern1 = new Module::Extern(0xffff);
extern1->name = "_xyz";
Module::Extern* extern2 = new Module::Extern(0xffff);
extern2->name = "_abc";
m.AddExtern(extern1);
m.AddExtern(extern2);
m.Write(s, ALL_SYMBOL_DATA);
string contents = s.str();
EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME
"\n"
"PUBLIC m ffff 0 _xyz\n",
contents.c_str());
}
// If there exists an extern and a function at the same address, only write
// out the FUNC entry.
@ -586,6 +637,37 @@ TEST(Module, ConstructFunctionsAndExternsWithSameAddress) {
contents.c_str());
}
// If there exists an extern and a function at the same address, only write
// out the FUNC entry, and mark it with `m` if the multiple field is enabled.
TEST(Module, ConstructFunctionsAndExternsWithSameAddressMultiple) {
stringstream s;
Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID, "", true);
// Two externs.
Module::Extern* extern1 = new Module::Extern(0xabc0);
extern1->name = "abc";
Module::Extern* extern2 = new Module::Extern(0xfff0);
extern2->name = "xyz";
m.AddExtern(extern1);
m.AddExtern(extern2);
Module::Function* function = new Module::Function("_xyz", 0xfff0);
Module::Range range(0xfff0, 0x10);
function->ranges.push_back(range);
function->parameter_size = 0;
m.AddFunction(function);
m.Write(s, ALL_SYMBOL_DATA);
string contents = s.str();
EXPECT_STREQ("MODULE " MODULE_OS " " MODULE_ARCH " " MODULE_ID " " MODULE_NAME
"\n"
"FUNC m fff0 10 0 _xyz\n"
"PUBLIC abc0 0 abc\n",
contents.c_str());
}
// If there exists an extern and a function at the same address, only write
// out the FUNC entry. For ARM THUMB, the extern that comes from the ELF
// symbol section has bit 0 set.