summaryrefslogtreecommitdiffstats
path: root/src/dwarf2pdb.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dwarf2pdb.cpp')
-rw-r--r--src/dwarf2pdb.cpp521
1 files changed, 465 insertions, 56 deletions
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 3019856..b97dcb9 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -695,6 +695,147 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo)
cbUdtSymbols += len;
}
+// Helper to format a fully qualified proc name like 'some_ns::Foo::Foo' since
+// for a Foo constructor in a Foo class in a namespace called "some_ns".
+// PDBs require fully qualified names in their symbols.
+// TODO: better error handling for out of space.
+void CV2PDB::formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, size_t cbBuf) const {
+ if (node->specification) {
+ // If the proc has a "specification", i.e. a declaration, use it instead
+ // of the definition, as it has a proper hierarchy connected to it
+ // which will give us a proper fully-qualified name like Foo::Foo
+ // instead of just Foo.
+ const DWARF_InfoData* entry = findEntryByPtr(node->specification);
+ if (entry) {
+ node = entry;
+ }
+ } else {
+ // Find the node's entry in the DWARF tree. We can't use 'node' as is because
+ // it is a local copy without linkage into the tree, as it comes from
+ // the 2nd pass scan after the tree is already built.
+ const DWARF_InfoData* entry = findEntryByPtr(node->entryPtr);
+ assert(entry); // how can it not exist? Bug in tree construction.
+ node = entry;
+ }
+ DWARF_InfoData* parent = node->parent;
+ std::vector<const DWARF_InfoData*> segments;
+ segments.push_back(node);
+
+ // Accumulate all the valid parent scopes so that we can reverse them for
+ // formatting.
+ while (parent) {
+ switch (parent->tag) {
+ // TODO: are there any other kinds of valid parents?
+ case DW_TAG_class_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_namespace:
+ segments.push_back(parent);
+ break;
+ default:
+ break;
+ }
+ parent = parent->parent;
+ }
+
+ int remain = cbBuf;
+ char* p = buf;
+
+ // Format the parents in reverse order with :: operator in between.
+ for (int i = segments.size() - 1; i >= 0; --i) {
+ const char* name = segments[i]->name;
+ char nameBuf[64] = {};
+ int nameLen = 0;
+ if (!segments[i]->name) {
+ // This segment has no name. This could be because it is part of
+ // an anonymous class, which often happens for lambda expressions.
+ // Generate a unique anonymous name for it.
+ nameLen = sprintf_s(nameBuf, "[anon_%x]", segments[i]->entryOff);
+ if (nameLen < 0) {
+ // Formatting failed. Try a default name.
+ assert(false); // crash in debug builds.
+ name = "[anon]";
+ }
+ name = nameBuf;
+ } else {
+ nameLen = strlen(name);
+ }
+ if (remain < nameLen) {
+ fprintf(stderr, "unable to fit full symbol name: %s\n", node->name);
+ return;
+ }
+
+ memcpy(p, name, nameLen);
+
+ p += nameLen;
+ remain -= nameLen;
+
+ if (i > 0) {
+ // Append :: separator
+ if (remain < 2) {
+ fprintf(stderr, "unable to fit full symbol name (:: separator): %s\n", node->name);
+ return;
+ }
+ *p++ = ':';
+ *p++ = ':';
+ remain -= 2;
+ }
+ }
+
+ if (remain > 0) {
+ *p = 0; // NUL terminate.
+ }
+}
+
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
+
+// Find the source of an inlined function by following its 'abstract_origin'
+// attribute references and recursively merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
+{
+ DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
+ if (!abstractOrigin) {
+ // Could not find abstract origin. Why not?
+ assert(false);
+ return;
+ }
+
+ // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+ // in the wild.
+ //
+ // assert(id.tag == idspec.tag);
+
+ if (abstractOrigin->abstract_origin)
+ mergeAbstractOrigin(*abstractOrigin, context);
+ if (abstractOrigin->specification)
+ mergeSpecification(*abstractOrigin, context);
+ id.merge(*abstractOrigin);
+}
+
+// Find the declaration entry for a definition by following its 'specification'
+// attribute references and merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
+{
+ DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
+ if (!idspec) {
+ // Could not find decl for this definition. Why not?
+ assert(false);
+ return;
+ }
+
+ // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+ // in the wild.
+ //
+ // assert(id.tag == idspec.tag);
+
+ if (idspec->abstract_origin)
+ mergeAbstractOrigin(*idspec, context);
+ if (idspec->specification) {
+ mergeSpecification(*idspec, context);
+ }
+ id.merge(*idspec);
+}
+
bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
{
unsigned int pclo = ranges.front().pclo - codeSegOff;
@@ -723,8 +864,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
cvs->proc_v2.flags = 0;
// printf("GlobalPROC %s\n", procid.name);
-
- len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, procid.name);
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&procid, namebuf, sizeof namebuf);
+ len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
for (; len & (align-1); len++)
udtSymbols[cbUdtSymbols + len] = 0xf4 - (len & 3);
@@ -762,8 +904,13 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
DWARF_InfoData id;
int off = 8;
+ // Save off the cursor to the start of the proc.
DIECursor prev = cursor;
- while (cursor.readNext(id, true))
+
+ // First, collect all the formal parameters of the proc.
+ // Don't worry about storing these in the tree as we're not going to need
+ // to generate fully-qualified names like we would for functions/classes.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_formal_parameter && id.name)
{
@@ -778,7 +925,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
}
appendEndArg();
+ // Now, collect all the lexical blocks and their stack variables.
std::vector<DIECursor> lexicalBlocks;
+
+ // Start from the proc base, and push all nested lexical blocks as you
+ // encounter them.
lexicalBlocks.push_back(prev);
while (!lexicalBlocks.empty())
@@ -786,7 +937,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
cursor = lexicalBlocks.back();
lexicalBlocks.pop_back();
- while (cursor.readNext(id))
+ while (cursor.readNext(&id))
{
if (id.tag == DW_TAG_lexical_block)
{
@@ -813,15 +964,23 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
{
appendLexicalBlock(id, pclo + codeSegOff);
DIECursor next = cursor;
+
+ // Compute the sibling node of this lexical block.
next.gotoSibling();
assert(lexicalBlocks.empty() || next.ptr <= lexicalBlocks.back().ptr);
+
+ // Append the next lexical block to the list of blocks
+ // to scan later.
lexicalBlocks.push_back(next);
+
+ // But for now, scan down the current lexical block.
cursor = cursor.getSubtreeCursor();
continue;
}
}
else if (id.tag == DW_TAG_variable)
{
+ // Found a local variable.
if (id.name && (id.location.type == ExprLoc || id.location.type == Block))
{
Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset)
@@ -864,14 +1023,15 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
return true;
}
-int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff, int flStart)
+// Only looks at DW_TAG_member and DW_TAG_inheritance
+int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int baseoff, int flStart)
{
bool isunion = structid.tag == DW_TAG_union_type;
int nfields = 0;
- // cursor points to the first member
+ // cursor points to the first member of the class/struct/union.
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -905,12 +1065,12 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
// if it doesn't have a name, and it's a struct or union, embed it directly
DIECursor membercursor(cursor, id.type);
DWARF_InfoData memberid;
- if (membercursor.readNext(memberid))
+ if (membercursor.readNext(&memberid))
{
if (memberid.abstract_origin)
- mergeAbstractOrigin(memberid, cursor);
+ mergeAbstractOrigin(memberid, *this);
if (memberid.specification)
- mergeSpecification(memberid, cursor);
+ mergeSpecification(memberid, *this);
int cvtype = -1;
switch (memberid.tag)
@@ -953,6 +1113,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
return nfields;
}
+// Add a class/struct/union to the database.
int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
{
//printf("Adding struct %s, entryoff %d, abbrev %d\n", structid.name, structid.entryOff, structid.abbrev);
@@ -990,25 +1151,29 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
checkUserTypeAlloc(kMaxNameLen + 100);
codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
- const char* name = (structid.name ? structid.name : "__noname");
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&structid, namebuf, sizeof namebuf);
int attr = fieldlistType ? 0 : kPropIncomplete;
- int len = addAggregate(cvt, false, nfields, fieldlistType, attr, 0, 0, structid.byte_size, name, nullptr);
+ int len = addAggregate(cvt, structid.tag == DW_TAG_class_type, nfields, fieldlistType, attr, 0, 0, structid.byte_size, namebuf, nullptr);
cbUserTypes += len;
//ensureUDT()?
int cvtype = nextUserType++;
- addUdtSymbol(cvtype, name);
+ addUdtSymbol(cvtype, namebuf);
return cvtype;
}
-void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
+// Compute the array bounds of the DIE at the given 'cursor'.
+void CV2PDB::getDWARFArrayBounds(DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
{
DWARF_InfoData id;
// TODO: handle multi-dimensional arrays
if (cursor.cu)
{
- while (cursor.readNext(id, true))
+ // Don't insert these elements into the DB. We're just using it for
+ // array bounds calculation.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_subrange_type)
{
@@ -1041,6 +1206,7 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& p
upperBound = subrangeid.upper_bound;
}
+// Compute a type ID for a basic DWARF type.
int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
{
int type = 0, mode = 0, size = 0;
@@ -1103,10 +1269,13 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
return translateType(t);
}
-int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor)
+// TODO: Array wanted to be scanned twice due to DW_TAG_subrange_type being looked at
+// in the caller. See if it can be handled in a single place for clarity, simplicity & efficiency.
+// Goal: don't rescan the same DIE twice.
+int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor)
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
checkUserTypeAlloc(kMaxNameLen + 100);
codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
@@ -1204,10 +1373,10 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
/* Enumerated types are described in CodeView with two components:
1. A LF_ENUM leaf, representing the type itself. We put this one in the
- userTypes buffer.
+ userTypes buffer.
2. One or several LF_FIELDLIST records, to contain the list of
- enumerators (name and value) associated to the enum type
+ enumerators (name and value) associated to the enum type
(LF_ENUMERATE leaves). As type records cannot be larger 2**16 bytes,
we need to create multiple records when there are too many
enumerators. The first record contains the first LF_ENUMERATE leaves,
@@ -1242,7 +1411,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
/* Now fill this field list with the enumerators we find in DWARF. */
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_enumerator && id.has_const_value)
{
@@ -1253,7 +1422,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
int len = addFieldEnumerate(dfieldtype, id.name, id.const_value);
/* If adding this enumerate leaves no room for a LF_INDEX leaf,
- create a new LF_FIELDLIST record now. */
+ create a new LF_FIELDLIST record now. */
if (fieldlistLength + len + sizeof(dfieldtype->index_v2) > 0xffff)
{
/* Append the LF_INDEX leaf. */
@@ -1306,34 +1475,148 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
/* Now the LF_FIELDLIST is ready, create the LF_ENUM type record itself. */
checkUserTypeAlloc();
- int basetype = (enumid.type != 0)
- ? getTypeByDWARFPtr(enumid.type)
- : getDWARFBasicType(enumid.encoding, enumid.byte_size);
+ const DWARF_InfoData* entry = findEntryByPtr(enumid.entryPtr);
+ int prop = 0;
+ if (entry && entry->parent) {
+ int tag = entry->parent->tag;
+ if (tag == DW_TAG_class_type ||
+ tag == DW_TAG_structure_type ||
+ tag == DW_TAG_union_type)
+ {
+ prop |= kPropIsNested;
+ }
+ }
+
+ // NOTE: WinDbg/VS Dbg expects enum types to be base types, not indirect
+ // refs/UDTs.
+ //
+ // Compute the best base/underlying type to use.
+ int encoding = DW_ATE_signed; // default to int
+ const DWARF_InfoData* typeEntry = findEntryByPtr(enumid.type);
+ const DWARF_InfoData* t = typeEntry;
+
+ // Follow all the parent types to get to the base UDT.
+ while (t) {
+ t = findEntryByPtr(t->type);
+ if (t) typeEntry = t;
+ }
+
+ if (typeEntry) {
+ encoding = typeEntry->encoding;
+ assert(typeEntry->byte_size == enumid.byte_size);
+ }
+
+ const int basetype = getDWARFBasicType(encoding, enumid.byte_size);
+
dtype = (codeview_type*)(userTypes + cbUserTypes);
- const char* name = (enumid.name ? enumid.name : "__noname");
- cbUserTypes += addEnum(dtype, count, firstFieldlistType, 0, basetype, name);
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&enumid, namebuf, sizeof namebuf);
+ cbUserTypes += addEnum(dtype, count, firstFieldlistType, prop, basetype, namebuf);
int enumType = nextUserType++;
- addUdtSymbol(enumType, name);
+ addUdtSymbol(enumType, namebuf);
return enumType;
}
-int CV2PDB::getTypeByDWARFPtr(byte* ptr)
+// Try to find or compute the "best" CV TypeID for a given DIE found by following
+// a DW_AT_type attribute or its closest counterpart.
+int CV2PDB::getTypeByDWARFPtr(byte* typePtr)
{
- if (ptr == nullptr)
- return 0x03; // void
- std::unordered_map<byte*, int>::iterator it = mapOffsetToType.find(ptr);
- if (it == mapOffsetToType.end())
- return 0x03; // void
- return it->second;
+ if (typePtr == nullptr)
+ return T_NOTYPE;
+
+ // First just attempt to find the type entry directly.
+ int ret = findTypeIdByPtr(typePtr);
+ if (!ret) {
+ // TypeID was not found in the map. This may be due to struct
+ // decl / definition consolidation. I.e. we don't emit the struct decl
+ // because they show up as "empty" structs (devoid of members).
+ // Try to match this against the logically equivalent "definition"
+ // type.
+ DWARF_InfoData* entry = findEntryByPtr(typePtr);
+ assert(entry); // how can the entry not exist in the map?
+
+ // Skip anonymous structures or similar.
+ if (!entry || !entry->name) {
+ return T_NOTYPE;
+ }
+
+ // See if there exists another "logically equivalent" entry in the tree.
+ //
+ // First, find all entries with the same (local) name as this type.
+ auto range = mapEntryNameToEntries.equal_range(entry->name);
+ for (auto it = range.first; it != range.second; ++it) {
+ DWARF_InfoData* candidate = it->second;
+
+ // Skip self.
+ if (candidate == entry) {
+ continue;
+ }
+
+ // Skip declarations (as when they are of structs, they don't help.
+ // We want definitions only as they define the fields in DWARF.)
+ if (candidate->isDecl) {
+ continue;
+ }
+
+ // Filter nodes based on the matching tag.
+ if (candidate->tag != entry->tag) {
+ continue;
+ }
+
+ // Found a matching tag for this element. Walk up the tree and check
+ // if all parent tags and names match also. If they do, we found an
+ // "equivalent" node to 'typePtr' one that wasn't added to the
+ // typeID registry (because it was likely a decl that we filtered out)
+ DWARF_InfoData* candidateParent = candidate->parent;
+ DWARF_InfoData* entryParent = entry->parent;
+
+ bool equivalentHierarchy = true;
+ while (candidateParent && entryParent) {
+ if (candidateParent->tag != entryParent->tag) {
+ // Tag mismatch.
+ equivalentHierarchy = false;
+ break;
+ }
+
+ // Skip CUs as of course they have different names. We only
+ // care about namespaces, other containing structs, classes, etc.
+ // Entries have the same tag. Checking one is sufficient.
+ if (entryParent->tag != DW_TAG_compile_unit) {
+
+ if (strcmp(candidateParent->name, entryParent->name)) {
+ // Name mismatch.
+ equivalentHierarchy = false;
+ break;
+ }
+ }
+
+ candidateParent = candidateParent->parent;
+ entryParent = entryParent->parent;
+ }
+
+ if (equivalentHierarchy) {
+ // Try another lookup with this new candidate.
+ ret = findTypeIdByPtr(candidate->entryPtr);
+ assert(ret); // how can it now be in the map?
+ } else {
+ fprintf(stderr, "warn: could not find equivalent entry for typePtr %p\n", typePtr);
+ }
+ }
+ }
+ return ret;
}
+// Get the logical size of a DWARF type, starting from 'typePtr' and recursing
+// if necessary. E.g. for arrays.
int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
{
DWARF_InfoData id;
DIECursor cursor(parent, typePtr);
- if (!cursor.readNext(id))
+ // Don't allocate this into the tree since we're just interested
+ // in computing a type.
+ if (!cursor.readNext(&id))
return 0;
if(id.byte_size > 0)
@@ -1348,7 +1631,7 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
case DW_TAG_array_type:
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type);
}
default:
@@ -1359,6 +1642,10 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
return 0;
}
+// Scan the .debug_info section and allocate type IDs for each unique type and
+// create a mapping to look them up by their address.
+// This is the first pass scan that builds up the DWARF tree. The second pass (createTypes)
+// emits the actual PDB symbols.
bool CV2PDB::mapTypes()
{
int typeID = nextUserType;
@@ -1367,13 +1654,21 @@ bool CV2PDB::mapTypes()
if (debug & DbgBasic)
fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
+ // Maintain the first node of each CU to ensure all of them get linked.
+ DWARF_InfoData* firstNode = nullptr;
+
+ // Scan each compilation unit in '.debug_info'.
while (off < img.debug_info.length)
{
DWARF_CompilationUnitInfo cu{};
+
+ // Read the next compilation unit from 'off' and update it to the next
+ // CU.
byte* ptr = cu.read(debug, img, &off);
if (!ptr)
continue;
+ // We only support regular full 'DW_UT_compile' compilation units.
if (cu.unit_type != DW_UT_compile) {
if (debug & DbgDwarfCompilationUnit)
fprintf(stderr, "%s:%d: skipping compilation unit offs=%x, unit_type=%d\n", __FUNCTION__, __LINE__,
@@ -1383,28 +1678,57 @@ bool CV2PDB::mapTypes()
}
DIECursor cursor(&cu, ptr);
- DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Set up link to ensure this CU links to the prior one.
+ cursor.prevNode = firstNode;
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
+ // Start scanning this CU from the beginning and *build a tree of DIE nodes*.
+ while ((node = cursor.readNext(nullptr)) != nullptr)
{
+ DWARF_InfoData& id = *node;
+
+ // Initialize the head of the DWARF DIE list the first time.
+ if (!dwarfHead) {
+ dwarfHead = node;
+ }
+
+ if (!setFirstNode) {
+ firstNode = node;
+ setFirstNode = true;
+ }
+
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Insert the node into the entryPtr-based index.
+ mapEntryPtrToEntry[node->entryPtr] = node;
+
+ // Insert named nodes into the name-based index.
+ if (node->name) {
+ mapEntryNameToEntries.insert({ node->name, node });
+ }
+
switch (id.tag)
{
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_union_type:
+ // skip generating a typeID for declaration flavor of
+ // class/struct/union since we don't emit the PDB symbol
+ // for them. See related code in CV2PDB::createTypes().
+ if (id.isDecl) continue;
case DW_TAG_base_type:
case DW_TAG_typedef:
case DW_TAG_pointer_type:
case DW_TAG_subroutine_type:
case DW_TAG_array_type:
case DW_TAG_const_type:
- case DW_TAG_structure_type:
case DW_TAG_reference_type:
-
- case DW_TAG_class_type:
case DW_TAG_enumeration_type:
case DW_TAG_string_type:
- case DW_TAG_union_type:
case DW_TAG_ptr_to_member_type:
case DW_TAG_set_type:
case DW_TAG_subrange_type:
@@ -1418,20 +1742,22 @@ bool CV2PDB::mapTypes()
case DW_TAG_mutable_type: // withdrawn
case DW_TAG_shared_type:
case DW_TAG_rvalue_reference_type:
- mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
+ // Reserve a typeID and store it in the map for quick lookup.
+ mapEntryPtrToTypeID.insert(std::make_pair(id.entryPtr, typeID));
typeID++;
}
}
}
if (debug & DbgBasic)
- fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size());
+ fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapEntryPtrToTypeID.size());
nextDwarfType = typeID;
- assert(nextDwarfType == nextUserType + mapOffsetToType.size());
+ assert(nextDwarfType == nextUserType + mapEntryPtrToTypeID.size());
return true;
}
+// Walks the .debug_info section and builds a DIE tree.
bool CV2PDB::createTypes()
{
img.createSymbolCache();
@@ -1444,9 +1770,14 @@ bool CV2PDB::createTypes()
fprintf(stderr, "%s:%d: createTypes()\n", __FUNCTION__, __LINE__);
unsigned long off = 0;
+
+ // Scan each compilation unit in '.debug_info'.
while (off < img.debug_info.length)
{
DWARF_CompilationUnitInfo cu{};
+
+ // Read the next compilation unit from 'off' and update it to the next
+ // CU, returning the pointer just beyond the header to the first DIE.
byte* ptr = cu.read(debug, img, &off);
if (!ptr)
continue;
@@ -1460,17 +1791,24 @@ bool CV2PDB::createTypes()
}
DIECursor cursor(&cu, ptr);
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Scan the DIEs in this CU, reusing the elements.
+ while (cursor.readNext(&id))
{
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Merge in related entries. This relies on the DWARF tree having been built
+ // in the first pass (mapTypes).
if (id.abstract_origin)
- mergeAbstractOrigin(id, cursor);
+ mergeAbstractOrigin(id, *this);
if (id.specification)
- mergeSpecification(id, cursor);
+ mergeSpecification(id, *this);
int cvtype = -1;
switch (id.tag)
@@ -1501,14 +1839,23 @@ bool CV2PDB::createTypes()
case DW_TAG_class_type:
case DW_TAG_structure_type:
case DW_TAG_union_type:
- cvtype = addDWARFStructure(id, cursor.getSubtreeCursor());
+ if (!id.isDecl) {
+ // Only export the non-declaration version of structs/classes.
+ // DWARF emits multiple copies of the same class, some of
+ // which are marked as declarations and lack members, resulting
+ // in an empty struct UDT in the PDB. Then when we encounter
+ // the non-declaration copy we emit it again, but now we
+ // end up with multiple copies of the same UDT in the PDB
+ // and the debugger gets confused.
+ cvtype = addDWARFStructure(id, cursor);
+ }
break;
case DW_TAG_array_type:
- cvtype = addDWARFArray(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFArray(id, cursor);
break;
case DW_TAG_enumeration_type:
- cvtype = addDWARFEnum(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFEnum(id, cursor);
break;
case DW_TAG_subroutine_type:
@@ -1542,7 +1889,17 @@ bool CV2PDB::createTypes()
mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0);
}
- addDWARFProc(id, ranges, cursor.getSubtreeCursor());
+
+ // Only add the definition, not declaration, because
+ // MSVC toolset only produces a single symbol for
+ // each function and will get confused if there are
+ // 2 PDB symbols for the same routine.
+ //
+ // TODO: Add more type info to the routine. Today we
+ // expose it as "T_NOTYPE" when we could do better.
+ if (!id.isDecl) {
+ addDWARFProc(id, ranges, cursor);
+ }
}
}
break;
@@ -1649,18 +2006,42 @@ bool CV2PDB::createTypes()
if (cvtype >= 0)
{
- assert(cvtype == typeID); typeID++;
- assert(mapOffsetToType[id.entryPtr] == cvtype);
+ assert(cvtype == typeID);
+ typeID++;
+
+ assert(mapEntryPtrToTypeID[id.entryPtr] == cvtype);
assert(typeID == nextUserType);
}
}
}
assert(typeID == nextUserType);
- assert(typeID == firstUserType + mapOffsetToType.size());
+ assert(typeID == firstUserType + mapEntryPtrToTypeID.size());
return true;
}
+void printIndent(int level) {
+ for (int i = 0; i < level; ++i) {
+ printf(" ");
+ }
+}
+
+void dumpTreeHelper(DWARF_InfoData* node, int level) {
+ for (DWARF_InfoData* n = node; n; n = n->next) {
+ const unsigned dieOffset = n->img->debug_info.sectOff(n->entryPtr);
+
+ printIndent(level);
+ printf("offset: %#x, name: \"%s\", tag: %#x, abbrev: %d\n", dieOffset, n->name, n->tag, n->code);
+
+ // Visit the children.
+ dumpTreeHelper(n->children, level + 1);
+ }
+}
+
+void CV2PDB::dumpDwarfTree() const {
+ dumpTreeHelper(dwarfHead, 0);
+}
+
bool CV2PDB::createDWARFModules()
{
if(!img.debug_info.isPresent())
@@ -1709,6 +2090,10 @@ bool CV2PDB::createDWARFModules()
if (!createTypes())
return false;
+ if (debug & DbgPrintDwarfTree) {
+ dumpDwarfTree();
+ }
+
/*
if(!iterateDWARFDebugInfo(kOpMapTypes))
return false;
@@ -1758,12 +2143,36 @@ bool CV2PDB::addDWARFPublics()
mspdb::Mod* mod = globalMod();
int type = 0;
- int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, 0x1000);
+ int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, BASE_USER_TYPE);
if (rc <= 0)
return setError("cannot add public");
return true;
}
+// Try to lookup a DWARF_InfoData in the constructed DWARF tree given its
+// "entryPtr". I.e. its memory-mapped location in the loaded PE image buffer.
+DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
+{
+ auto it = mapEntryPtrToEntry.find(entryPtr);
+ if (it == mapEntryPtrToEntry.end()) {
+ // Could not find decl for this definition.
+ return nullptr;
+ }
+ return it->second;
+}
+
+// Try to lookup a TypeID in the set of registered types by a
+// "typePtr". I.e. its memory-mapped location in the loaded PE image buffer.
+int CV2PDB::findTypeIdByPtr(byte* typePtr) const
+{
+ auto it = mapEntryPtrToTypeID.find(typePtr);
+ if (it == mapEntryPtrToTypeID.end()) {
+ // Could not find type for this definition.
+ return T_NOTYPE;
+ }
+ return it->second;
+}
+
bool CV2PDB::writeDWARFImage(const TCHAR* opath)
{
int len = sizeof(*rsds) + strlen((char*)(rsds + 1)) + 1;