summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/PEImage.h10
-rw-r--r--src/cv2pdb.cpp3
-rw-r--r--src/cv2pdb.h21
-rw-r--r--src/dwarf2pdb.cpp252
-rw-r--r--src/readDwarf.cpp202
-rw-r--r--src/readDwarf.h65
6 files changed, 443 insertions, 110 deletions
diff --git a/src/PEImage.h b/src/PEImage.h
index 3ae00bd..a809523 100644
--- a/src/PEImage.h
+++ b/src/PEImage.h
@@ -178,11 +178,16 @@ private:
template<typename SYM> const char* t_findSectionSymbolName(int s) const;
+ // File handle to PE image.
int fd;
+
+ // Pointer to in-memory buffer containing loaded PE image.
void* dump_base;
+
+ // Size of `dump_base` in bytes.
int dump_total_len;
- // codeview
+ // codeview fields
IMAGE_DOS_HEADER *dos;
IMAGE_NT_HEADERS32* hdr32;
IMAGE_NT_HEADERS64* hdr64;
@@ -200,7 +205,8 @@ private:
std::unordered_map<std::string, SymbolInfo> symbolCache;
public:
- //dwarf
+ // dwarf fields
+ // List of DWARF section descriptors.
#define EXPANDSEC(name) PESection name;
SECTION_LIST()
#undef EXPANDSEC
diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp
index 1ba7a94..704da4c 100644
--- a/src/cv2pdb.cpp
+++ b/src/cv2pdb.cpp
@@ -897,6 +897,7 @@ void CV2PDB::checkGlobalTypeAlloc(int size, int add)
}
}
+// Get the CodeView type descriptor for the given type ID.
// CV-only. Returns NULL for DWARF-based images.
const codeview_type* CV2PDB::getTypeData(int type)
{
@@ -913,6 +914,7 @@ const codeview_type* CV2PDB::getTypeData(int type)
return (codeview_type*)(typeData + offset[type - BASE_USER_TYPE]);
}
+// CV-only. Never called for DWARF.
const codeview_type* CV2PDB::getUserTypeData(int type)
{
type -= BASE_USER_TYPE + globalTypeHeader->cTypes;
@@ -2116,6 +2118,7 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation)
return typedefType;
}
+// CV-only.
void CV2PDB::appendTypedefs()
{
if(Dversion == 0)
diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index 654470b..e5e8144 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -169,17 +169,23 @@ public:
bool addDWARFLines();
bool addDWARFPublics();
bool writeDWARFImage(const TCHAR* opath);
+ DWARF_InfoData* findEntryByPtr(byte* entryPtr) const;
+
+ // Helper to just print the DWARF tree we've built for debugging purposes.
+ void dumpDwarfTree() const;
bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi);
bool addDWARFProc(DWARF_InfoData& id, const std::vector<RangeEntry> &ranges, DIECursor cursor);
+ void formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const;
+
int addDWARFStructure(DWARF_InfoData& id, DIECursor cursor);
- int addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int off, int flStart);
- int addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor);
+ int addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int off, int flStart);
+ int addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor);
int addDWARFBasicType(const char*name, int encoding, int byte_size);
int addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor);
int getTypeByDWARFPtr(byte* ptr);
int getDWARFTypeSize(const DIECursor& parent, byte* ptr);
- void getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor,
+ void getDWARFArrayBounds(DIECursor cursor,
int& basetype, int& lowerBound, int& upperBound);
void getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent,
int& basetype, int& lowerBound, int& upperBound);
@@ -278,7 +284,14 @@ public:
// DWARF
int codeSegOff;
- std::unordered_map<byte*, int> mapOffsetToType;
+
+ // Lookup table for type IDs based on the DWARF_InfoData::entryPtr
+ std::unordered_map<byte*, int> mapEntryPtrToTypeID;
+ // Lookup table for entries based on the DWARF_InfoData::entryPtr
+ std::unordered_map<byte*, DWARF_InfoData*> mapEntryPtrToEntry;
+
+ // Head of list of DWARF DIE nodes.
+ DWARF_InfoData* dwarfHead = nullptr;
// Default lower bound for the current compilation unit. This depends on
// the language of the current unit.
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 33c8af6..763be85 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -695,6 +695,74 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo)
cbUdtSymbols += len;
}
+// Helper to format a fully qualified proc name like 'some_ns::Foo::Foo' since
+// for a Foo constructor in a Foo class in a namespace called "some_ns".
+// PDBs require fully qualified names in their symbols.
+// TODO: better error handling for out of space.
+void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const {
+ if (proc->specification) {
+ // If the proc has a "specification", i.e. a declaration, use it instead
+ // of the definition, as it has a proper hierarchy connected to it
+ // which will give us a proper fully-qualified name like Foo::Foo
+ // instead of just Foo.
+ const DWARF_InfoData* entry = findEntryByPtr(proc->specification);
+ if (entry) {
+ proc = entry;
+ }
+ }
+ DWARF_InfoData* parent = proc->parent;
+ std::vector<const DWARF_InfoData*> segments;
+ segments.push_back(proc);
+
+ // Accumulate all the valid parent scopes so that we can reverse them for
+ // formatting.
+ while (parent) {
+ switch (parent->tag) {
+ // TODO: are there any other kinds of valid parents?
+ case DW_TAG_class_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_namespace:
+ segments.push_back(parent);
+ break;
+ default:
+ break;
+ }
+ parent = parent->parent;
+ }
+
+ int remain = cbBuf;
+ char* p = buf;
+
+ // Format the parents in reverse order with :: operator in between.
+ for (int i = segments.size() - 1; i >= 0; --i) {
+ const int nameLen = strlen(segments[i]->name);
+ if (remain < nameLen) {
+ fprintf(stderr, "unable to fit full proc name: %s\n", proc->name);
+ return;
+ }
+
+ memcpy(p, segments[i]->name, nameLen);
+
+ p += nameLen;
+ remain -= nameLen;
+
+ if (i > 0) {
+ // Append :: separator
+ if (remain < 2) {
+ fprintf(stderr, "unable to fit full proc name (:: separator): %s\n", proc->name);
+ return;
+ }
+ *p++ = ':';
+ *p++ = ':';
+ remain -= 2;
+ }
+ }
+
+ if (remain > 0) {
+ *p = 0; // NUL terminate.
+ }
+}
+
bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
{
unsigned int pclo = ranges.front().pclo - codeSegOff;
@@ -723,8 +791,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
cvs->proc_v2.flags = 0;
// printf("GlobalPROC %s\n", procid.name);
-
- len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, procid.name);
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedProcName(&procid, namebuf, sizeof namebuf);
+ len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
for (; len & (align-1); len++)
udtSymbols[cbUdtSymbols + len] = 0xf4 - (len & 3);
@@ -762,8 +831,13 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
DWARF_InfoData id;
int off = 8;
+ // Save off the cursor to the start of the proc.
DIECursor prev = cursor;
- while (cursor.readNext(id, true))
+
+ // First, collect all the formal parameters of the proc.
+ // Don't worry about storing these in the tree as we're not going to need
+ // to generate fully-qualified names like we would for functions/classes.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_formal_parameter && id.name)
{
@@ -778,7 +852,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
}
appendEndArg();
+ // Now, collect all the lexical blocks and their stack variables.
std::vector<DIECursor> lexicalBlocks;
+
+ // Start from the proc base, and push all nested lexical blocks as you
+ // encounter them.
lexicalBlocks.push_back(prev);
while (!lexicalBlocks.empty())
@@ -786,7 +864,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
cursor = lexicalBlocks.back();
lexicalBlocks.pop_back();
- while (cursor.readNext(id))
+ while (cursor.readNext(&id))
{
if (id.tag == DW_TAG_lexical_block)
{
@@ -813,15 +891,23 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
{
appendLexicalBlock(id, pclo + codeSegOff);
DIECursor next = cursor;
+
+ // Compute the sibling node of this lexical block.
next.gotoSibling();
assert(lexicalBlocks.empty() || next.ptr <= lexicalBlocks.back().ptr);
+
+ // Append the next lexical block to the list of blocks
+ // to scan later.
lexicalBlocks.push_back(next);
+
+ // But for now, scan down the current lexical block.
cursor = cursor.getSubtreeCursor();
continue;
}
}
else if (id.tag == DW_TAG_variable)
{
+ // Found a local variable.
if (id.name && (id.location.type == ExprLoc || id.location.type == Block))
{
Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset)
@@ -864,14 +950,15 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
return true;
}
-int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff, int flStart)
+// Only looks at DW_TAG_member and DW_TAG_inheritance
+int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int baseoff, int flStart)
{
bool isunion = structid.tag == DW_TAG_union_type;
int nfields = 0;
- // cursor points to the first member
+ // cursor points to the first member of the class/struct/union.
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true))
{
if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -905,12 +992,12 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
// if it doesn't have a name, and it's a struct or union, embed it directly
DIECursor membercursor(cursor, id.type);
DWARF_InfoData memberid;
- if (membercursor.readNext(memberid))
+ if (membercursor.readNext(&memberid))
{
if (memberid.abstract_origin)
- mergeAbstractOrigin(memberid, cursor);
+ mergeAbstractOrigin(memberid, *this);
if (memberid.specification)
- mergeSpecification(memberid, cursor);
+ mergeSpecification(memberid, *this);
int cvtype = -1;
switch (memberid.tag)
@@ -1002,14 +1089,17 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
return cvtype;
}
-void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
+// Compute the array bounds of the DIE at the given 'cursor'.
+void CV2PDB::getDWARFArrayBounds(DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
{
DWARF_InfoData id;
// TODO: handle multi-dimensional arrays
if (cursor.cu)
{
- while (cursor.readNext(id, true))
+ // Don't insert these elements into the DB. We're just using it for
+ // array bounds calculation.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_subrange_type)
{
@@ -1042,6 +1132,7 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& p
upperBound = subrangeid.upper_bound;
}
+// Compute a type ID for a basic DWARF type.
int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
{
int type = 0, mode = 0, size = 0;
@@ -1104,10 +1195,13 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
return translateType(t);
}
-int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor)
+// TODO: Array wanted to be scanned twice due to DW_TAG_subrange_type being looked at
+// in the caller. See if it can be handled in a single place for clarity, simplicity & efficiency.
+// Goal: don't rescan the same DIE twice.
+int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor)
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
checkUserTypeAlloc(kMaxNameLen + 100);
codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
@@ -1243,7 +1337,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
/* Now fill this field list with the enumerators we find in DWARF. */
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_enumerator && id.has_const_value)
{
@@ -1323,18 +1417,22 @@ int CV2PDB::getTypeByDWARFPtr(byte* ptr)
{
if (ptr == nullptr)
return 0x03; // void
- std::unordered_map<byte*, int>::iterator it = mapOffsetToType.find(ptr);
- if (it == mapOffsetToType.end())
+ std::unordered_map<byte*, int>::iterator it = mapEntryPtrToTypeID.find(ptr);
+ if (it == mapEntryPtrToTypeID.end())
return 0x03; // void
return it->second;
}
+// Get the logical size of a DWARF type, starting from 'typePtr' and recursing
+// if necessary. E.g. for arrays.
int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
{
DWARF_InfoData id;
DIECursor cursor(parent, typePtr);
- if (!cursor.readNext(id))
+ // Don't allocate this into the tree since we're just interested
+ // in computing a type.
+ if (!cursor.readNext(&id))
return 0;
if(id.byte_size > 0)
@@ -1349,7 +1447,7 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
case DW_TAG_array_type:
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type);
}
default:
@@ -1362,6 +1460,8 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
// Scan the .debug_info section and allocate type IDs for each unique type and
// create a mapping to look them up by their address.
+// This is the first pass scan that builds up the DWARF tree. The second pass (createTypes)
+// emits the actual PDB symbols.
bool CV2PDB::mapTypes()
{
int typeID = nextUserType;
@@ -1370,6 +1470,9 @@ bool CV2PDB::mapTypes()
if (debug & DbgBasic)
fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
+ // Maintain the first node of each CU to ensure all of them get linked.
+ DWARF_InfoData* firstNode = nullptr;
+
// Scan each compilation unit in '.debug_info'.
while (off < img.debug_info.length)
{
@@ -1391,13 +1494,34 @@ bool CV2PDB::mapTypes()
}
DIECursor cursor(&cu, ptr);
- DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Set up link to ensure this CU links to the prior one.
+ cursor.prevNode = firstNode;
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
+ // Start scanning this CU from the beginning and *build a tree of DIE nodes*.
+ while ((node = cursor.readNext(nullptr)) != nullptr)
{
+ DWARF_InfoData& id = *node;
+
+ // Initialize the head of the DWARF DIE list the first time.
+ if (!dwarfHead) {
+ dwarfHead = node;
+ }
+
+ if (!setFirstNode) {
+ firstNode = node;
+ setFirstNode = true;
+ }
+
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Insert it into the map.
+ mapEntryPtrToEntry[node->entryPtr] = node;
+
switch (id.tag)
{
case DW_TAG_base_type:
@@ -1427,20 +1551,21 @@ bool CV2PDB::mapTypes()
case DW_TAG_shared_type:
case DW_TAG_rvalue_reference_type:
// Reserve a typeID and store it in the map for quick lookup.
- mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
+ mapEntryPtrToTypeID.insert(std::make_pair(id.entryPtr, typeID));
typeID++;
}
}
}
if (debug & DbgBasic)
- fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size());
+ fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapEntryPtrToTypeID.size());
nextDwarfType = typeID;
- assert(nextDwarfType == nextUserType + mapOffsetToType.size());
+ assert(nextDwarfType == nextUserType + mapEntryPtrToTypeID.size());
return true;
}
+// Walks the .debug_info section and builds a DIE tree.
bool CV2PDB::createTypes()
{
img.createSymbolCache();
@@ -1474,17 +1599,24 @@ bool CV2PDB::createTypes()
}
DIECursor cursor(&cu, ptr);
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Scan the DIEs in this CU, reusing the elements.
+ while (cursor.readNext(&id))
{
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Merge in related entries. This relies on the DWARF tree having been built
+ // in the first pass (mapTypes).
if (id.abstract_origin)
- mergeAbstractOrigin(id, cursor);
+ mergeAbstractOrigin(id, *this);
if (id.specification)
- mergeSpecification(id, cursor);
+ mergeSpecification(id, *this);
int cvtype = -1;
switch (id.tag)
@@ -1515,14 +1647,14 @@ bool CV2PDB::createTypes()
case DW_TAG_class_type:
case DW_TAG_structure_type:
case DW_TAG_union_type:
- cvtype = addDWARFStructure(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFStructure(id, cursor);
break;
case DW_TAG_array_type:
- cvtype = addDWARFArray(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFArray(id, cursor);
break;
case DW_TAG_enumeration_type:
- cvtype = addDWARFEnum(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFEnum(id, cursor);
break;
case DW_TAG_subroutine_type:
@@ -1556,7 +1688,17 @@ bool CV2PDB::createTypes()
mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0);
}
- addDWARFProc(id, ranges, cursor.getSubtreeCursor());
+
+ // Only add the definition, not declaration, because
+ // MSVC toolset only produces a single symbol for
+ // each function and will get confused if there are
+ // 2 PDB symbols for the same routine.
+ //
+ // TODO: Add more type info to the routine. Today we
+ // expose it as "T_NOTYPE" when we could do better.
+ if (!id.isDecl) {
+ addDWARFProc(id, ranges, cursor);
+ }
}
}
break;
@@ -1663,18 +1805,42 @@ bool CV2PDB::createTypes()
if (cvtype >= 0)
{
- assert(cvtype == typeID); typeID++;
- assert(mapOffsetToType[id.entryPtr] == cvtype);
+ assert(cvtype == typeID);
+ typeID++;
+
+ assert(mapEntryPtrToTypeID[id.entryPtr] == cvtype);
assert(typeID == nextUserType);
}
}
}
assert(typeID == nextUserType);
- assert(typeID == firstUserType + mapOffsetToType.size());
+ assert(typeID == firstUserType + mapEntryPtrToTypeID.size());
return true;
}
+void printIndent(int level) {
+ for (int i = 0; i < level; ++i) {
+ printf(" ");
+ }
+}
+
+void dumpTreeHelper(DWARF_InfoData* node, int level) {
+ for (DWARF_InfoData* n = node; n; n = n->next) {
+ const unsigned dieOffset = n->img->debug_info.sectOff(n->entryPtr);
+
+ printIndent(level);
+ printf("offset: %#x, name: \"%s\", tag: %#x, abbrev: %d\n", dieOffset, n->name, n->tag, n->code);
+
+ // Visit the children.
+ dumpTreeHelper(n->children, level + 1);
+ }
+}
+
+void CV2PDB::dumpDwarfTree() const {
+ dumpTreeHelper(dwarfHead, 0);
+}
+
bool CV2PDB::createDWARFModules()
{
if(!img.debug_info.isPresent())
@@ -1723,6 +1889,10 @@ bool CV2PDB::createDWARFModules()
if (!createTypes())
return false;
+ if (debug & DbgPrintDwarfTree) {
+ dumpDwarfTree();
+ }
+
/*
if(!iterateDWARFDebugInfo(kOpMapTypes))
return false;
@@ -1778,6 +1948,20 @@ bool CV2PDB::addDWARFPublics()
return true;
}
+// Try to lookup a DWARF_InfoData in the constructed DWARF tree given its
+// "entryPtr". I.e. its memory-mapped location in the loaded PE image buffer.
+DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
+{
+ auto it = mapEntryPtrToEntry.find(entryPtr);
+ if (it == mapEntryPtrToEntry.end()) {
+ // Could not find decl for this definition.
+ return nullptr;
+ }
+ else {
+ return it->second;
+ }
+}
+
bool CV2PDB::writeDWARFImage(const TCHAR* opath)
{
int len = sizeof(*rsds) + strlen((char*)(rsds + 1)) + 1;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 1058c60..9218f41 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -1,14 +1,12 @@
#include "readDwarf.h"
#include <assert.h>
#include <array>
+#include <memory> // unique_ptr
#include "PEImage.h"
+#include "cv2pdb.h"
#include "dwarf.h"
#include "mspdb.h"
-extern "C" {
- #include "mscvpdb.h"
-}
-
// declare hasher for pair<T1,T2>
namespace std
@@ -365,32 +363,52 @@ Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase,
return stack[0];
}
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent)
+// Find the source of an inlined function by following its 'abstract_origin'
+// attribute references and recursively merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
{
- DIECursor specCursor(parent, id.abstract_origin);
- DWARF_InfoData idspec;
- specCursor.readNext(idspec);
- // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
+ DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
+ if (!abstractOrigin) {
+ // Could not find abstract origin. Why not?
+ assert(false);
+ return;
+ }
+
+ // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+ // in the wild.
+ //
// assert(id.tag == idspec.tag);
- if (idspec.abstract_origin)
- mergeAbstractOrigin(idspec, parent);
- if (idspec.specification)
- mergeSpecification(idspec, parent);
- id.merge(idspec);
+
+ if (abstractOrigin->abstract_origin)
+ mergeAbstractOrigin(*abstractOrigin, context);
+ if (abstractOrigin->specification)
+ mergeSpecification(*abstractOrigin, context);
+ id.merge(*abstractOrigin);
}
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent)
+// Find the declaration entry for a definition by following its 'specification'
+// attribute references and merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
{
- DIECursor specCursor(parent, id.specification);
- DWARF_InfoData idspec;
- specCursor.readNext(idspec);
- //assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
- //assert(id.tag == idspec.tag);
- if (idspec.abstract_origin)
- mergeAbstractOrigin(idspec, parent);
- if (idspec.specification)
- mergeSpecification(idspec, parent);
- id.merge(idspec);
+ DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
+ if (!idspec) {
+ // Could not find decl for this definition. Why not?
+ assert(false);
+ return;
+ }
+
+ // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+ // in the wild.
+ //
+ // assert(id.tag == idspec.tag);
+
+ if (idspec->abstract_origin)
+ mergeAbstractOrigin(*idspec, context);
+ if (idspec->specification) {
+ mergeSpecification(*idspec, context);
+ }
+ id.merge(*idspec);
}
LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
@@ -584,7 +602,7 @@ DIECursor::DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr_)
cu = cu_;
ptr = ptr_;
level = 0;
- hasChild = false;
+ prevHasChild = false;
sibling = 0;
}
@@ -594,40 +612,41 @@ DIECursor::DIECursor(const DIECursor& parent, byte* ptr_)
ptr = ptr_;
}
+// Advance the cursor to the next sibling of the current node, using the fast
+// path when possible.
void DIECursor::gotoSibling()
{
if (sibling)
{
- // use sibling pointer, if available
+ // Fast path: use sibling pointer, if available.
ptr = sibling;
- hasChild = false;
+ prevHasChild = false;
}
- else if (hasChild)
+ else if (prevHasChild)
{
- int currLevel = level;
+ // Slow path. Skip over child nodes until we get back to the current
+ // level.
+ const int currLevel = level;
level = currLevel + 1;
- hasChild = false;
+ prevHasChild = false;
+ // Don't store these in the tree since this is just used for skipping over
+ // last swaths of nodes.
DWARF_InfoData dummy;
- // read untill we pop back to the level we were at
+
+ // read until we pop back to the level we were at
while (level > currLevel)
- readNext(dummy, true);
+ readNext(&dummy, true /* stopAtNull */);
}
}
-bool DIECursor::readSibling(DWARF_InfoData& id)
-{
- gotoSibling();
- return readNext(id, true);
-}
-
DIECursor DIECursor::getSubtreeCursor()
{
- if (hasChild)
+ if (prevHasChild)
{
DIECursor subtree = *this;
subtree.level = 0;
- subtree.hasChild = false;
+ subtree.prevHasChild = false;
return subtree;
}
else // Return invalid cursor
@@ -696,31 +715,80 @@ static byte* getPointerInSection(const PEImage &img, const SectionDescriptor &se
return peSec.byteAt(offset);
}
-bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
+// Scan the next DIE from the current CU.
+// TODO: Allocate a new element each time.
+DWARF_InfoData* DIECursor::readNext(DWARF_InfoData* entry, bool stopAtNull)
{
- id.clear();
+ std::unique_ptr<DWARF_InfoData> node;
+
+ // Controls whether we should bother establishing links between nodes.
+ // If 'entry' is provided, we are just going to be using it instead
+ // of allocating our own nodes. The callers typically reuse the same
+ // node over and over in this case, so don't bother tracking the links.
+ // Furthermore, since we clear the input node in this case, we can't rely
+ // on it from call to call.
+ // TODO: Rethink how to more cleanly express the alloc vs reuse modes of
+ // operation.
+ bool establishLinks = false;
+
+ // If an entry was passed in, use it. Else allocate one.
+ if (!entry) {
+ establishLinks = true;
+ node = std::make_unique<DWARF_InfoData>();
+ entry = node.get();
+ } else {
+ // If an entry was provided, make sure we clear it.
+ entry->clear();
+ }
- if (hasChild)
+ entry->img = img;
+
+ if (prevHasChild) {
+ // Prior element had a child, thus this element is its first child.
++level;
+ if (establishLinks) {
+ // Establish the first child.
+ prevParent->children = entry;
+ }
+ }
+
+ // Set up a convenience alias.
+ DWARF_InfoData& id = *entry;
+
+ // Find the first valid DIE.
for (;;)
{
if (level == -1)
- return false; // we were already at the end of the subtree
+ return nullptr; // we were already at the end of the subtree
if (ptr >= cu->end_ptr)
- return false; // root of the tree does not have a null terminator, but we know the length
+ return nullptr; // root of the tree does not have a null terminator, but we know the length
id.entryPtr = ptr;
entryOff = img->debug_info.sectOff(ptr);
id.code = LEB128(ptr);
+
+ // If the previously scanned node claimed to have a child, this must be a valid DIE.
+ assert(!prevHasChild || id.code);
+
+ // Check if we need to terminate the sibling chain.
if (id.code == 0)
{
- --level; // pop up one level
+ // Done with this level.
+ if (establishLinks) {
+ // Continue linking siblings from the parent node.
+ prevNode = prevParent;
+
+ // Unwind the parent one level up.
+ prevParent = prevParent->parent;
+ }
+
+ --level;
if (stopAtNull)
{
- hasChild = false;
- return false;
+ prevHasChild = false;
+ return nullptr;
}
continue; // read the next DIE
}
@@ -733,17 +801,42 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__,
id.code, entryOff);
assert(abbrev);
- return false;
+ return nullptr;
}
id.abbrev = abbrev;
id.tag = LEB128(abbrev);
id.hasChild = *abbrev++;
+ if (establishLinks) {
+ // If there was a previous node, link it to this one, thus continuing the chain.
+ if (prevNode) {
+ prevNode->next = entry;
+ }
+
+ // Establish parent of current node. If 'prevParent' is NULL, that is fine.
+ // It just means this node is a top-level node.
+ entry->parent = prevParent;
+
+ if (id.hasChild) {
+ // This node has children! Establish it as the new parent for future nodes.
+ prevParent = entry;
+
+ // Clear the last DIE because the next scanned node will form the *start*
+ // of a new linked list comprising the children of the current node.
+ prevNode = nullptr;
+ }
+ else {
+ // Ensure the next node appends itself to this one.
+ prevNode = entry;
+ }
+ }
+
if (debug & DbgDwarfAttrRead)
fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__,
entryOff, level, id.tag, id.code);
+ // Read all the attribute data for this DIE.
int attr, form;
for (;;)
{
@@ -809,7 +902,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = RDref(ptr); break;
case DW_FORM_loclistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_loclists, cu->loclist_base); break;
case DW_FORM_rnglistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_rnglists, cu->rnglist_base); break;
- default: assert(false && "Unsupported DWARF attribute form"); return false;
+ default: assert(false && "Unsupported DWARF attribute form"); return nullptr;
}
switch (attr)
@@ -852,6 +945,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
case DW_AT_type: assert(a.type == Ref); id.type = a.ref; break;
case DW_AT_inline: assert(a.type == Const); id.inlined = a.cons; break;
case DW_AT_external: assert(a.type == Flag); id.external = a.flag; break;
+ case DW_AT_declaration: assert(a.type == Flag); id.isDecl = a.flag; break;
case DW_AT_upper_bound:
assert(a.type == Const || a.type == Ref || a.type == ExprLoc || a.type == Block);
if (a.type == Const) // TODO: other types not supported yet
@@ -912,10 +1006,12 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
}
}
- hasChild = id.hasChild != 0;
+ prevHasChild = id.hasChild != 0;
sibling = id.sibling;
- return true;
+ // Transfer ownership of 'node' to caller, if we allocated one.
+ node.release();
+ return entry;
}
byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode)
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 56e89a3..06779c8 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -11,6 +11,7 @@
typedef unsigned char byte;
class PEImage;
class DIECursor;
+class CV2PDB;
struct SectionDescriptor;
enum DebugLevel : unsigned {
@@ -24,7 +25,8 @@ enum DebugLevel : unsigned {
DbgDwarfAttrRead = 0x400,
DbgDwarfLocLists = 0x800,
DbgDwarfRangeLists = 0x1000,
- DbgDwarfLines = 0x2000
+ DbgDwarfLines = 0x2000,
+ DbgPrintDwarfTree = 0x4000,
};
DEFINE_ENUM_FLAG_OPERATORS(DebugLevel);
@@ -183,7 +185,10 @@ struct DWARF_FileName
// In-memory representation of a DIE (Debugging Info Entry).
struct DWARF_InfoData
{
- // Pointer into the mapped image section where this DIE is located.
+ // The PEImage for this entry.
+ PEImage* img = nullptr;
+
+ // Pointer into the memory-mapped image section where this DIE is located.
byte* entryPtr;
// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
@@ -197,6 +202,18 @@ struct DWARF_InfoData
// Does this DIE have children?
int hasChild;
+ // Parent of this DIE, or NULL if top-level element.
+ DWARF_InfoData* parent = nullptr;
+
+ // Pointer to sibling in the tree. Not to be confused with 'sibling' below,
+ // which is a raw pointer to the DIE in the mapped/loaded image section.
+ // NULL if no more elements.
+ DWARF_InfoData* next = nullptr;
+
+ // Pointer to first child. This forms a linked list with the 'next' pointer.
+ // NULL if no children.
+ DWARF_InfoData* children = nullptr;
+
const char* name;
const char* linkage_name;
const char* dir;
@@ -213,10 +230,14 @@ struct DWARF_InfoData
// Pointer to the DW_AT_type DIE describing the type of this DIE.
byte* type;
byte* containing_type;
+
+ // Pointer to the DIE representing the declaration for this element if it
+ // is a definition. E.g. function decl for its definition/body.
byte* specification;
byte* abstract_origin;
unsigned long inlined;
- bool external;
+ bool external = false; // is this subroutine visible outside its compilation unit?
+ bool isDecl = false; // is this a declaration?
DWARF_Attribute location;
DWARF_Attribute member_location;
DWARF_Attribute frame_base;
@@ -236,6 +257,7 @@ struct DWARF_InfoData
abbrev = 0;
tag = 0;
hasChild = 0;
+ parent = nullptr;
name = 0;
linkage_name = 0;
@@ -252,7 +274,8 @@ struct DWARF_InfoData
specification = 0;
abstract_origin = 0;
inlined = 0;
- external = 0;
+ external = false;
+ isDecl = false;
member_location.type = Invalid;
location.type = Invalid;
frame_base.type = Invalid;
@@ -508,19 +531,31 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t;
// as either an absolute value, a register, or a register-relative address.
Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent);
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent);
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context);
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
// Debug Information Entry Cursor
class DIECursor
{
+ // TODO: make these private.
public:
- DWARF_CompilationUnitInfo* cu;
- byte* ptr;
+ DWARF_CompilationUnitInfo* cu = nullptr; // the CU we are reading from.
+ byte* ptr = nullptr; // the current mapped location we are reading from.
unsigned int entryOff;
- int level;
- bool hasChild; // indicates whether the last read DIE has children
- byte* sibling;
+ int level; // the current level of the tree in the scan.
+ bool prevHasChild = false; // indicates whether the last read DIE has children
+
+ // last DIE scanned. Used to link subsequent nodes in a list.
+ DWARF_InfoData* prevNode = nullptr;
+
+ // The last parent node to which all subsequent nodes should be assigned.
+ // Initially, NULL, but as we encounter a node with children, we establish
+ // it as the new "parent" for future nodes, and reset it once we reach
+ // a top level node.
+ DWARF_InfoData* prevParent = nullptr;
+
+ // The mapped address of the sibling of the last scanned node, if any.
+ byte* sibling = nullptr;
static PEImage *img;
static abbrevMap_t abbrevMap;
@@ -541,17 +576,13 @@ public:
// Goto next sibling DIE. If the last read DIE had any children, they will be skipped over.
void gotoSibling();
- // Reads next sibling DIE. If the last read DIE had any children, they will be skipped over.
- // Returns 'false' upon reaching the last sibling on the current level.
- bool readSibling(DWARF_InfoData& id);
-
// Returns cursor that will enumerate children of the last read DIE.
DIECursor getSubtreeCursor();
- // Reads the next DIE in physical order, returns 'true' if succeeds.
+ // Reads the next DIE in physical order, returns non-NULL if succeeds.
// If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level).
// Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created.
- bool readNext(DWARF_InfoData& id, bool stopAtNull = false);
+ DWARF_InfoData* readNext(DWARF_InfoData* entry, bool stopAtNull = false);
// Read an address from p according to the ambient pointer size.
uint64_t RDAddr(byte* &p) const