From 62f975d2b4030d10a50e140f44f39ede418bcec4 Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 20:37:01 -0500
Subject: DWARF tree for fully-qualified name construction

The Windows debuggers expect PDB symbol names to be fully qualified.
I.e., if a class Foo has a constructor, its name should be emitted as
`Foo::Foo`, not simply `Foo` as is the case today. Linux debuggers like
GDB dynamically reconstruct the symbol tree at runtime each time a
program is debugged. Windows debuggers on the other hand do not, and
expect the name to be fully qualified from the outset. Failing this, the
constructor function `Foo` would have the same name as the class `Foo`
in the PDB, and WinDbg will get confused about what to dump (e.g. using
`dt Foo`) and arbitrarily pick the largest item, which might be the
constructor. Therefore you end up dumping the wrong thing and being
completely unable to inspect the contents of a `Foo` object.

This commit aims to fix that by introducing a DWARF tree during the
conversion process which allows us to efficiently reconstruct such fully
qualified names during the conversion.

A note about DWARF: the DWARF format does not explicitly record the
parent of any given DIE record. It is instead implicit in how the
records are layed out. Any record may have a "has children" flag, and if
it does, then the records following it are its children, terminated by a
special NULL record, popping back up one level of the tree.

The DIECursor already recognized this structure but did not capture it
in memory for later use.

In order to construct fully-qualified names for functions, enums,
classes, etc. (i.e. taking into account namespaces, nesting, etc), we
need a way to efficienctly lookup a node's parent. Thus the DWARF tree
was born.

At a high level, we take advantage of the fact that the DWARF sections
were already scanned in two passes. We hook into the first pass (where
the typeIDs were being reserved) and build the DWARF tree.

Then, in the second pass (where the CV symbols get emitted), we look up
the tree to figure out the correct fully-qualified symbol names.

NOTE: The first phase of this work focuses on subroutines only. Later
work will enable support for structs/classes/enums.

On the subroutine front, I also added a flag to capture whether a DIE is
a "declaration" or definition (based on the DW_AT_declaration
attribute). This is needed to consolidate function decl+defn into one
PDB symbol, as otherwise WinDbg will get confused. This also matches
what the MSVC toolset produces.

A few other related additions:

- Added helper to format a fully qualified function name by looking up
  the tree added in this commit.
- Added helper to print the DWARF tree for debugging purposes and a flag
  to control it.
---
 src/PEImage.h     |  10 ++-
 src/cv2pdb.cpp    |   3 +
 src/cv2pdb.h      |  21 ++++-
 src/dwarf2pdb.cpp | 252 ++++++++++++++++++++++++++++++++++++++++++++++--------
 src/readDwarf.cpp | 202 +++++++++++++++++++++++++++++++------------
 src/readDwarf.h   |  65 ++++++++++----
 6 files changed, 443 insertions(+), 110 deletions(-)
diff --git a/src/PEImage.h b/src/PEImage.h
index 3ae00bd..a809523 100644
--- a/src/PEImage.h
+++ b/src/PEImage.h
@@ -178,11 +178,16 @@ private:
 
     template<typename SYM> const char* t_findSectionSymbolName(int s) const;
 
+	// File handle to PE image.
 	int fd;
+
+	// Pointer to in-memory buffer containing loaded PE image.
 	void* dump_base;
+
+	// Size of `dump_base` in bytes.
 	int dump_total_len;
 
-	// codeview
+	// codeview fields
 	IMAGE_DOS_HEADER *dos;
 	IMAGE_NT_HEADERS32* hdr32;
 	IMAGE_NT_HEADERS64* hdr64;
@@ -200,7 +205,8 @@ private:
 	std::unordered_map<std::string, SymbolInfo> symbolCache;
 
 public:
-	//dwarf
+	// dwarf fields
+	// List of DWARF section descriptors.
 #define EXPANDSEC(name) PESection name;
 	SECTION_LIST()
 #undef EXPANDSEC
diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp
index 1ba7a94..704da4c 100644
--- a/src/cv2pdb.cpp
+++ b/src/cv2pdb.cpp
@@ -897,6 +897,7 @@ void CV2PDB::checkGlobalTypeAlloc(int size, int add)
 	}
 }
 
+// Get the CodeView type descriptor for the given type ID.
 // CV-only. Returns NULL for DWARF-based images.
 const codeview_type* CV2PDB::getTypeData(int type)
 {
@@ -913,6 +914,7 @@ const codeview_type* CV2PDB::getTypeData(int type)
 	return (codeview_type*)(typeData + offset[type - BASE_USER_TYPE]);
 }
 
+// CV-only. Never called for DWARF.
 const codeview_type* CV2PDB::getUserTypeData(int type)
 {
 	type -= BASE_USER_TYPE + globalTypeHeader->cTypes;
@@ -2116,6 +2118,7 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation)
 	return typedefType;
 }
 
+// CV-only.
 void CV2PDB::appendTypedefs()
 {
 	if(Dversion == 0)
diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index 654470b..e5e8144 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -169,17 +169,23 @@ public:
 	bool addDWARFLines();
 	bool addDWARFPublics();
 	bool writeDWARFImage(const TCHAR* opath);
+	DWARF_InfoData* findEntryByPtr(byte* entryPtr) const;
+
+	// Helper to just print the DWARF tree we've built for debugging purposes.
+	void dumpDwarfTree() const;
 
 	bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi);
 	bool addDWARFProc(DWARF_InfoData& id, const std::vector<RangeEntry> &ranges, DIECursor cursor);
+	void formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const;
+
 	int  addDWARFStructure(DWARF_InfoData& id, DIECursor cursor);
-	int  addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int off, int flStart);
-	int  addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor);
+	int  addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int off, int flStart);
+	int  addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor);
 	int  addDWARFBasicType(const char*name, int encoding, int byte_size);
 	int  addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor);
 	int  getTypeByDWARFPtr(byte* ptr);
 	int  getDWARFTypeSize(const DIECursor& parent, byte* ptr);
-	void getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor,
+	void getDWARFArrayBounds(DIECursor cursor,
 		int& basetype, int& lowerBound, int& upperBound);
 	void getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent,
 		int& basetype, int& lowerBound, int& upperBound);
@@ -278,7 +284,14 @@ public:
 
 	// DWARF
 	int codeSegOff;
-	std::unordered_map<byte*, int> mapOffsetToType;
+
+	// Lookup table for type IDs based on the DWARF_InfoData::entryPtr
+	std::unordered_map<byte*, int> mapEntryPtrToTypeID;
+	// Lookup table for entries based on the DWARF_InfoData::entryPtr
+	std::unordered_map<byte*, DWARF_InfoData*> mapEntryPtrToEntry;
+
+	// Head of list of DWARF DIE nodes.
+	DWARF_InfoData* dwarfHead = nullptr;
 
 	// Default lower bound for the current compilation unit. This depends on
 	// the language of the current unit.
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 33c8af6..763be85 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -695,6 +695,74 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo)
 	cbUdtSymbols += len;
 }
 
+// Helper to format a fully qualified proc name like 'some_ns::Foo::Foo' since
+// for a Foo constructor in a Foo class in a namespace called "some_ns".
+// PDBs require fully qualified names in their symbols.
+// TODO: better error handling for out of space.
+void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const {
+	if (proc->specification) {
+		// If the proc has a "specification", i.e. a declaration, use it instead
+		// of the definition, as it has a proper hierarchy connected to it
+		// which will give us a proper fully-qualified name like Foo::Foo
+		// instead of just Foo.
+		const DWARF_InfoData* entry = findEntryByPtr(proc->specification);
+		if (entry) {
+			proc = entry;
+		}
+	}
+	DWARF_InfoData* parent = proc->parent;
+	std::vector<const DWARF_InfoData*> segments;
+	segments.push_back(proc);
+
+	// Accumulate all the valid parent scopes so that we can reverse them for
+	// formatting.
+	while (parent) {
+		switch (parent->tag) {
+		// TODO: are there any other kinds of valid parents?
+		case DW_TAG_class_type:
+		case DW_TAG_structure_type:
+		case DW_TAG_namespace:			
+			segments.push_back(parent);
+			break;
+		default:
+			break;
+		}
+		parent = parent->parent;
+	}
+
+	int remain = cbBuf;
+	char* p = buf;
+
+	// Format the parents in reverse order with :: operator in between.
+	for (int i = segments.size() - 1; i >= 0; --i) {
+		const int nameLen = strlen(segments[i]->name);
+		if (remain < nameLen) {
+			fprintf(stderr, "unable to fit full proc name: %s\n", proc->name);
+			return;
+		}
+
+		memcpy(p, segments[i]->name, nameLen);
+
+		p += nameLen;
+		remain -= nameLen;
+
+		if (i > 0) {
+			// Append :: separator
+			if (remain < 2) {
+				fprintf(stderr, "unable to fit full proc name (:: separator): %s\n", proc->name);
+				return;
+			}
+			*p++ = ':';
+			*p++ = ':';
+			remain -= 2;
+		}
+	}
+	
+	if (remain > 0) {
+		*p = 0;  // NUL terminate.
+	}
+}
+
 bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
 {
 	unsigned int pclo = ranges.front().pclo - codeSegOff;
@@ -723,8 +791,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 	cvs->proc_v2.flags    = 0;
 
 //    printf("GlobalPROC %s\n", procid.name);
-
-	len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, procid.name);
+	char namebuf[kMaxNameLen] = {};
+	formatFullyQualifiedProcName(&procid, namebuf, sizeof namebuf);
+	len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
 	len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
 	for (; len & (align-1); len++)
 		udtSymbols[cbUdtSymbols + len] = 0xf4 - (len & 3);
@@ -762,8 +831,13 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 		DWARF_InfoData id;
 		int off = 8;
 
+		// Save off the cursor to the start of the proc.
 		DIECursor prev = cursor;
-		while (cursor.readNext(id, true))
+
+		// First, collect all the formal parameters of the proc.
+		// Don't worry about storing these in the tree as we're not going to need
+		// to generate fully-qualified names like we would for functions/classes.
+		while (cursor.readNext(&id, true /* stopAtNull */))
 		{
 			if (id.tag == DW_TAG_formal_parameter && id.name)
 			{
@@ -778,7 +852,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 		}
 		appendEndArg();
 
+		//  Now, collect all the lexical blocks and their stack variables.
 		std::vector<DIECursor> lexicalBlocks;
+
+		// Start from the proc base, and push all nested lexical blocks as you
+		// encounter them.
 		lexicalBlocks.push_back(prev);
 
 		while (!lexicalBlocks.empty())
@@ -786,7 +864,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 			cursor = lexicalBlocks.back();
 			lexicalBlocks.pop_back();
 
-			while (cursor.readNext(id))
+			while (cursor.readNext(&id))
 			{
 				if (id.tag == DW_TAG_lexical_block)
 				{
@@ -813,15 +891,23 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 					{
 						appendLexicalBlock(id, pclo + codeSegOff);
 						DIECursor next = cursor;
+
+						// Compute the sibling node of this lexical block.
 						next.gotoSibling();
 						assert(lexicalBlocks.empty() || next.ptr <= lexicalBlocks.back().ptr);
+
+						// Append the next lexical block to the list of blocks
+						// to scan later.
 						lexicalBlocks.push_back(next);
+
+						// But for now, scan down the current lexical block.
 						cursor = cursor.getSubtreeCursor();
 						continue;
 					}
 				}
 				else if (id.tag == DW_TAG_variable)
 				{
+					// Found a local variable.
 					if (id.name && (id.location.type == ExprLoc || id.location.type == Block))
 					{
 						Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset)
@@ -864,14 +950,15 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 	return true;
 }
 
-int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff, int flStart)
+// Only looks at DW_TAG_member and DW_TAG_inheritance
+int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int baseoff, int flStart)
 {
 	bool isunion = structid.tag == DW_TAG_union_type;
 	int nfields = 0;
 
-	// cursor points to the first member
+	// cursor points to the first member of the class/struct/union.
 	DWARF_InfoData id;
-	while (cursor.readNext(id, true))
+	while (cursor.readNext(&id, true))
 	{
 		if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
 			break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -905,12 +992,12 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
 					// if it doesn't have a name, and it's a struct or union, embed it directly
 					DIECursor membercursor(cursor, id.type);
 					DWARF_InfoData memberid;
-					if (membercursor.readNext(memberid))
+					if (membercursor.readNext(&memberid))
 					{
 						if (memberid.abstract_origin)
-							mergeAbstractOrigin(memberid, cursor);
+							mergeAbstractOrigin(memberid, *this);
 						if (memberid.specification)
-							mergeSpecification(memberid, cursor);
+							mergeSpecification(memberid, *this);
 
 						int cvtype = -1;
 						switch (memberid.tag)
@@ -1002,14 +1089,17 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
 	return cvtype;
 }
 
-void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
+// Compute the array bounds of the DIE at the given 'cursor'.
+void CV2PDB::getDWARFArrayBounds(DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
 {
 	DWARF_InfoData id;
 
 	// TODO: handle multi-dimensional arrays
 	if (cursor.cu)
 	{
-		while (cursor.readNext(id, true))
+		// Don't insert these elements into the DB. We're just using it for
+		// array bounds calculation.
+		while (cursor.readNext(&id, true /* stopAtNull */))
 		{
 			if (id.tag == DW_TAG_subrange_type)
 			{
@@ -1042,6 +1132,7 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& p
 	upperBound = subrangeid.upper_bound;
 }
 
+// Compute a type ID for a basic DWARF type.
 int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
 {
 	int type = 0, mode = 0, size = 0;
@@ -1104,10 +1195,13 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
 	return translateType(t);
 }
 
-int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor)
+// TODO: Array wanted to be scanned twice due to DW_TAG_subrange_type being looked at
+// in the caller. See if it can be handled in a single place for clarity, simplicity & efficiency.
+// Goal: don't rescan the same DIE twice.
+int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor)
 {
 	int basetype, upperBound, lowerBound;
-	getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound);
+	getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
 
 	checkUserTypeAlloc(kMaxNameLen + 100);
 	codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
@@ -1243,7 +1337,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 
 	/* Now fill this field list with the enumerators we find in DWARF. */
 	DWARF_InfoData id;
-	while (cursor.readNext(id, true))
+	while (cursor.readNext(&id, true /* stopAtNull */))
 	{
 		if (id.tag == DW_TAG_enumerator && id.has_const_value)
 		{
@@ -1323,18 +1417,22 @@ int CV2PDB::getTypeByDWARFPtr(byte* ptr)
 {
 	if (ptr == nullptr)
 		return 0x03; // void
-	std::unordered_map<byte*, int>::iterator it = mapOffsetToType.find(ptr);
-	if (it == mapOffsetToType.end())
+	std::unordered_map<byte*, int>::iterator it = mapEntryPtrToTypeID.find(ptr);
+	if (it == mapEntryPtrToTypeID.end())
 		return 0x03; // void
 	return it->second;
 }
 
+// Get the logical size of a DWARF type, starting from 'typePtr' and recursing
+// if necessary. E.g. for arrays.
 int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 {
 	DWARF_InfoData id;
 	DIECursor cursor(parent, typePtr);
 
-	if (!cursor.readNext(id))
+	// Don't allocate this into the tree since we're just interested
+	// in computing a type.
+	if (!cursor.readNext(&id))
 		return 0;
 
 	if(id.byte_size > 0)
@@ -1349,7 +1447,7 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 		case DW_TAG_array_type:
 		{
 			int basetype, upperBound, lowerBound;
-			getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound);
+			getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
 			return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type);
 		}
 		default:
@@ -1362,6 +1460,8 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 
 // Scan the .debug_info section and allocate type IDs for each unique type and
 // create a mapping to look them up by their address.
+// This is the first pass scan that builds up the DWARF tree. The second pass (createTypes)
+// emits the actual PDB symbols.
 bool CV2PDB::mapTypes()
 {
 	int typeID = nextUserType;
@@ -1370,6 +1470,9 @@ bool CV2PDB::mapTypes()
 	if (debug & DbgBasic)
 		fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
 
+	// Maintain the first node of each CU to ensure all of them get linked.
+	DWARF_InfoData* firstNode = nullptr;
+
 	// Scan each compilation unit in '.debug_info'.
 	while (off < img.debug_info.length)
 	{
@@ -1391,13 +1494,34 @@ bool CV2PDB::mapTypes()
 		}
 
 		DIECursor cursor(&cu, ptr);
-		DWARF_InfoData id;
-		while (cursor.readNext(id))
+
+		// Set up link to ensure this CU links to the prior one.
+		cursor.prevNode = firstNode;
+
+		DWARF_InfoData* node = nullptr;
+		bool setFirstNode = false;
+		// Start scanning this CU from the beginning and *build a tree of DIE nodes*.
+		while ((node = cursor.readNext(nullptr)) != nullptr)
 		{
+			DWARF_InfoData& id = *node;
+
+			// Initialize the head of the DWARF DIE list the first time.
+			if (!dwarfHead) {
+				dwarfHead = node;
+			}
+
+			if (!setFirstNode) {
+				firstNode = node;
+				setFirstNode = true;
+			}
+
 			if (debug & DbgDwarfTagRead)
 				fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
 						cursor.entryOff, cursor.level, id.code, id.tag);
 
+			// Insert it into the map.
+			mapEntryPtrToEntry[node->entryPtr] = node;
+
 			switch (id.tag)
 			{
 				case DW_TAG_base_type:
@@ -1427,20 +1551,21 @@ bool CV2PDB::mapTypes()
 				case DW_TAG_shared_type:
 				case DW_TAG_rvalue_reference_type:
 					// Reserve a typeID and store it in the map for quick lookup.
-					mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
+					mapEntryPtrToTypeID.insert(std::make_pair(id.entryPtr, typeID));
 					typeID++;
 			}
 		}
 	}
 
 	if (debug & DbgBasic)
-		fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size());
+		fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapEntryPtrToTypeID.size());
 
 	nextDwarfType = typeID;
-	assert(nextDwarfType == nextUserType + mapOffsetToType.size());
+	assert(nextDwarfType == nextUserType + mapEntryPtrToTypeID.size());
 	return true;
 }
 
+// Walks the .debug_info section and builds a DIE tree.
 bool CV2PDB::createTypes()
 {
 	img.createSymbolCache();
@@ -1474,17 +1599,24 @@ bool CV2PDB::createTypes()
 		}
 
 		DIECursor cursor(&cu, ptr);
+
+		DWARF_InfoData* node = nullptr;
+		bool setFirstNode = false;
 		DWARF_InfoData id;
-		while (cursor.readNext(id))
+
+		// Scan the DIEs in this CU, reusing the elements.
+		while (cursor.readNext(&id))
 		{
 			if (debug & DbgDwarfTagRead)
 				fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
 						cursor.entryOff, cursor.level, id.code, id.tag);
 
+			// Merge in related entries. This relies on the DWARF tree having been built
+			// in the first pass (mapTypes).
 			if (id.abstract_origin)
-				mergeAbstractOrigin(id, cursor);
+				mergeAbstractOrigin(id, *this);
 			if (id.specification)
-				mergeSpecification(id, cursor);
+				mergeSpecification(id, *this);
 
 			int cvtype = -1;
 			switch (id.tag)
@@ -1515,14 +1647,14 @@ bool CV2PDB::createTypes()
 			case DW_TAG_class_type:
 			case DW_TAG_structure_type:
 			case DW_TAG_union_type:
-				cvtype = addDWARFStructure(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFStructure(id, cursor);
 				break;
 			case DW_TAG_array_type:
-				cvtype = addDWARFArray(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFArray(id, cursor);
 				break;
 
 			case DW_TAG_enumeration_type:
-				cvtype = addDWARFEnum(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFEnum(id, cursor);
 				break;
 
 			case DW_TAG_subroutine_type:
@@ -1556,7 +1688,17 @@ bool CV2PDB::createTypes()
 
 							mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0);
 						}
-						addDWARFProc(id, ranges, cursor.getSubtreeCursor());
+
+						// Only add the definition, not declaration, because
+						// MSVC toolset only produces a single symbol for
+						// each function and will get confused if there are
+						// 2 PDB symbols for the same routine.
+						//
+						// TODO: Add more type info to the routine. Today we
+						// expose it as "T_NOTYPE" when we could do better.
+						if (!id.isDecl) {
+							addDWARFProc(id, ranges, cursor);
+						}
 					}
 				}
 				break;
@@ -1663,18 +1805,42 @@ bool CV2PDB::createTypes()
 
 			if (cvtype >= 0)
 			{
-				assert(cvtype == typeID); typeID++;
-				assert(mapOffsetToType[id.entryPtr] == cvtype);
+				assert(cvtype == typeID); 
+				typeID++;
+
+				assert(mapEntryPtrToTypeID[id.entryPtr] == cvtype);
 				assert(typeID == nextUserType);
 			}
 		}
 	}
 
 	assert(typeID == nextUserType);
-	assert(typeID == firstUserType + mapOffsetToType.size());
+	assert(typeID == firstUserType + mapEntryPtrToTypeID.size());
 	return true;
 }
 
+void printIndent(int level) {
+	for (int i = 0; i < level; ++i) {
+		printf("  ");
+	}
+}
+
+void dumpTreeHelper(DWARF_InfoData* node, int level) {
+	for (DWARF_InfoData* n = node; n; n = n->next) {
+		const unsigned dieOffset = n->img->debug_info.sectOff(n->entryPtr);
+
+		printIndent(level);
+		printf("offset: %#x, name: \"%s\", tag: %#x, abbrev: %d\n", dieOffset, n->name, n->tag, n->code);
+
+		// Visit the children.
+		dumpTreeHelper(n->children, level + 1);
+	}
+}
+
+void CV2PDB::dumpDwarfTree() const {
+	dumpTreeHelper(dwarfHead, 0);	
+}
+
 bool CV2PDB::createDWARFModules()
 {
 	if(!img.debug_info.isPresent())
@@ -1723,6 +1889,10 @@ bool CV2PDB::createDWARFModules()
 	if (!createTypes())
 		return false;
 
+	if (debug & DbgPrintDwarfTree) {
+		dumpDwarfTree();
+	}
+
 	/*
 	if(!iterateDWARFDebugInfo(kOpMapTypes))
 		return false;
@@ -1778,6 +1948,20 @@ bool CV2PDB::addDWARFPublics()
 	return true;
 }
 
+// Try to lookup a DWARF_InfoData in the constructed DWARF tree given its
+// "entryPtr". I.e. its memory-mapped location in the loaded PE image buffer.
+DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
+{
+	auto it = mapEntryPtrToEntry.find(entryPtr);
+	if (it == mapEntryPtrToEntry.end()) {
+		// Could not find decl for this definition.
+		return nullptr;
+	}
+	else {
+		return it->second;
+	}
+}
+
 bool CV2PDB::writeDWARFImage(const TCHAR* opath)
 {
 	int len = sizeof(*rsds) + strlen((char*)(rsds + 1)) + 1;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 1058c60..9218f41 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -1,14 +1,12 @@
 #include "readDwarf.h"
 #include <assert.h>
 #include <array>
+#include <memory> // unique_ptr
 
 #include "PEImage.h"
+#include "cv2pdb.h"
 #include "dwarf.h"
 #include "mspdb.h"
-extern "C" {
-	#include "mscvpdb.h"
-}
-
 
 // declare hasher for pair<T1,T2>
 namespace std
@@ -365,32 +363,52 @@ Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase,
 	return stack[0];
 }
 
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent)
+// Find the source of an inlined function by following its 'abstract_origin' 
+// attribute references and recursively merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
 {
-	DIECursor specCursor(parent, id.abstract_origin);
-	DWARF_InfoData idspec;
-	specCursor.readNext(idspec);
-	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
+	DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
+	if (!abstractOrigin) {
+		// Could not find abstract origin. Why not?
+		assert(false);
+		return;
+	}
+
+	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+	// in the wild.
+	//
 	// assert(id.tag == idspec.tag);
-	if (idspec.abstract_origin)
-		mergeAbstractOrigin(idspec, parent);
-	if (idspec.specification)
-		mergeSpecification(idspec, parent);
-	id.merge(idspec);
+
+	if (abstractOrigin->abstract_origin)
+		mergeAbstractOrigin(*abstractOrigin, context);
+	if (abstractOrigin->specification)
+		mergeSpecification(*abstractOrigin, context);
+	id.merge(*abstractOrigin);
 }
 
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent)
+// Find the declaration entry for a definition by following its 'specification'
+// attribute references and merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
 {
-	DIECursor specCursor(parent, id.specification);
-	DWARF_InfoData idspec;
-	specCursor.readNext(idspec);
-	//assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
-	//assert(id.tag == idspec.tag);
-	if (idspec.abstract_origin)
-		mergeAbstractOrigin(idspec, parent);
-	if (idspec.specification)
-		mergeSpecification(idspec, parent);
-	id.merge(idspec);
+	DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
+	if (!idspec) {
+		// Could not find decl for this definition. Why not?
+		assert(false);
+		return;
+	}
+
+	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+	// in the wild.
+	//
+	// assert(id.tag == idspec.tag);
+
+	if (idspec->abstract_origin)
+		mergeAbstractOrigin(*idspec, context);
+	if (idspec->specification) {
+		mergeSpecification(*idspec, context);
+	}
+	id.merge(*idspec);
 }
 
 LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
@@ -584,7 +602,7 @@ DIECursor::DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr_)
 	cu = cu_;
 	ptr = ptr_;
 	level = 0;
-	hasChild = false;
+	prevHasChild = false;
 	sibling = 0;
 }
 
@@ -594,40 +612,41 @@ DIECursor::DIECursor(const DIECursor& parent, byte* ptr_)
 	ptr = ptr_;
 }
 
+// Advance the cursor to the next sibling of the current node, using the fast
+// path when possible.
 void DIECursor::gotoSibling()
 {
 	if (sibling)
 	{
-		// use sibling pointer, if available
+		// Fast path: use sibling pointer, if available.
 		ptr = sibling;
-		hasChild = false;
+		prevHasChild = false;
 	}
-	else if (hasChild)
+	else if (prevHasChild)
 	{
-		int currLevel = level;
+		// Slow path. Skip over child nodes until we get back to the current
+		// level.
+		const int currLevel = level;
 		level = currLevel + 1;
-		hasChild = false;
+		prevHasChild = false;
 
+		// Don't store these in the tree since this is just used for skipping over
+		// last swaths of nodes.
 		DWARF_InfoData dummy;
-		// read untill we pop back to the level we were at
+
+		// read until we pop back to the level we were at
 		while (level > currLevel)
-			readNext(dummy, true);
+			readNext(&dummy, true /* stopAtNull */);
 	}
 }
 
-bool DIECursor::readSibling(DWARF_InfoData& id)
-{
-    gotoSibling();
-	return readNext(id, true);
-}
-
 DIECursor DIECursor::getSubtreeCursor()
 {
-	if (hasChild)
+	if (prevHasChild)
 	{
 		DIECursor subtree = *this;
 		subtree.level = 0;
-		subtree.hasChild = false;
+		subtree.prevHasChild = false;
 		return subtree;
 	}
 	else // Return invalid cursor
@@ -696,31 +715,80 @@ static byte* getPointerInSection(const PEImage &img, const SectionDescriptor &se
 	return peSec.byteAt(offset);
 }
 
-bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
+// Scan the next DIE from the current CU.
+// TODO: Allocate a new element each time.
+DWARF_InfoData* DIECursor::readNext(DWARF_InfoData* entry, bool stopAtNull)
 {
-	id.clear();
+	std::unique_ptr<DWARF_InfoData> node;
+
+	// Controls whether we should bother establishing links between nodes.
+	// If 'entry' is provided, we are just going to be using it instead
+	// of allocating our own nodes. The callers typically reuse the same
+	// node over and over in this case, so don't bother tracking the links.
+	// Furthermore, since we clear the input node in this case, we can't rely
+	// on it from call to call.
+	// TODO: Rethink how to more cleanly express the alloc vs reuse modes of
+	// operation.
+	bool establishLinks = false;
+
+	// If an entry was passed in, use it. Else allocate one.
+	if (!entry) {
+		establishLinks = true;
+		node = std::make_unique<DWARF_InfoData>();
+		entry = node.get();
+	} else {
+		// If an entry was provided, make sure we clear it.
+		entry->clear();
+	}
 
-	if (hasChild)
+	entry->img = img;
+	
+	if (prevHasChild) {
+		// Prior element had a child, thus this element is its first child.
 		++level;
 
+		if (establishLinks) {
+			// Establish the first child.
+			prevParent->children = entry;
+		}
+	}
+
+	// Set up a convenience alias.
+	DWARF_InfoData& id = *entry;
+
+	// Find the first valid DIE.
 	for (;;)
 	{
 		if (level == -1)
-			return false; // we were already at the end of the subtree
+			return nullptr; // we were already at the end of the subtree
 
 		if (ptr >= cu->end_ptr)
-			return false; // root of the tree does not have a null terminator, but we know the length
+			return nullptr; // root of the tree does not have a null terminator, but we know the length
 
 		id.entryPtr = ptr;
 		entryOff = img->debug_info.sectOff(ptr);
 		id.code = LEB128(ptr);
+
+		// If the previously scanned node claimed to have a child, this must be a valid DIE.
+		assert(!prevHasChild || id.code);
+
+		// Check if we need to terminate the sibling chain.
 		if (id.code == 0)
 		{
-			--level; // pop up one level
+			// Done with this level.
+			if (establishLinks) {
+				// Continue linking siblings from the parent node.
+				prevNode = prevParent;
+
+				// Unwind the parent one level up.
+				prevParent = prevParent->parent;
+			}
+
+			--level;
 			if (stopAtNull)
 			{
-				hasChild = false;
-				return false;
+				prevHasChild = false;
+				return nullptr;
 			}
 			continue; // read the next DIE
 		}
@@ -733,17 +801,42 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 		fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__,
 				id.code, entryOff);
 		assert(abbrev);
-		return false;
+		return nullptr;
 	}
 
 	id.abbrev = abbrev;
 	id.tag = LEB128(abbrev);
 	id.hasChild = *abbrev++;
 
+	if (establishLinks) {
+		// If there was a previous node, link it to this one, thus continuing the chain.
+		if (prevNode) {
+			prevNode->next = entry;
+		}
+
+		// Establish parent of current node. If 'prevParent' is NULL, that is fine.
+		// It just means this node is a top-level node.
+		entry->parent = prevParent;
+
+		if (id.hasChild) {
+			// This node has children! Establish it as the new parent for future nodes.		
+			prevParent = entry;
+
+			// Clear the last DIE because the next scanned node will form the *start*
+			// of a new linked list comprising the children of the current node.
+			prevNode = nullptr;
+		}
+		else {
+			// Ensure the next node appends itself to this one.
+			prevNode = entry;
+		}
+	}
+
 	if (debug & DbgDwarfAttrRead)
 		fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__,
 				entryOff, level, id.tag, id.code);
 
+	// Read all the attribute data for this DIE.
 	int attr, form;
 	for (;;)
 	{
@@ -809,7 +902,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 			case DW_FORM_sec_offset:     a.type = SecOffset; a.sec_offset = RDref(ptr); break;
 			case DW_FORM_loclistx:       a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_loclists, cu->loclist_base); break;
 			case DW_FORM_rnglistx:       a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_rnglists, cu->rnglist_base); break;
-			default: assert(false && "Unsupported DWARF attribute form"); return false;
+			default: assert(false && "Unsupported DWARF attribute form"); return nullptr;
 		}
 
 		switch (attr)
@@ -852,6 +945,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 			case DW_AT_type:      assert(a.type == Ref); id.type = a.ref; break;
 			case DW_AT_inline:    assert(a.type == Const); id.inlined = a.cons; break;
 			case DW_AT_external:  assert(a.type == Flag); id.external = a.flag; break;
+			case DW_AT_declaration: assert(a.type == Flag); id.isDecl = a.flag; break;
 			case DW_AT_upper_bound:
 				assert(a.type == Const || a.type == Ref || a.type == ExprLoc || a.type == Block);
 				if (a.type == Const) // TODO: other types not supported yet
@@ -912,10 +1006,12 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 		}
 	}
 
-	hasChild = id.hasChild != 0;
+	prevHasChild = id.hasChild != 0;
 	sibling = id.sibling;
 
-	return true;
+	// Transfer ownership of 'node' to caller, if we allocated one.
+	node.release();
+	return entry;
 }
 
 byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode)
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 56e89a3..06779c8 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -11,6 +11,7 @@
 typedef unsigned char byte;
 class PEImage;
 class DIECursor;
+class CV2PDB;
 struct SectionDescriptor;
 
 enum DebugLevel : unsigned {
@@ -24,7 +25,8 @@ enum DebugLevel : unsigned {
 	DbgDwarfAttrRead = 0x400,
 	DbgDwarfLocLists = 0x800,
 	DbgDwarfRangeLists = 0x1000,
-	DbgDwarfLines = 0x2000
+	DbgDwarfLines = 0x2000,
+	DbgPrintDwarfTree = 0x4000,
 };
 
 DEFINE_ENUM_FLAG_OPERATORS(DebugLevel);
@@ -183,7 +185,10 @@ struct DWARF_FileName
 // In-memory representation of a DIE (Debugging Info Entry).
 struct DWARF_InfoData
 {
-	// Pointer into the mapped image section where this DIE is located.
+	// The PEImage for this entry.
+	PEImage* img = nullptr;
+
+	// Pointer into the memory-mapped image section where this DIE is located.
 	byte* entryPtr;
 
 	// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
@@ -197,6 +202,18 @@ struct DWARF_InfoData
 	// Does this DIE have children?
 	int hasChild;
 
+	// Parent of this DIE, or NULL if top-level element.
+	DWARF_InfoData* parent = nullptr;
+
+	// Pointer to sibling in the tree. Not to be confused with 'sibling' below,
+	// which is a raw pointer to the DIE in the mapped/loaded image section.
+	// NULL if no more elements.
+	DWARF_InfoData* next = nullptr;
+
+	// Pointer to first child. This forms a linked list with the 'next' pointer.
+	// NULL if no children.
+	DWARF_InfoData* children = nullptr;
+
 	const char* name;
 	const char* linkage_name;
 	const char* dir;
@@ -213,10 +230,14 @@ struct DWARF_InfoData
 	// Pointer to the DW_AT_type DIE describing the type of this DIE.
 	byte* type;
 	byte* containing_type;
+
+	// Pointer to the DIE representing the declaration for this element if it
+	// is a definition. E.g. function decl for its definition/body.
 	byte* specification;
 	byte* abstract_origin;
 	unsigned long inlined;
-	bool external;
+	bool external = false; // is this subroutine visible outside its compilation unit?
+	bool isDecl = false; // is this a declaration?
 	DWARF_Attribute location;
 	DWARF_Attribute member_location;
 	DWARF_Attribute frame_base;
@@ -236,6 +257,7 @@ struct DWARF_InfoData
 		abbrev = 0;
 		tag = 0;
 		hasChild = 0;
+		parent = nullptr;
 
 		name = 0;
 		linkage_name = 0;
@@ -252,7 +274,8 @@ struct DWARF_InfoData
 		specification = 0;
 		abstract_origin = 0;
 		inlined = 0;
-		external = 0;
+		external = false;
+		isDecl = false;
 		member_location.type = Invalid;
 		location.type = Invalid;
 		frame_base.type = Invalid;
@@ -508,19 +531,31 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t;
 // as either an absolute value, a register, or a register-relative address.
 Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
 
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent);
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent);
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context);
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
 
 // Debug Information Entry Cursor
 class DIECursor
 {
+	// TODO: make these private.
 public:
-	DWARF_CompilationUnitInfo* cu;
-	byte* ptr;
+	DWARF_CompilationUnitInfo* cu = nullptr; // the CU we are reading from.
+	byte* ptr = nullptr; // the current mapped location we are reading from.
 	unsigned int entryOff;
-	int level;
-	bool hasChild; // indicates whether the last read DIE has children
-	byte* sibling;
+	int level; // the current level of the tree in the scan.
+	bool prevHasChild = false; // indicates whether the last read DIE has children
+
+	// last DIE scanned. Used to link subsequent nodes in a list.
+	DWARF_InfoData* prevNode = nullptr;
+	
+	// The last parent node to which all subsequent nodes should be assigned.
+	// Initially, NULL, but as we encounter a node with children, we establish
+	// it as the new "parent" for future nodes, and reset it once we reach
+	// a top level node.
+	DWARF_InfoData* prevParent = nullptr;
+
+	// The mapped address of the sibling of the last scanned node, if any.
+	byte* sibling = nullptr;
 
 	static PEImage *img;
 	static abbrevMap_t abbrevMap;
@@ -541,17 +576,13 @@ public:
 	// Goto next sibling DIE.  If the last read DIE had any children, they will be skipped over.
 	void gotoSibling();
 
-	// Reads next sibling DIE.  If the last read DIE had any children, they will be skipped over.
-	// Returns 'false' upon reaching the last sibling on the current level.
-	bool readSibling(DWARF_InfoData& id);
-
 	// Returns cursor that will enumerate children of the last read DIE.
 	DIECursor getSubtreeCursor();
 
-	// Reads the next DIE in physical order, returns 'true' if succeeds.
+	// Reads the next DIE in physical order, returns non-NULL if succeeds.
 	// If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level).
 	// Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created.
-	bool readNext(DWARF_InfoData& id, bool stopAtNull = false);
+	DWARF_InfoData* readNext(DWARF_InfoData* entry, bool stopAtNull = false);
 
 	// Read an address from p according to the ambient pointer size.
 	uint64_t RDAddr(byte* &p) const
-- 
cgit v0.12