summaryrefslogtreecommitdiffstats
path: root/src/readDwarf.h
diff options
context:
space:
mode:
authorAlex Budovski <alexbud@meta.com>2023-03-23 01:37:01 (GMT)
committerAlex Budovski <alexbud@meta.com>2023-03-24 15:12:48 (GMT)
commit62f975d2b4030d10a50e140f44f39ede418bcec4 (patch)
tree3c87638dd38e81bdd851e7257353d36ab6cc188c /src/readDwarf.h
parent2e4c1bf97b1491385c37432aef58b15943eb118a (diff)
downloadcv2pdb-62f975d2b4030d10a50e140f44f39ede418bcec4.zip
cv2pdb-62f975d2b4030d10a50e140f44f39ede418bcec4.tar.gz
cv2pdb-62f975d2b4030d10a50e140f44f39ede418bcec4.tar.bz2
DWARF tree for fully-qualified name construction
The Windows debuggers expect PDB symbol names to be fully qualified. I.e., if a class Foo has a constructor, its name should be emitted as `Foo::Foo`, not simply `Foo` as is the case today. Linux debuggers like GDB dynamically reconstruct the symbol tree at runtime each time a program is debugged. Windows debuggers on the other hand do not, and expect the name to be fully qualified from the outset. Failing this, the constructor function `Foo` would have the same name as the class `Foo` in the PDB, and WinDbg will get confused about what to dump (e.g. using `dt Foo`) and arbitrarily pick the largest item, which might be the constructor. Therefore you end up dumping the wrong thing and being completely unable to inspect the contents of a `Foo` object. This commit aims to fix that by introducing a DWARF tree during the conversion process which allows us to efficiently reconstruct such fully qualified names during the conversion. A note about DWARF: the DWARF format does not explicitly record the parent of any given DIE record. It is instead implicit in how the records are laid out. Any record may have a "has children" flag, and if it does, then the records following it are its children, terminated by a special NULL record, popping back up one level of the tree. The DIECursor already recognized this structure but did not capture it in memory for later use. In order to construct fully-qualified names for functions, enums, classes, etc. (i.e. taking into account namespaces, nesting, etc.), we need a way to efficiently look up a node's parent. Thus the DWARF tree was born. At a high level, we take advantage of the fact that the DWARF sections were already scanned in two passes. We hook into the first pass (where the typeIDs were being reserved) and build the DWARF tree. Then, in the second pass (where the CV symbols get emitted), we look up the tree to figure out the correct fully-qualified symbol names. NOTE: The first phase of this work focuses on subroutines only. 
Later work will enable support for structs/classes/enums. On the subroutine front, I also added a flag to capture whether a DIE is a "declaration" or definition (based on the DW_AT_declaration attribute). This is needed to consolidate function decl+defn into one PDB symbol, as otherwise WinDbg will get confused. This also matches what the MSVC toolset produces. A few other related additions: - Added helper to format a fully qualified function name by looking up the tree added in this commit. - Added helper to print the DWARF tree for debugging purposes and a flag to control it.
Diffstat (limited to 'src/readDwarf.h')
-rw-r--r--src/readDwarf.h65
1 files changed, 48 insertions, 17 deletions
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 56e89a3..06779c8 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -11,6 +11,7 @@
typedef unsigned char byte;
class PEImage;
class DIECursor;
+class CV2PDB;
struct SectionDescriptor;
enum DebugLevel : unsigned {
@@ -24,7 +25,8 @@ enum DebugLevel : unsigned {
DbgDwarfAttrRead = 0x400,
DbgDwarfLocLists = 0x800,
DbgDwarfRangeLists = 0x1000,
- DbgDwarfLines = 0x2000
+ DbgDwarfLines = 0x2000,
+ DbgPrintDwarfTree = 0x4000,
};
DEFINE_ENUM_FLAG_OPERATORS(DebugLevel);
@@ -183,7 +185,10 @@ struct DWARF_FileName
// In-memory representation of a DIE (Debugging Info Entry).
struct DWARF_InfoData
{
- // Pointer into the mapped image section where this DIE is located.
+ // The PEImage for this entry.
+ PEImage* img = nullptr;
+
+ // Pointer into the memory-mapped image section where this DIE is located.
byte* entryPtr;
// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
@@ -197,6 +202,18 @@ struct DWARF_InfoData
// Does this DIE have children?
int hasChild;
+ // Parent of this DIE, or NULL if top-level element.
+ DWARF_InfoData* parent = nullptr;
+
+ // Pointer to sibling in the tree. Not to be confused with 'sibling' below,
+ // which is a raw pointer to the DIE in the mapped/loaded image section.
+ // NULL if no more elements.
+ DWARF_InfoData* next = nullptr;
+
+ // Pointer to first child. This forms a linked list with the 'next' pointer.
+ // NULL if no children.
+ DWARF_InfoData* children = nullptr;
+
const char* name;
const char* linkage_name;
const char* dir;
@@ -213,10 +230,14 @@ struct DWARF_InfoData
// Pointer to the DW_AT_type DIE describing the type of this DIE.
byte* type;
byte* containing_type;
+
+ // Pointer to the DIE representing the declaration for this element if it
+ // is a definition. E.g. function decl for its definition/body.
byte* specification;
byte* abstract_origin;
unsigned long inlined;
- bool external;
+ bool external = false; // is this subroutine visible outside its compilation unit?
+ bool isDecl = false; // is this a declaration?
DWARF_Attribute location;
DWARF_Attribute member_location;
DWARF_Attribute frame_base;
@@ -236,6 +257,7 @@ struct DWARF_InfoData
abbrev = 0;
tag = 0;
hasChild = 0;
+ parent = nullptr;
name = 0;
linkage_name = 0;
@@ -252,7 +274,8 @@ struct DWARF_InfoData
specification = 0;
abstract_origin = 0;
inlined = 0;
- external = 0;
+ external = false;
+ isDecl = false;
member_location.type = Invalid;
location.type = Invalid;
frame_base.type = Invalid;
@@ -508,19 +531,31 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t;
// as either an absolute value, a register, or a register-relative address.
Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent);
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent);
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context);
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
// Debug Information Entry Cursor
class DIECursor
{
+ // TODO: make these private.
public:
- DWARF_CompilationUnitInfo* cu;
- byte* ptr;
+ DWARF_CompilationUnitInfo* cu = nullptr; // the CU we are reading from.
+ byte* ptr = nullptr; // the current mapped location we are reading from.
unsigned int entryOff;
- int level;
- bool hasChild; // indicates whether the last read DIE has children
- byte* sibling;
+ int level; // the current level of the tree in the scan.
+ bool prevHasChild = false; // indicates whether the last read DIE has children
+
+ // last DIE scanned. Used to link subsequent nodes in a list.
+ DWARF_InfoData* prevNode = nullptr;
+
+ // The last parent node to which all subsequent nodes should be assigned.
+ // Initially, NULL, but as we encounter a node with children, we establish
+ // it as the new "parent" for future nodes, and reset it once we reach
+ // a top level node.
+ DWARF_InfoData* prevParent = nullptr;
+
+ // The mapped address of the sibling of the last scanned node, if any.
+ byte* sibling = nullptr;
static PEImage *img;
static abbrevMap_t abbrevMap;
@@ -541,17 +576,13 @@ public:
// Goto next sibling DIE. If the last read DIE had any children, they will be skipped over.
void gotoSibling();
- // Reads next sibling DIE. If the last read DIE had any children, they will be skipped over.
- // Returns 'false' upon reaching the last sibling on the current level.
- bool readSibling(DWARF_InfoData& id);
-
// Returns cursor that will enumerate children of the last read DIE.
DIECursor getSubtreeCursor();
- // Reads the next DIE in physical order, returns 'true' if succeeds.
+ // Reads the next DIE in physical order, returns non-NULL if succeeds.
// If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level).
// Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created.
- bool readNext(DWARF_InfoData& id, bool stopAtNull = false);
+ DWARF_InfoData* readNext(DWARF_InfoData* entry, bool stopAtNull = false);
// Read an address from p according to the ambient pointer size.
uint64_t RDAddr(byte* &p) const