diff options
author | Rainer Schuetze <r.sagitario@gmx.de> | 2023-06-10 06:29:13 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-10 06:29:13 (GMT) |
commit | e076c746a94edc5245d67d96123735cc8a274027 (patch) | |
tree | 8cf95547d885ed859dff8aa98b1c88598120aa6c | |
parent | d9b51b76e965fbc121b3c5e22fdd240039219670 (diff) | |
parent | a21b4159ff2e72f8ad356c602db47bf2644498b1 (diff) | |
download | cv2pdb-e076c746a94edc5245d67d96123735cc8a274027.zip cv2pdb-e076c746a94edc5245d67d96123735cc8a274027.tar.gz cv2pdb-e076c746a94edc5245d67d96123735cc8a274027.tar.bz2 |
Merge pull request #86 from alexbudfb/dwarftree
Vastly Improved DWARF to PDB Support
-rw-r--r-- | src/NatvisFile.natvis | 26 | ||||
-rw-r--r-- | src/PEImage.cpp | 6 | ||||
-rw-r--r-- | src/PEImage.h | 10 | ||||
-rw-r--r-- | src/cv2pdb.cpp | 64 | ||||
-rw-r--r-- | src/cv2pdb.h | 38 | ||||
-rw-r--r-- | src/cv2pdb.vcxproj | 9 | ||||
-rw-r--r-- | src/cv2pdb.vcxproj.filters | 6 | ||||
-rw-r--r-- | src/dumplines.vcxproj | 4 | ||||
-rw-r--r-- | src/dviewhelper/dviewhelper.vcxproj | 8 | ||||
-rw-r--r-- | src/dwarf2pdb.cpp | 521 | ||||
-rw-r--r-- | src/readDwarf.cpp | 202 | ||||
-rw-r--r-- | src/readDwarf.h | 97 |
12 files changed, 806 insertions, 185 deletions
diff --git a/src/NatvisFile.natvis b/src/NatvisFile.natvis new file mode 100644 index 0000000..21baadc --- /dev/null +++ b/src/NatvisFile.natvis @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="utf-8"?> +<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010"> + <Type Name="DWARF_InfoData"> + <DisplayString>tag={tag} code={code} {name,s}</DisplayString> + <Expand> + <Synthetic Name="children" Condition="children"> + <Expand> + <LinkedListItems> + <HeadPointer>children</HeadPointer> + <NextPointer>next</NextPointer> + <ValueNode>this</ValueNode> + </LinkedListItems> + </Expand> + </Synthetic> + <Synthetic Name="siblings" Condition="next"> + <Expand> + <LinkedListItems> + <HeadPointer>next</HeadPointer> + <NextPointer>next</NextPointer> + <ValueNode>this</ValueNode> + </LinkedListItems> + </Expand> + </Synthetic> + </Expand> + </Type> +</AutoVisualizer>
\ No newline at end of file diff --git a/src/PEImage.cpp b/src/PEImage.cpp index 247d514..4b39a3f 100644 --- a/src/PEImage.cpp +++ b/src/PEImage.cpp @@ -375,6 +375,7 @@ bool PEImage::_initFromCVDebugDir(IMAGE_DEBUG_DIRECTORY* ddir) }
///////////////////////////////////////////////////////////////////////
+// Used for PE (EXE/DLL) files.
bool PEImage::initDWARFPtr(bool initDbgDir)
{
dos = DPV<IMAGE_DOS_HEADER> (0);
@@ -410,6 +411,7 @@ bool PEImage::initDWARFPtr(bool initDbgDir) return true;
}
+// Used for COFF objects.
bool PEImage::initDWARFObject()
{
IMAGE_FILE_HEADER* hdr = DPV<IMAGE_FILE_HEADER> (0);
@@ -466,8 +468,11 @@ void PEImage::initSec(PESection& peSec, int secNo) const peSec.secNo = secNo;
}
+// Initialize all the DWARF sections present in this PE or COFF file.
+// Common to both object and image modules.
void PEImage::initDWARFSegments()
{
+ // Scan all the PE sections in this image.
for(int s = 0; s < nsec; s++)
{
const char* name = (const char*) sec[s].Name;
@@ -477,6 +482,7 @@ void PEImage::initDWARFSegments() name = strtable + off;
}
+ // Is 'name' one of the DWARF sections?
for (const SectionDescriptor *sec_desc : sec_descriptors) {
if (!strcmp(name, sec_desc->name)) {
PESection& peSec = this->*(sec_desc->pSec);
diff --git a/src/PEImage.h b/src/PEImage.h index 3ae00bd..a809523 100644 --- a/src/PEImage.h +++ b/src/PEImage.h @@ -178,11 +178,16 @@ private: template<typename SYM> const char* t_findSectionSymbolName(int s) const;
+ // File handle to PE image.
int fd;
+
+ // Pointer to in-memory buffer containing loaded PE image.
void* dump_base;
+
+ // Size of `dump_base` in bytes.
int dump_total_len;
- // codeview
+ // codeview fields
IMAGE_DOS_HEADER *dos;
IMAGE_NT_HEADERS32* hdr32;
IMAGE_NT_HEADERS64* hdr64;
@@ -200,7 +205,8 @@ private: std::unordered_map<std::string, SymbolInfo> symbolCache;
public:
- //dwarf
+ // dwarf fields
+ // List of DWARF section descriptors.
#define EXPANDSEC(name) PESection name;
SECTION_LIST()
#undef EXPANDSEC
diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp index 696cb74..704da4c 100644 --- a/src/cv2pdb.cpp +++ b/src/cv2pdb.cpp @@ -36,8 +36,6 @@ CV2PDB::CV2PDB(PEImage& image, DebugLevel debug_) memset(typedefs, 0, sizeof(typedefs));
memset(translatedTypedefs, 0, sizeof(translatedTypedefs));
cntTypedefs = 0;
- nextUserType = 0x1000;
- nextDwarfType = 0x1000;
addClassTypeEnum = true;
addObjectViewHelper = true;
@@ -899,25 +897,28 @@ void CV2PDB::checkGlobalTypeAlloc(int size, int add) }
}
+// Get the CodeView type descriptor for the given type ID.
+// CV-only. Returns NULL for DWARF-based images.
const codeview_type* CV2PDB::getTypeData(int type)
{
- if (!globalTypeHeader)
+ if (!globalTypeHeader) // NULL for DWARF.
return 0;
- if (type < 0x1000 || type >= (int) (0x1000 + globalTypeHeader->cTypes + nextUserType))
+ if (type < BASE_USER_TYPE || type >= (int) (BASE_USER_TYPE + globalTypeHeader->cTypes + nextUserType))
return 0;
- if (type >= (int) (0x1000 + globalTypeHeader->cTypes))
+ if (type >= (int) (BASE_USER_TYPE + globalTypeHeader->cTypes))
return getUserTypeData(type);
DWORD* offset = (DWORD*)(globalTypeHeader + 1);
BYTE* typeData = (BYTE*)(offset + globalTypeHeader->cTypes);
- return (codeview_type*)(typeData + offset[type - 0x1000]);
+ return (codeview_type*)(typeData + offset[type - BASE_USER_TYPE]);
}
+// CV-only. Never called for DWARF.
const codeview_type* CV2PDB::getUserTypeData(int type)
{
- type -= 0x1000 + globalTypeHeader->cTypes;
- if (type < 0 || type >= nextUserType - 0x1000)
+ type -= BASE_USER_TYPE + globalTypeHeader->cTypes;
+ if (type < 0 || type >= nextUserType - BASE_USER_TYPE)
return 0;
int pos = 0;
@@ -933,8 +934,8 @@ const codeview_type* CV2PDB::getUserTypeData(int type) const codeview_type* CV2PDB::getConvertedTypeData(int type)
{
- type -= 0x1000;
- if (type < 0 || type >= nextUserType - 0x1000)
+ type -= BASE_USER_TYPE;
+ if (type < 0 || type >= nextUserType - BASE_USER_TYPE)
return 0;
int pos = typePrefix;
@@ -1013,7 +1014,7 @@ int CV2PDB::findMemberFunctionType(codeview_symbol* lastGProcSym, int thisPtrTyp type->mfunction_v1.call == proctype->procedure_v1.call &&
type->mfunction_v1.rvtype == proctype->procedure_v1.rvtype)
{
- return t + 0x1000;
+ return t + BASE_USER_TYPE;
}
}
}
@@ -1123,7 +1124,7 @@ int CV2PDB::sizeofBasicType(int type) int CV2PDB::sizeofType(int type)
{
- if (type < 0x1000)
+ if (type < BASE_USER_TYPE)
return sizeofBasicType(type);
const codeview_type* cvtype = getTypeData(type);
@@ -1144,11 +1145,14 @@ int CV2PDB::sizeofType(int type) // to be used when writing new type only to avoid double translation
int CV2PDB::translateType(int type)
{
- if (type < 0x1000)
+ if (type < BASE_USER_TYPE)
{
+ // Check D lang typedefs.
for(int i = 0; i < cntTypedefs; i++)
if(type == typedefs[i])
return translatedTypedefs[i];
+
+ // Return original type.
return type;
}
@@ -1279,7 +1283,7 @@ bool CV2PDB::nameOfModifierType(int type, int mod, char* name, int maxlen) bool CV2PDB::nameOfType(int type, char* name, int maxlen)
{
- if(type < 0x1000)
+ if(type < BASE_USER_TYPE)
return nameOfBasicType(type, name, maxlen);
const codeview_type* ptype = getTypeData(type);
@@ -2032,7 +2036,7 @@ void CV2PDB::ensureUDT(int type, const codeview_type* cvtype) if (getStructProperty(cvtype) & kPropIncomplete)
cvtype = findCompleteClassType(cvtype, &type);
- if(findUdtSymbol(type + 0x1000))
+ if(findUdtSymbol(type + BASE_USER_TYPE))
return;
char name[kMaxNameLen];
@@ -2054,9 +2058,9 @@ void CV2PDB::ensureUDT(int type, const codeview_type* cvtype) int viewHelperType = nextUserType++;
// addUdtSymbol(viewHelperType, "object_viewhelper");
addUdtSymbol(viewHelperType, name);
+ } else {
+ addUdtSymbol(type + BASE_USER_TYPE, name);
}
- else
- addUdtSymbol(type + 0x1000, name);
}
int CV2PDB::createEmptyFieldListType()
@@ -2114,6 +2118,7 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation) return typedefType;
}
+// CV-only.
void CV2PDB::appendTypedefs()
{
if(Dversion == 0)
@@ -2135,6 +2140,7 @@ void CV2PDB::appendTypedefs() appendComplex(0x52, 0x42, 10, "creal");
}
+// CV-only.
bool CV2PDB::initGlobalTypes()
{
int object_derived_type = 0;
@@ -2160,7 +2166,7 @@ bool CV2PDB::initGlobalTypes() *(DWORD*) globalTypes = 4;
cbGlobalTypes = typePrefix;
- nextUserType = globalTypeHeader->cTypes + 0x1000;
+ nextUserType = globalTypeHeader->cTypes + BASE_USER_TYPE;
appendTypedefs();
if(Dversion > 0)
@@ -2277,7 +2283,7 @@ bool CV2PDB::initGlobalTypes() if(const codeview_type* td = getTypeData(type->struct_v1.fieldlist))
if(td->generic.id == LF_FIELDLIST_V1 || td->generic.id == LF_FIELDLIST_V2)
dtype->struct_v2.n_element = countFields((const codeview_reftype*)td);
- dtype->struct_v2.property = fixProperty(t + 0x1000, type->struct_v1.property,
+ dtype->struct_v2.property = fixProperty(t + BASE_USER_TYPE, type->struct_v1.property,
type->struct_v1.fieldlist);
#if REMOVE_LF_DERIVED
dtype->struct_v2.derived = 0;
@@ -2308,7 +2314,7 @@ bool CV2PDB::initGlobalTypes() dtype->union_v2.id = v3 ? LF_UNION_V3 : LF_UNION_V2;
dtype->union_v2.count = type->union_v1.count;
dtype->union_v2.fieldlist = type->struct_v1.fieldlist;
- dtype->union_v2.property = fixProperty(t + 0x1000, type->struct_v1.property, type->struct_v1.fieldlist);
+ dtype->union_v2.property = fixProperty(t + BASE_USER_TYPE, type->struct_v1.property, type->struct_v1.fieldlist);
leaf_len = numeric_leaf(&value, &type->union_v1.un_len);
memcpy (&dtype->union_v2.un_len, &type->union_v1.un_len, leaf_len);
len = pstrcpy_v(v3, (BYTE*) &dtype->union_v2.un_len + leaf_len,
@@ -2349,10 +2355,10 @@ bool CV2PDB::initGlobalTypes() dtype->mfunction_v2.rvtype = translateType(type->mfunction_v1.rvtype);
clsstype = type->mfunction_v1.class_type;
dtype->mfunction_v2.class_type = translateType(clsstype);
- if (clsstype >= 0x1000 && clsstype < 0x1000 + globalTypeHeader->cTypes)
+ if (clsstype >= BASE_USER_TYPE && clsstype < BASE_USER_TYPE + globalTypeHeader->cTypes)
{
// fix class_type to point to class, not pointer to class
- codeview_type* ctype = (codeview_type*)(typeData + offset[clsstype - 0x1000]);
+ codeview_type* ctype = (codeview_type*)(typeData + offset[clsstype - BASE_USER_TYPE]);
if (ctype->generic.id == LF_POINTER_V1)
dtype->mfunction_v2.class_type = translateType(ctype->pointer_v1.datatype);
}
@@ -2370,12 +2376,12 @@ bool CV2PDB::initGlobalTypes() dtype->enumeration_v2.count = type->enumeration_v1.count;
dtype->enumeration_v2.type = translateType(type->enumeration_v1.type);
dtype->enumeration_v2.fieldlist = type->enumeration_v1.fieldlist;
- dtype->enumeration_v2.property = fixProperty(t + 0x1000, type->enumeration_v1.property, type->enumeration_v1.fieldlist);
+ dtype->enumeration_v2.property = fixProperty(t + BASE_USER_TYPE, type->enumeration_v1.property, type->enumeration_v1.fieldlist);
len = pstrcpy_v (v3, (BYTE*) &dtype->enumeration_v2.p_name, (BYTE*) &type->enumeration_v1.p_name);
len += sizeof(dtype->enumeration_v2) - sizeof(dtype->enumeration_v2.p_name);
if(dtype->enumeration_v2.fieldlist && v3)
- if(!findUdtSymbol(t + 0x1000))
- addUdtSymbol(t + 0x1000, (char*) &dtype->enumeration_v2.p_name);
+ if(!findUdtSymbol(t + BASE_USER_TYPE))
+ addUdtSymbol(t + BASE_USER_TYPE, (char*) &dtype->enumeration_v2.p_name);
break;
case LF_FIELDLIST_V1:
@@ -2392,7 +2398,7 @@ bool CV2PDB::initGlobalTypes() rdtype->derived_v2.id = LF_DERIVED_V2;
rdtype->derived_v2.num = rtype->derived_v1.num;
for (int i = 0; i < rtype->derived_v1.num; i++)
- if (rtype->derived_v1.drvdcls[i] < 0x1000) // + globalTypeHeader->cTypes)
+ if (rtype->derived_v1.drvdcls[i] < BASE_USER_TYPE) // + globalTypeHeader->cTypes)
rdtype->derived_v2.drvdcls[i] = translateType(rtype->derived_v1.drvdcls[i] + 0xfff);
else
rdtype->derived_v2.drvdcls[i] = translateType(rtype->derived_v1.drvdcls[i]);
@@ -3166,8 +3172,8 @@ int CV2PDB::copySymbols(BYTE* srcSymbols, int srcSize, BYTE* destSymbols, int de codeview_symbol* dsym = (codeview_symbol*)(destSymbols + destSize);
memcpy(dsym, sym, length);
#endif
- if (type >= 0x1000 && pointerTypes[type - 0x1000])
- type = pointerTypes[type - 0x1000];
+ if (type >= BASE_USER_TYPE && pointerTypes[type - BASE_USER_TYPE])
+ type = pointerTypes[type - BASE_USER_TYPE];
}
}
dsym->stack_v2.id = v3 ? S_BPREL_V3 : S_BPREL_V1;
@@ -3279,6 +3285,8 @@ bool isUDTid(int id) return id == S_UDT_V1 || id == S_UDT_V2 || id == S_UDT_V3;
}
+// Find a user-defined type CV symbol.
+// CV-only.
codeview_symbol* CV2PDB::findUdtSymbol(int type)
{
type = translateType(type);
diff --git a/src/cv2pdb.h b/src/cv2pdb.h index 52f3455..c61a74b 100644 --- a/src/cv2pdb.h +++ b/src/cv2pdb.h @@ -169,17 +169,24 @@ public: bool addDWARFLines();
bool addDWARFPublics();
bool writeDWARFImage(const TCHAR* opath);
+ DWARF_InfoData* findEntryByPtr(byte* entryPtr) const;
+
+ // Helper to just print the DWARF tree we've built for debugging purposes.
+ void dumpDwarfTree() const;
bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi);
bool addDWARFProc(DWARF_InfoData& id, const std::vector<RangeEntry> &ranges, DIECursor cursor);
+ void formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, size_t cbBuf) const;
+
int addDWARFStructure(DWARF_InfoData& id, DIECursor cursor);
- int addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int off, int flStart);
- int addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor);
+ int addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int off, int flStart);
+ int addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor);
int addDWARFBasicType(const char*name, int encoding, int byte_size);
int addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor);
- int getTypeByDWARFPtr(byte* ptr);
+ int getTypeByDWARFPtr(byte* typePtr);
+ int findTypeIdByPtr(byte* typePtr) const;
int getDWARFTypeSize(const DIECursor& parent, byte* ptr);
- void getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor,
+ void getDWARFArrayBounds(DIECursor cursor,
int& basetype, int& lowerBound, int& upperBound);
void getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent,
int& basetype, int& lowerBound, int& upperBound);
@@ -211,6 +218,7 @@ public: OMFSegMapDesc* segMapDesc;
int* segFrame2Index;
+ // CV-only
OMFGlobalTypes* globalTypeHeader;
unsigned char* globalTypes;
@@ -236,8 +244,10 @@ public: int cbDwarfTypes;
int allocDwarfTypes;
- int nextUserType;
- int nextDwarfType;
+ static constexpr int BASE_USER_TYPE = 0x1000;
+
+ int nextUserType = BASE_USER_TYPE;
+ int nextDwarfType = BASE_USER_TYPE;
int objectType;
int emptyFieldListType;
@@ -272,9 +282,21 @@ public: double Dversion;
- // DWARF
+ // DWARF fields.
+
int codeSegOff;
- std::unordered_map<byte*, int> mapOffsetToType;
+
+ // Lookup table for type IDs based on the DWARF_InfoData::entryPtr
+ std::unordered_map<byte*, int> mapEntryPtrToTypeID;
+
+ // Lookup table for entries based on the DWARF_InfoData::entryPtr
+ std::unordered_map<byte*, DWARF_InfoData*> mapEntryPtrToEntry;
+
+ // A multimap keyed on entry name. Since this is not unique, we use a multimap.
+ std::multimap<std::string, DWARF_InfoData*> mapEntryNameToEntries;
+
+ // Head of list of DWARF DIE nodes.
+ DWARF_InfoData* dwarfHead = nullptr;
// Default lower bound for the current compilation unit. This depends on
// the language of the current unit.
diff --git a/src/cv2pdb.vcxproj b/src/cv2pdb.vcxproj index 6b96851..bc07d66 100644 --- a/src/cv2pdb.vcxproj +++ b/src/cv2pdb.vcxproj @@ -30,14 +30,14 @@ <ProjectGuid>{5E2BD27D-446A-4C99-9829-135F7C000D90}</ProjectGuid>
<RootNamespace>cv2pdb</RootNamespace>
<Keyword>Win32Proj</Keyword>
- <!-- guess the installed Windows SDK -->
+ <!-- guess the installed Windows SDK -->
<WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
<WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
<WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
<WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
<!-- Sometimes the version in the registry has the .0 suffix, and sometimes it doesn't. Check and add it -->
<WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10>
- <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
+ <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
@@ -299,6 +299,9 @@ <ItemGroup>
<None Include="packages.config" />
</ItemGroup>
+ <ItemGroup>
+ <Natvis Include="NatvisFile.natvis" />
+ </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
@@ -310,4 +313,4 @@ </PropertyGroup>
<Error Condition="!Exists('packages\Microsoft.VisualStudio.Setup.Configuration.Native.1.16.30\build\native\Microsoft.VisualStudio.Setup.Configuration.Native.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.VisualStudio.Setup.Configuration.Native.1.16.30\build\native\Microsoft.VisualStudio.Setup.Configuration.Native.targets'))" />
</Target>
-</Project>
+</Project>
\ No newline at end of file diff --git a/src/cv2pdb.vcxproj.filters b/src/cv2pdb.vcxproj.filters index 87f8a84..adf6a1b 100644 --- a/src/cv2pdb.vcxproj.filters +++ b/src/cv2pdb.vcxproj.filters @@ -78,4 +78,10 @@ <Filter>Source Files</Filter>
</MASM>
</ItemGroup>
+ <ItemGroup>
+ <None Include="packages.config" />
+ </ItemGroup>
+ <ItemGroup>
+ <Natvis Include="NatvisFile.natvis" />
+ </ItemGroup>
</Project>
\ No newline at end of file diff --git a/src/dumplines.vcxproj b/src/dumplines.vcxproj index 3460678..4bf62fd 100644 --- a/src/dumplines.vcxproj +++ b/src/dumplines.vcxproj @@ -26,13 +26,13 @@ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
- <PlatformToolset>v120_xp</PlatformToolset>
+ <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
- <PlatformToolset>v120_xp</PlatformToolset>
+ <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
diff --git a/src/dviewhelper/dviewhelper.vcxproj b/src/dviewhelper/dviewhelper.vcxproj index 8f9cb2a..e373574 100644 --- a/src/dviewhelper/dviewhelper.vcxproj +++ b/src/dviewhelper/dviewhelper.vcxproj @@ -13,24 +13,24 @@ <PropertyGroup Label="Globals"> <ProjectGuid>{E4424774-A7A0-4502-8626-2723904D70EA}</ProjectGuid> <Keyword>Win32Proj</Keyword> - <!-- guess the installed Windows SDK --> + <!-- guess the installed Windows SDK --> <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10> <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10> <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10> <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10> <!-- Sometimes the version in the registry has to .0 suffix, and sometimes it doesn't. 
Check and add it --> <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10> - <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion> + <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> - <PlatformToolset>v120</PlatformToolset> + <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <ConfigurationType>DynamicLibrary</ConfigurationType> - <PlatformToolset>v120</PlatformToolset> + <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset> <CharacterSet>MultiByte</CharacterSet> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index 3019856..b97dcb9 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -695,6 +695,147 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo) cbUdtSymbols += len;
}
+// Format the fully qualified name of 'node' into 'buf', e.g.
+// 'some_ns::Foo::Foo' for the constructor of class Foo in a namespace called
+// "some_ns". PDBs require fully qualified names in their symbols.
+//
+// node  - entry to name. It is looked up in the DWARF tree (through its
+//         'specification' if it has one, otherwise through its own entryPtr)
+//         so that the chain of parent scopes can be walked.
+// buf   - destination buffer.
+// cbBuf - size of 'buf' in bytes, including room for the terminating NUL.
+//
+// 'buf' is always NUL-terminated when cbBuf > 0. If the full name does not
+// fit, a message is printed to stderr and 'buf' holds only the leading part
+// that did fit.
+// TODO: better error handling for out of space.
+void CV2PDB::formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, size_t cbBuf) const {
+    if (!buf || cbBuf == 0)
+        return; // nowhere to write, not even a terminator.
+    buf[0] = 0;
+
+    if (node->specification) {
+        // If the proc has a "specification", i.e. a declaration, use it instead
+        // of the definition, as it has a proper hierarchy connected to it
+        // which will give us a proper fully-qualified name like Foo::Foo
+        // instead of just Foo.
+        const DWARF_InfoData* entry = findEntryByPtr(node->specification);
+        if (entry) {
+            node = entry;
+        }
+    } else {
+        // Find the node's entry in the DWARF tree. We can't use 'node' as is because
+        // it is a local copy without linkage into the tree, as it comes from
+        // the 2nd pass scan after the tree is already built.
+        const DWARF_InfoData* entry = findEntryByPtr(node->entryPtr);
+        assert(entry); // how can it not exist? Bug in tree construction.
+        if (entry) {
+            // With asserts compiled out, keep the caller's copy rather than
+            // dereferencing a null pointer below.
+            node = entry;
+        }
+    }
+
+    // Name used in diagnostics; 'node->name' may be null for anonymous entries
+    // and must not be handed to "%s" as is.
+    const char* reportName = node->name ? node->name : "[anon]";
+
+    // Accumulate the node and all its valid parent scopes, innermost first,
+    // so that they can be emitted in reverse.
+    std::vector<const DWARF_InfoData*> segments;
+    segments.push_back(node);
+    for (const DWARF_InfoData* parent = node->parent; parent; parent = parent->parent) {
+        switch (parent->tag) {
+        // TODO: are there any other kinds of valid parents?
+        case DW_TAG_class_type:
+        case DW_TAG_structure_type:
+        case DW_TAG_namespace:
+            segments.push_back(parent);
+            break;
+        default:
+            break;
+        }
+    }
+
+    // One byte is reserved up front for the terminating NUL so that every
+    // exit path can terminate the string at 'p'.
+    size_t remain = cbBuf - 1;
+    char* p = buf;
+
+    // Format the scopes outermost first with the :: operator in between.
+    for (size_t i = segments.size(); i-- > 0; ) {
+        const DWARF_InfoData* segment = segments[i];
+        const char* name = segment->name;
+        char nameBuf[64] = {};
+        if (!name) {
+            // This segment has no name. This could be because it is part of
+            // an anonymous class, which often happens for lambda expressions.
+            // Generate a unique anonymous name for it.
+            if (sprintf_s(nameBuf, "[anon_%x]", segment->entryOff) < 0) {
+                // Formatting failed. Use a default name.
+                assert(false); // crash in debug builds.
+                name = "[anon]";
+            } else {
+                name = nameBuf;
+            }
+        }
+        const size_t nameLen = strlen(name);
+        if (remain < nameLen) {
+            fprintf(stderr, "unable to fit full symbol name: %s\n", reportName);
+            *p = 0;
+            return;
+        }
+
+        memcpy(p, name, nameLen);
+        p += nameLen;
+        remain -= nameLen;
+
+        if (i > 0) {
+            // Append :: separator
+            if (remain < 2) {
+                fprintf(stderr, "unable to fit full symbol name (:: separator): %s\n", reportName);
+                *p = 0;
+                return;
+            }
+            *p++ = ':';
+            *p++ = ':';
+            remain -= 2;
+        }
+    }
+
+    *p = 0; // NUL terminate; space was reserved above.
+}
+
+// Forward declaration: the two merge helpers call each other.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
+
+// Look up the entry referenced by the 'abstract_origin' attribute of 'id'
+// (the source of an inlined function), resolve that entry's own
+// abstract_origin / specification references first, then merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
+{
+    if (DWARF_InfoData* const origin = context.findEntryByPtr(id.abstract_origin)) {
+        // The tags of 'id' and the origin are deliberately not required to
+        // match: the combination DW_TAG_member and DW_TAG_variable has been
+        // found in the wild.
+        DWARF_InfoData& source = *origin;
+
+        if (source.abstract_origin)
+            mergeAbstractOrigin(source, context);
+        if (source.specification)
+            mergeSpecification(source, context);
+
+        id.merge(source);
+        return;
+    }
+
+    // Could not find abstract origin. Why not?
+    assert(false);
+}
+
+// Look up the declaration entry referenced by the 'specification' attribute
+// of the definition 'id', resolve that declaration's own abstract_origin /
+// specification references first, then merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
+{
+    DWARF_InfoData* const decl = context.findEntryByPtr(id.specification);
+    if (decl == nullptr) {
+        // Could not find decl for this definition. Why not?
+        assert(false);
+        return;
+    }
+
+    // The tags of 'id' and the declaration are deliberately not required to
+    // match: the combination DW_TAG_member and DW_TAG_variable has been found
+    // in the wild.
+
+    const bool hasOrigin = decl->abstract_origin != nullptr;
+    if (hasOrigin)
+        mergeAbstractOrigin(*decl, context);
+
+    // Checked only after the origin merge above, which may have changed 'decl'.
+    if (decl->specification)
+        mergeSpecification(*decl, context);
+
+    id.merge(*decl);
+}
+
bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
{
unsigned int pclo = ranges.front().pclo - codeSegOff;
@@ -723,8 +864,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> cvs->proc_v2.flags = 0;
// printf("GlobalPROC %s\n", procid.name);
-
- len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, procid.name);
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&procid, namebuf, sizeof namebuf);
+ len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
for (; len & (align-1); len++)
udtSymbols[cbUdtSymbols + len] = 0xf4 - (len & 3);
@@ -762,8 +904,13 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> DWARF_InfoData id;
int off = 8;
+ // Save off the cursor to the start of the proc.
DIECursor prev = cursor;
- while (cursor.readNext(id, true))
+
+ // First, collect all the formal parameters of the proc.
+ // Don't worry about storing these in the tree as we're not going to need
+ // to generate fully-qualified names like we would for functions/classes.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_formal_parameter && id.name)
{
@@ -778,7 +925,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> }
appendEndArg();
+ // Now, collect all the lexical blocks and their stack variables.
std::vector<DIECursor> lexicalBlocks;
+
+ // Start from the proc base, and push all nested lexical blocks as you
+ // encounter them.
lexicalBlocks.push_back(prev);
while (!lexicalBlocks.empty())
@@ -786,7 +937,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> cursor = lexicalBlocks.back();
lexicalBlocks.pop_back();
- while (cursor.readNext(id))
+ while (cursor.readNext(&id))
{
if (id.tag == DW_TAG_lexical_block)
{
@@ -813,15 +964,23 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> {
appendLexicalBlock(id, pclo + codeSegOff);
DIECursor next = cursor;
+
+ // Compute the sibling node of this lexical block.
next.gotoSibling();
assert(lexicalBlocks.empty() || next.ptr <= lexicalBlocks.back().ptr);
+
+ // Append the next lexical block to the list of blocks
+ // to scan later.
lexicalBlocks.push_back(next);
+
+ // But for now, scan down the current lexical block.
cursor = cursor.getSubtreeCursor();
continue;
}
}
else if (id.tag == DW_TAG_variable)
{
+ // Found a local variable.
if (id.name && (id.location.type == ExprLoc || id.location.type == Block))
{
Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset)
@@ -864,14 +1023,15 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> return true;
}
-int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff, int flStart)
+// Only looks at DW_TAG_member and DW_TAG_inheritance
+int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int baseoff, int flStart)
{
bool isunion = structid.tag == DW_TAG_union_type;
int nfields = 0;
- // cursor points to the first member
+ // cursor points to the first member of the class/struct/union.
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -905,12 +1065,12 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo // if it doesn't have a name, and it's a struct or union, embed it directly
DIECursor membercursor(cursor, id.type);
DWARF_InfoData memberid;
- if (membercursor.readNext(memberid))
+ if (membercursor.readNext(&memberid))
{
if (memberid.abstract_origin)
- mergeAbstractOrigin(memberid, cursor);
+ mergeAbstractOrigin(memberid, *this);
if (memberid.specification)
- mergeSpecification(memberid, cursor);
+ mergeSpecification(memberid, *this);
int cvtype = -1;
switch (memberid.tag)
@@ -953,6 +1113,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo return nfields;
}
+// Add a class/struct/union to the database.
int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
{
//printf("Adding struct %s, entryoff %d, abbrev %d\n", structid.name, structid.entryOff, structid.abbrev);
@@ -990,25 +1151,29 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor) checkUserTypeAlloc(kMaxNameLen + 100);
codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
- const char* name = (structid.name ? structid.name : "__noname");
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&structid, namebuf, sizeof namebuf);
int attr = fieldlistType ? 0 : kPropIncomplete;
- int len = addAggregate(cvt, false, nfields, fieldlistType, attr, 0, 0, structid.byte_size, name, nullptr);
+ int len = addAggregate(cvt, structid.tag == DW_TAG_class_type, nfields, fieldlistType, attr, 0, 0, structid.byte_size, namebuf, nullptr);
cbUserTypes += len;
//ensureUDT()?
int cvtype = nextUserType++;
- addUdtSymbol(cvtype, name);
+ addUdtSymbol(cvtype, namebuf);
return cvtype;
}
-void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
+// Compute the array bounds of the DIE at the given 'cursor'.
+void CV2PDB::getDWARFArrayBounds(DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
{
DWARF_InfoData id;
// TODO: handle multi-dimensional arrays
if (cursor.cu)
{
- while (cursor.readNext(id, true))
+ // Don't insert these elements into the DB. We're just using it for
+ // array bounds calculation.
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_subrange_type)
{
@@ -1041,6 +1206,7 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& p upperBound = subrangeid.upper_bound;
}
+// Compute a type ID for a basic DWARF type.
int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
{
int type = 0, mode = 0, size = 0;
@@ -1103,10 +1269,13 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size) return translateType(t);
}
-int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor)
+// TODO: Array wanted to be scanned twice due to DW_TAG_subrange_type being looked at
+// in the caller. See if it can be handled in a single place for clarity, simplicity & efficiency.
+// Goal: don't rescan the same DIE twice.
+int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor)
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
checkUserTypeAlloc(kMaxNameLen + 100);
codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
@@ -1204,10 +1373,10 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor) /* Enumerated types are described in CodeView with two components:
1. A LF_ENUM leaf, representing the type itself. We put this one in the
- userTypes buffer.
+ userTypes buffer.
2. One or several LF_FIELDLIST records, to contain the list of
- enumerators (name and value) associated to the enum type
+ enumerators (name and value) associated to the enum type
(LF_ENUMERATE leaves). As type records cannot be larger 2**16 bytes,
we need to create multiple records when there are too many
enumerators. The first record contains the first LF_ENUMERATE leaves,
@@ -1242,7 +1411,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor) /* Now fill this field list with the enumerators we find in DWARF. */
DWARF_InfoData id;
- while (cursor.readNext(id, true))
+ while (cursor.readNext(&id, true /* stopAtNull */))
{
if (id.tag == DW_TAG_enumerator && id.has_const_value)
{
@@ -1253,7 +1422,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor) int len = addFieldEnumerate(dfieldtype, id.name, id.const_value);
/* If adding this enumerate leaves no room for a LF_INDEX leaf,
- create a new LF_FIELDLIST record now. */
+ create a new LF_FIELDLIST record now. */
if (fieldlistLength + len + sizeof(dfieldtype->index_v2) > 0xffff)
{
/* Append the LF_INDEX leaf. */
@@ -1306,34 +1475,148 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor) /* Now the LF_FIELDLIST is ready, create the LF_ENUM type record itself. */
checkUserTypeAlloc();
- int basetype = (enumid.type != 0)
- ? getTypeByDWARFPtr(enumid.type)
- : getDWARFBasicType(enumid.encoding, enumid.byte_size);
+ const DWARF_InfoData* entry = findEntryByPtr(enumid.entryPtr);
+ int prop = 0;
+ if (entry && entry->parent) {
+ int tag = entry->parent->tag;
+ if (tag == DW_TAG_class_type ||
+ tag == DW_TAG_structure_type ||
+ tag == DW_TAG_union_type)
+ {
+ prop |= kPropIsNested;
+ }
+ }
+
+ // NOTE: WinDbg/VS Dbg expects enum types to be base types, not indirect
+ // refs/UDTs.
+ //
+ // Compute the best base/underlying type to use.
+ int encoding = DW_ATE_signed; // default to int
+ const DWARF_InfoData* typeEntry = findEntryByPtr(enumid.type);
+ const DWARF_InfoData* t = typeEntry;
+
+ // Follow all the parent types to get to the base UDT.
+ while (t) {
+ t = findEntryByPtr(t->type);
+ if (t) typeEntry = t;
+ }
+
+ if (typeEntry) {
+ encoding = typeEntry->encoding;
+ assert(typeEntry->byte_size == enumid.byte_size);
+ }
+
+ const int basetype = getDWARFBasicType(encoding, enumid.byte_size);
+
dtype = (codeview_type*)(userTypes + cbUserTypes);
- const char* name = (enumid.name ? enumid.name : "__noname");
- cbUserTypes += addEnum(dtype, count, firstFieldlistType, 0, basetype, name);
+ char namebuf[kMaxNameLen] = {};
+ formatFullyQualifiedName(&enumid, namebuf, sizeof namebuf);
+ cbUserTypes += addEnum(dtype, count, firstFieldlistType, prop, basetype, namebuf);
int enumType = nextUserType++;
- addUdtSymbol(enumType, name);
+ addUdtSymbol(enumType, namebuf);
return enumType;
}
-int CV2PDB::getTypeByDWARFPtr(byte* ptr)
+// Try to find or compute the "best" CV TypeID for a given DIE found by following
+// a DW_AT_type attribute or its closest counterpart.
+int CV2PDB::getTypeByDWARFPtr(byte* typePtr)
{
- if (ptr == nullptr)
- return 0x03; // void
- std::unordered_map<byte*, int>::iterator it = mapOffsetToType.find(ptr);
- if (it == mapOffsetToType.end())
- return 0x03; // void
- return it->second;
+ if (typePtr == nullptr)
+ return T_NOTYPE;
+
+ // First just attempt to find the type entry directly.
+ int ret = findTypeIdByPtr(typePtr);
+ if (!ret) {
+ // TypeID was not found in the map. This may be due to struct
+ // decl / definition consolidation. I.e. we don't emit the struct decl
+ // because they show up as "empty" structs (devoid of members).
+ // Try to match this against the logically equivalent "definition"
+ // type.
+ DWARF_InfoData* entry = findEntryByPtr(typePtr);
+ assert(entry); // how can the entry not exist in the map?
+
+ // Skip anonymous structures or similar.
+ if (!entry || !entry->name) {
+ return T_NOTYPE;
+ }
+
+ // See if there exists another "logically equivalent" entry in the tree.
+ //
+ // First, find all entries with the same (local) name as this type.
+ auto range = mapEntryNameToEntries.equal_range(entry->name);
+ for (auto it = range.first; it != range.second; ++it) {
+ DWARF_InfoData* candidate = it->second;
+
+ // Skip self.
+ if (candidate == entry) {
+ continue;
+ }
+
+ // Skip declarations: for structs they don't help, because only
+ // definitions describe the fields in DWARF.
+ if (candidate->isDecl) {
+ continue;
+ }
+
+ // Filter nodes based on the matching tag.
+ if (candidate->tag != entry->tag) {
+ continue;
+ }
+
+ // Found a matching tag for this element. Walk up the tree and check
+ // whether all parent tags and names match as well. If they do, the
+ // candidate is "equivalent" to the node at 'typePtr', which was not added
+ // to the typeID registry (likely because it was a decl that we filtered out).
+ DWARF_InfoData* candidateParent = candidate->parent;
+ DWARF_InfoData* entryParent = entry->parent;
+
+ bool equivalentHierarchy = true;
+ while (candidateParent && entryParent) {
+ if (candidateParent->tag != entryParent->tag) {
+ // Tag mismatch.
+ equivalentHierarchy = false;
+ break;
+ }
+
+ // Skip CUs as of course they have different names. We only
+ // care about namespaces, other containing structs, classes, etc.
+ // Entries have the same tag. Checking one is sufficient.
+ if (entryParent->tag != DW_TAG_compile_unit) {
+
+ if (strcmp(candidateParent->name, entryParent->name)) {
+ // Name mismatch.
+ equivalentHierarchy = false;
+ break;
+ }
+ }
+
+ candidateParent = candidateParent->parent;
+ entryParent = entryParent->parent;
+ }
+
+ if (equivalentHierarchy) {
+ // Try another lookup with this new candidate.
+ ret = findTypeIdByPtr(candidate->entryPtr);
+ assert(ret); // how can it not be in the map?
+ } else {
+ fprintf(stderr, "warn: could not find equivalent entry for typePtr %p\n", typePtr);
+ }
+ }
+ }
+ return ret;
}
+// Get the logical size of a DWARF type, starting from 'typePtr' and recursing
+// if necessary. E.g. for arrays.
int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
{
DWARF_InfoData id;
DIECursor cursor(parent, typePtr);
- if (!cursor.readNext(id))
+ // Don't allocate this into the tree since we're just interested
+ // in computing a type.
+ if (!cursor.readNext(&id))
return 0;
if(id.byte_size > 0)
@@ -1348,7 +1631,7 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr) case DW_TAG_array_type:
{
int basetype, upperBound, lowerBound;
- getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound);
+ getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type);
}
default:
@@ -1359,6 +1642,10 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr) return 0;
}
+// Scan the .debug_info section and allocate type IDs for each unique type and
+// create a mapping to look them up by their address.
+// This is the first pass scan that builds up the DWARF tree. The second pass (createTypes)
+// emits the actual PDB symbols.
bool CV2PDB::mapTypes()
{
int typeID = nextUserType;
@@ -1367,13 +1654,21 @@ bool CV2PDB::mapTypes() if (debug & DbgBasic)
fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
+ // Maintain the first node of each CU to ensure all of them get linked.
+ DWARF_InfoData* firstNode = nullptr;
+
+ // Scan each compilation unit in '.debug_info'.
while (off < img.debug_info.length)
{
DWARF_CompilationUnitInfo cu{};
+
+ // Read the next compilation unit from 'off' and update it to the next
+ // CU.
byte* ptr = cu.read(debug, img, &off);
if (!ptr)
continue;
+ // We only support regular full 'DW_UT_compile' compilation units.
if (cu.unit_type != DW_UT_compile) {
if (debug & DbgDwarfCompilationUnit)
fprintf(stderr, "%s:%d: skipping compilation unit offs=%x, unit_type=%d\n", __FUNCTION__, __LINE__,
@@ -1383,28 +1678,57 @@ bool CV2PDB::mapTypes() }
DIECursor cursor(&cu, ptr);
- DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Set up link to ensure this CU links to the prior one.
+ cursor.prevNode = firstNode;
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
+ // Start scanning this CU from the beginning and *build a tree of DIE nodes*.
+ while ((node = cursor.readNext(nullptr)) != nullptr)
{
+ DWARF_InfoData& id = *node;
+
+ // Initialize the head of the DWARF DIE list the first time.
+ if (!dwarfHead) {
+ dwarfHead = node;
+ }
+
+ if (!setFirstNode) {
+ firstNode = node;
+ setFirstNode = true;
+ }
+
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Insert the node into the entryPtr-based index.
+ mapEntryPtrToEntry[node->entryPtr] = node;
+
+ // Insert named nodes into the name-based index.
+ if (node->name) {
+ mapEntryNameToEntries.insert({ node->name, node });
+ }
+
switch (id.tag)
{
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_union_type:
+ // skip generating a typeID for declaration flavor of
+ // class/struct/union since we don't emit the PDB symbol
+ // for them. See related code in CV2PDB::createTypes().
+ if (id.isDecl) continue;
case DW_TAG_base_type:
case DW_TAG_typedef:
case DW_TAG_pointer_type:
case DW_TAG_subroutine_type:
case DW_TAG_array_type:
case DW_TAG_const_type:
- case DW_TAG_structure_type:
case DW_TAG_reference_type:
-
- case DW_TAG_class_type:
case DW_TAG_enumeration_type:
case DW_TAG_string_type:
- case DW_TAG_union_type:
case DW_TAG_ptr_to_member_type:
case DW_TAG_set_type:
case DW_TAG_subrange_type:
@@ -1418,20 +1742,22 @@ bool CV2PDB::mapTypes() case DW_TAG_mutable_type: // withdrawn
case DW_TAG_shared_type:
case DW_TAG_rvalue_reference_type:
- mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
+ // Reserve a typeID and store it in the map for quick lookup.
+ mapEntryPtrToTypeID.insert(std::make_pair(id.entryPtr, typeID));
typeID++;
}
}
}
if (debug & DbgBasic)
- fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size());
+ fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapEntryPtrToTypeID.size());
nextDwarfType = typeID;
- assert(nextDwarfType == nextUserType + mapOffsetToType.size());
+ assert(nextDwarfType == nextUserType + mapEntryPtrToTypeID.size());
return true;
}
+// Second pass over .debug_info: emits the PDB types/symbols, using the DIE tree built by mapTypes().
bool CV2PDB::createTypes()
{
img.createSymbolCache();
@@ -1444,9 +1770,14 @@ bool CV2PDB::createTypes() fprintf(stderr, "%s:%d: createTypes()\n", __FUNCTION__, __LINE__);
unsigned long off = 0;
+
+ // Scan each compilation unit in '.debug_info'.
while (off < img.debug_info.length)
{
DWARF_CompilationUnitInfo cu{};
+
+ // Read the next compilation unit from 'off' and update it to the next
+ // CU, returning the pointer just beyond the header to the first DIE.
byte* ptr = cu.read(debug, img, &off);
if (!ptr)
continue;
@@ -1460,17 +1791,24 @@ bool CV2PDB::createTypes() }
DIECursor cursor(&cu, ptr);
+
+ DWARF_InfoData* node = nullptr;
+ bool setFirstNode = false;
DWARF_InfoData id;
- while (cursor.readNext(id))
+
+ // Scan the DIEs in this CU, reusing the elements.
+ while (cursor.readNext(&id))
{
if (debug & DbgDwarfTagRead)
fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
cursor.entryOff, cursor.level, id.code, id.tag);
+ // Merge in related entries. This relies on the DWARF tree having been built
+ // in the first pass (mapTypes).
if (id.abstract_origin)
- mergeAbstractOrigin(id, cursor);
+ mergeAbstractOrigin(id, *this);
if (id.specification)
- mergeSpecification(id, cursor);
+ mergeSpecification(id, *this);
int cvtype = -1;
switch (id.tag)
@@ -1501,14 +1839,23 @@ bool CV2PDB::createTypes() case DW_TAG_class_type:
case DW_TAG_structure_type:
case DW_TAG_union_type:
- cvtype = addDWARFStructure(id, cursor.getSubtreeCursor());
+ if (!id.isDecl) {
+ // Only export the non-declaration version of structs/classes.
+ // DWARF emits multiple copies of the same class, some of
+ // which are marked as declarations and lack members, resulting
+ // in an empty struct UDT in the PDB. Then when we encounter
+ // the non-declaration copy we emit it again, but now we
+ // end up with multiple copies of the same UDT in the PDB
+ // and the debugger gets confused.
+ cvtype = addDWARFStructure(id, cursor);
+ }
break;
case DW_TAG_array_type:
- cvtype = addDWARFArray(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFArray(id, cursor);
break;
case DW_TAG_enumeration_type:
- cvtype = addDWARFEnum(id, cursor.getSubtreeCursor());
+ cvtype = addDWARFEnum(id, cursor);
break;
case DW_TAG_subroutine_type:
@@ -1542,7 +1889,17 @@ bool CV2PDB::createTypes() mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0);
}
- addDWARFProc(id, ranges, cursor.getSubtreeCursor());
+
+ // Only add the definition, not declaration, because
+ // MSVC toolset only produces a single symbol for
+ // each function and will get confused if there are
+ // 2 PDB symbols for the same routine.
+ //
+ // TODO: Add more type info to the routine. Today we
+ // expose it as "T_NOTYPE" when we could do better.
+ if (!id.isDecl) {
+ addDWARFProc(id, ranges, cursor);
+ }
}
}
break;
@@ -1649,18 +2006,42 @@ bool CV2PDB::createTypes() if (cvtype >= 0)
{
- assert(cvtype == typeID); typeID++;
- assert(mapOffsetToType[id.entryPtr] == cvtype);
+ assert(cvtype == typeID);
+ typeID++;
+
+ assert(mapEntryPtrToTypeID[id.entryPtr] == cvtype);
assert(typeID == nextUserType);
}
}
}
assert(typeID == nextUserType);
- assert(typeID == firstUserType + mapOffsetToType.size());
+ assert(typeID == firstUserType + mapEntryPtrToTypeID.size());
return true;
}
+void printIndent(int level) {
+ for (int i = 0; i < level; ++i) {
+ printf(" ");
+ }
+}
+
+void dumpTreeHelper(DWARF_InfoData* node, int level) {
+ for (DWARF_InfoData* n = node; n; n = n->next) {
+ const unsigned dieOffset = n->img->debug_info.sectOff(n->entryPtr);
+
+ printIndent(level);
+ printf("offset: %#x, name: \"%s\", tag: %#x, abbrev: %d\n", dieOffset, n->name, n->tag, n->code);
+
+ // Visit the children.
+ dumpTreeHelper(n->children, level + 1);
+ }
+}
+
+void CV2PDB::dumpDwarfTree() const {
+ dumpTreeHelper(dwarfHead, 0);
+}
+
bool CV2PDB::createDWARFModules()
{
if(!img.debug_info.isPresent())
@@ -1709,6 +2090,10 @@ bool CV2PDB::createDWARFModules() if (!createTypes())
return false;
+ if (debug & DbgPrintDwarfTree) {
+ dumpDwarfTree();
+ }
+
/*
if(!iterateDWARFDebugInfo(kOpMapTypes))
return false;
@@ -1758,12 +2143,36 @@ bool CV2PDB::addDWARFPublics() mspdb::Mod* mod = globalMod();
int type = 0;
- int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, 0x1000);
+ int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, BASE_USER_TYPE);
if (rc <= 0)
return setError("cannot add public");
return true;
}
+// Try to lookup a DWARF_InfoData in the constructed DWARF tree given its
+// "entryPtr". I.e. its memory-mapped location in the loaded PE image buffer.
+DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
+{
+ auto it = mapEntryPtrToEntry.find(entryPtr);
+ if (it == mapEntryPtrToEntry.end()) {
+ // No entry is registered for this pointer.
+ return nullptr;
+ }
+ return it->second;
+}
+
+// Try to lookup a TypeID in the set of registered types by a
+// "typePtr". I.e. its memory-mapped location in the loaded PE image buffer.
+int CV2PDB::findTypeIdByPtr(byte* typePtr) const
+{
+ auto it = mapEntryPtrToTypeID.find(typePtr);
+ if (it == mapEntryPtrToTypeID.end()) {
+ // Could not find type for this definition.
+ return T_NOTYPE;
+ }
+ return it->second;
+}
+
bool CV2PDB::writeDWARFImage(const TCHAR* opath)
{
int len = sizeof(*rsds) + strlen((char*)(rsds + 1)) + 1;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index b77a1d0..e6f187c 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -1,14 +1,12 @@ #include "readDwarf.h"
#include <assert.h>
#include <array>
+#include <memory> // unique_ptr
#include "PEImage.h"
+#include "cv2pdb.h"
#include "dwarf.h"
#include "mspdb.h"
-extern "C" {
- #include "mscvpdb.h"
-}
-
// declare hasher for pair<T1,T2>
namespace std
@@ -34,6 +32,11 @@ void DIECursor::setContext(PEImage* img_, DebugLevel debug_) debug = debug_;
}
+// Read one compilation unit from `img`'s .debug_info section, starting at
+// offset `*off`, updating it in the process to the start of the next one in the
+// section.
+// Returns a pointer to the first DIE, skipping past the CU header, or NULL
+// on failure.
byte* DWARF_CompilationUnitInfo::read(DebugLevel debug, const PEImage& img, unsigned long *off)
{
byte* ptr = img.debug_info.byteAt(*off);
@@ -360,40 +363,15 @@ Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase, return stack[0];
}
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent)
-{
- DIECursor specCursor(parent, id.abstract_origin);
- DWARF_InfoData idspec;
- specCursor.readNext(idspec);
- // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
- // assert(id.tag == idspec.tag);
- if (idspec.abstract_origin)
- mergeAbstractOrigin(idspec, parent);
- if (idspec.specification)
- mergeSpecification(idspec, parent);
- id.merge(idspec);
-}
-
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent)
-{
- DIECursor specCursor(parent, id.specification);
- DWARF_InfoData idspec;
- specCursor.readNext(idspec);
- //assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
- //assert(id.tag == idspec.tag);
- if (idspec.abstract_origin)
- mergeAbstractOrigin(idspec, parent);
- if (idspec.specification)
- mergeSpecification(idspec, parent);
- id.merge(idspec);
-}
-
LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
: parent(parent)
{
+ // Default the base address to the compilation unit (DWARF v4 2.6.2)
base = parent.cu->base_address;
isLocLists = (parent.cu->version >= 5);
+ // DWARF v4 uses .debug_loc, DWARF v5 uses .debug_loclists with a different
+ // schema.
const PESection& sec = isLocLists ? parent.img->debug_loclists : parent.img->debug_loc;
ptr = sec.byteAt(off);
end = sec.endByte();
@@ -403,6 +381,8 @@ bool LOCCursor::readNext(LOCEntry& entry) {
if (isLocLists)
{
+ // DWARF v5 location list parsing.
+
if (parent.debug & DbgDwarfLocLists)
fprintf(stderr, "%s:%d: loclists off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__,
parent.img->debug_loclists.sectOff(ptr), parent.entryOff);
@@ -465,6 +445,8 @@ bool LOCCursor::readNext(LOCEntry& entry) }
else
{
+ // The logic here is governed by DWARF4 section 2.6.2.
+
if (ptr >= end)
return false;
@@ -472,10 +454,28 @@ bool LOCCursor::readNext(LOCEntry& entry) fprintf(stderr, "%s:%d: loclist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__,
parent.img->debug_loc.sectOff(ptr), parent.entryOff);
+ // Extract the begin and end offset
+ // TODO: Why is this truncating to 32 bit?
entry.beg_offset = (unsigned long) parent.RDAddr(ptr);
entry.end_offset = (unsigned long) parent.RDAddr(ptr);
- if (!entry.beg_offset && !entry.end_offset)
+
+ // Check for a base-address-selection entry.
+ if (entry.beg_offset == -1U) {
+ // This is a base address selection entry and thus has no location
+ // description.
+ // Update the base address with this entry's value.
+ base = entry.end_offset;
+
+ // Continue the scan, but don't try to decode further since there
+ // are no location description records following this type of entry.
+ return true;
+ }
+
+ // Check for end-of-list entry. (Both offsets 0)
+ if (!entry.beg_offset && !entry.end_offset) {
+ // Terminate the scan.
return false;
+ }
DWARF_Attribute attr;
attr.type = Block;
@@ -579,7 +579,7 @@ DIECursor::DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr_) cu = cu_;
ptr = ptr_;
level = 0;
- hasChild = false;
+ prevHasChild = false;
sibling = 0;
}
@@ -589,40 +589,41 @@ DIECursor::DIECursor(const DIECursor& parent, byte* ptr_) ptr = ptr_;
}
+// Advance the cursor to the next sibling of the current node, using the fast
+// path when possible.
void DIECursor::gotoSibling()
{
if (sibling)
{
- // use sibling pointer, if available
+ // Fast path: use sibling pointer, if available.
ptr = sibling;
- hasChild = false;
+ prevHasChild = false;
}
- else if (hasChild)
+ else if (prevHasChild)
{
- int currLevel = level;
+ // Slow path. Skip over child nodes until we get back to the current
+ // level.
+ const int currLevel = level;
level = currLevel + 1;
- hasChild = false;
+ prevHasChild = false;
+ // Don't store these in the tree since this is just used for skipping over
+ // large swaths of nodes.
DWARF_InfoData dummy;
- // read untill we pop back to the level we were at
+
+ // read until we pop back to the level we were at
while (level > currLevel)
- readNext(dummy, true);
+ readNext(&dummy, true /* stopAtNull */);
}
}
-bool DIECursor::readSibling(DWARF_InfoData& id)
-{
- gotoSibling();
- return readNext(id, true);
-}
-
DIECursor DIECursor::getSubtreeCursor()
{
- if (hasChild)
+ if (prevHasChild)
{
DIECursor subtree = *this;
subtree.level = 0;
- subtree.hasChild = false;
+ subtree.prevHasChild = false;
return subtree;
}
else // Return invalid cursor
@@ -691,31 +692,80 @@ static byte* getPointerInSection(const PEImage &img, const SectionDescriptor &se return peSec.byteAt(offset);
}
-bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
+// Scan the next DIE from the current CU.
+// TODO: Allocate a new element each time.
+DWARF_InfoData* DIECursor::readNext(DWARF_InfoData* entry, bool stopAtNull)
{
- id.clear();
+ std::unique_ptr<DWARF_InfoData> node;
+
+ // Controls whether we should bother establishing links between nodes.
+ // If 'entry' is provided, we are just going to be using it instead
+ // of allocating our own nodes. The callers typically reuse the same
+ // node over and over in this case, so don't bother tracking the links.
+ // Furthermore, since we clear the input node in this case, we can't rely
+ // on it from call to call.
+ // TODO: Rethink how to more cleanly express the alloc vs reuse modes of
+ // operation.
+ bool establishLinks = false;
+
+ // If an entry was passed in, use it. Else allocate one.
+ if (!entry) {
+ establishLinks = true;
+ node = std::make_unique<DWARF_InfoData>();
+ entry = node.get();
+ } else {
+ // If an entry was provided, make sure we clear it.
+ entry->clear();
+ }
- if (hasChild)
+ entry->img = img;
+
+ if (prevHasChild) {
+ // Prior element had a child, thus this element is its first child.
++level;
+ if (establishLinks) {
+ // Establish the first child.
+ prevParent->children = entry;
+ }
+ }
+
+ // Set up a convenience alias.
+ DWARF_InfoData& id = *entry;
+
+ // Find the first valid DIE.
for (;;)
{
if (level == -1)
- return false; // we were already at the end of the subtree
+ return nullptr; // we were already at the end of the subtree
if (ptr >= cu->end_ptr)
- return false; // root of the tree does not have a null terminator, but we know the length
+ return nullptr; // root of the tree does not have a null terminator, but we know the length
id.entryPtr = ptr;
- entryOff = img->debug_info.sectOff(ptr);
+ entryOff = id.entryOff = img->debug_info.sectOff(ptr);
id.code = LEB128(ptr);
+
+ // If the previously scanned node claimed to have a child, this must be a valid DIE.
+ assert(!prevHasChild || id.code);
+
+ // Check if we need to terminate the sibling chain.
if (id.code == 0)
{
- --level; // pop up one level
+ // Done with this level.
+ if (establishLinks) {
+ // Continue linking siblings from the parent node.
+ prevNode = prevParent;
+
+ // Unwind the parent one level up.
+ prevParent = prevParent->parent;
+ }
+
+ --level;
if (stopAtNull)
{
- hasChild = false;
- return false;
+ prevHasChild = false;
+ return nullptr;
}
continue; // read the next DIE
}
@@ -728,17 +778,42 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__,
id.code, entryOff);
assert(abbrev);
- return false;
+ return nullptr;
}
id.abbrev = abbrev;
id.tag = LEB128(abbrev);
id.hasChild = *abbrev++;
+ if (establishLinks) {
+ // If there was a previous node, link it to this one, thus continuing the chain.
+ if (prevNode) {
+ prevNode->next = entry;
+ }
+
+ // Establish parent of current node. If 'prevParent' is NULL, that is fine.
+ // It just means this node is a top-level node.
+ entry->parent = prevParent;
+
+ if (id.hasChild) {
+ // This node has children! Establish it as the new parent for future nodes.
+ prevParent = entry;
+
+ // Clear the last DIE because the next scanned node will form the *start*
+ // of a new linked list comprising the children of the current node.
+ prevNode = nullptr;
+ }
+ else {
+ // Ensure the next node appends itself to this one.
+ prevNode = entry;
+ }
+ }
+
if (debug & DbgDwarfAttrRead)
fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__,
entryOff, level, id.tag, id.code);
+ // Read all the attribute data for this DIE.
int attr, form;
for (;;)
{
@@ -804,7 +879,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = RDref(ptr); break;
case DW_FORM_loclistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_loclists, cu->loclist_base); break;
case DW_FORM_rnglistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_rnglists, cu->rnglist_base); break;
- default: assert(false && "Unsupported DWARF attribute form"); return false;
+ default: assert(false && "Unsupported DWARF attribute form"); return nullptr;
}
switch (attr)
@@ -847,6 +922,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_AT_type: assert(a.type == Ref); id.type = a.ref; break;
case DW_AT_inline: assert(a.type == Const); id.inlined = a.cons; break;
case DW_AT_external: assert(a.type == Flag); id.external = a.flag; break;
+ case DW_AT_declaration: assert(a.type == Flag); id.isDecl = a.flag; break;
case DW_AT_upper_bound:
assert(a.type == Const || a.type == Ref || a.type == ExprLoc || a.type == Block);
if (a.type == Const) // TODO: other types not supported yet
@@ -907,10 +983,12 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) }
}
- hasChild = id.hasChild != 0;
+ prevHasChild = id.hasChild != 0;
sibling = id.sibling;
- return true;
+ // Transfer ownership of 'node' to caller, if we allocated one.
+ node.release();
+ return entry;
}
byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode)
diff --git a/src/readDwarf.h b/src/readDwarf.h index 5e1db99..b4f2f85 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -11,6 +11,7 @@ typedef unsigned char byte;
class PEImage;
class DIECursor;
+class CV2PDB;
struct SectionDescriptor;
enum DebugLevel : unsigned {
@@ -24,7 +25,8 @@ enum DebugLevel : unsigned { DbgDwarfAttrRead = 0x400,
DbgDwarfLocLists = 0x800,
DbgDwarfRangeLists = 0x1000,
- DbgDwarfLines = 0x2000
+ DbgDwarfLines = 0x2000,
+ DbgPrintDwarfTree = 0x4000,
};
DEFINE_ENUM_FLAG_OPERATORS(DebugLevel);
@@ -180,30 +182,64 @@ struct DWARF_FileName }
};
+// In-memory representation of a DIE (Debugging Info Entry).
struct DWARF_InfoData
{
+ // The PEImage for this entry.
+ PEImage* img = nullptr;
+
+ // Pointer into the memory-mapped image section where this DIE is located.
byte* entryPtr;
+
+ unsigned int entryOff = 0; // the entry offset in the section it is in.
+
+ // Code to find the abbrev entry for this DIE, or 0 if it is a sentinel marking
+ // the end of a sibling chain.
int code;
+
+ // Pointer to the abbreviation table entry that corresponds to this DIE.
byte* abbrev;
int tag;
+
+ // Does this DIE have children?
int hasChild;
+ // Parent of this DIE, or NULL if top-level element.
+ DWARF_InfoData* parent = nullptr;
+
+ // Pointer to sibling in the tree. Not to be confused with 'sibling' below,
+ // which is a raw pointer to the DIE in the mapped/loaded image section.
+ // NULL if no more elements.
+ DWARF_InfoData* next = nullptr;
+
+ // Pointer to first child. This forms a linked list with the 'next' pointer.
+ // NULL if no children.
+ DWARF_InfoData* children = nullptr;
+
const char* name;
const char* linkage_name;
const char* dir;
unsigned long byte_size;
+
+ // Pointer to the sibling DIE in the mapped image.
byte* sibling;
unsigned long encoding;
unsigned long pclo;
unsigned long pchi;
unsigned long ranges; // -1u when attribute is not present
unsigned long pcentry;
+
+ // Pointer to the DW_AT_type DIE describing the type of this DIE.
byte* type;
byte* containing_type;
+
+ // Pointer to the DIE representing the declaration for this element if it
+ // is a definition. E.g. function decl for its definition/body.
byte* specification;
byte* abstract_origin;
unsigned long inlined;
- bool external;
+ bool external = false; // is this subroutine visible outside its compilation unit?
+ bool isDecl = false; // is this a declaration?
DWARF_Attribute location;
DWARF_Attribute member_location;
DWARF_Attribute frame_base;
@@ -223,6 +259,7 @@ struct DWARF_InfoData abbrev = 0;
tag = 0;
hasChild = 0;
+ parent = nullptr;
name = 0;
linkage_name = 0;
@@ -239,7 +276,8 @@ struct DWARF_InfoData specification = 0;
abstract_origin = 0;
inlined = 0;
- external = 0;
+ external = false;
+ isDecl = false;
member_location.type = Invalid;
location.type = Invalid;
frame_base.type = Invalid;
@@ -432,9 +470,15 @@ struct Location class LOCEntry
{
public:
- unsigned long beg_offset;
- unsigned long end_offset;
+ // TODO: investigate making these 64bit (or vary). Also consider renaming
+ // to Value0 and Value1 since their meanings varies depending on entry type.
+ unsigned long beg_offset; // or -1U for base address selection entries
+ unsigned long end_offset; // or the base address in base address selection entries
+
+ // DWARF v5 only. See DW_LLE_default_location.
bool isDefault;
+
+ // Location description.
Location loc;
void addBase(uint32_t base)
@@ -444,16 +488,24 @@ public: }
};
-// Location list cursor
+// Location list cursor (see DWARF v4 and v5 Section 2.6).
class LOCCursor
{
public:
LOCCursor(const DIECursor& parent, unsigned long off);
const DIECursor& parent;
+
+ // The base address for subsequent loc list entries read in a given list.
+ // Defaults to the CU base in the absence of any base address selection entries.
+ //
+ // TODO: So far we only assign to this but never actually use it.
uint32_t base;
+
byte* end;
byte* ptr;
+
+ // Is this image using the new debug_loclists section in DWARF v5?
bool isLocLists;
bool readNext(LOCEntry& entry);
@@ -495,19 +547,28 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t; // as either an absolute value, a register, or a register-relative address.
Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent);
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent);
-
// Debug Information Entry Cursor
class DIECursor
{
+ // TODO: make these private.
public:
- DWARF_CompilationUnitInfo* cu;
- byte* ptr;
+ DWARF_CompilationUnitInfo* cu = nullptr; // the CU we are reading from.
+ byte* ptr = nullptr; // the current mapped location we are reading from.
unsigned int entryOff;
- int level;
- bool hasChild; // indicates whether the last read DIE has children
- byte* sibling;
+ int level; // the current level of the tree in the scan.
+ bool prevHasChild = false; // indicates whether the last read DIE has children
+
+ // last DIE scanned. Used to link subsequent nodes in a list.
+ DWARF_InfoData* prevNode = nullptr;
+
+ // The last parent node to which all subsequent nodes should be assigned.
+ // Initially NULL, but as we encounter a node with children, we establish
+ // it as the new "parent" for future nodes, and reset it once we reach
+ // a top level node.
+ DWARF_InfoData* prevParent = nullptr;
+
+ // The mapped address of the sibling of the last scanned node, if any.
+ byte* sibling = nullptr;
static PEImage *img;
static abbrevMap_t abbrevMap;
@@ -528,17 +589,13 @@ public: // Goto next sibling DIE. If the last read DIE had any children, they will be skipped over.
void gotoSibling();
- // Reads next sibling DIE. If the last read DIE had any children, they will be skipped over.
- // Returns 'false' upon reaching the last sibling on the current level.
- bool readSibling(DWARF_InfoData& id);
-
// Returns cursor that will enumerate children of the last read DIE.
DIECursor getSubtreeCursor();
- // Reads the next DIE in physical order, returns 'true' if succeeds.
+ // Reads the next DIE in physical order, returns non-NULL if it succeeds.
// If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level).
// Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created.
- bool readNext(DWARF_InfoData& id, bool stopAtNull = false);
+ DWARF_InfoData* readNext(DWARF_InfoData* entry, bool stopAtNull = false);
// Read an address from p according to the ambient pointer size.
uint64_t RDAddr(byte* &p) const
|