From 28d3fad41817310b8fc2fd6d9e46137249d65fad Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Mon, 29 Nov 2021 12:44:16 -0800 Subject: Refactor PE image section handling. Add debug logging. Encapsulate dwarf-related PE sections into the PESection class. Remove some unused sections. Add helpers for common section operations. Move the context set via DIECursor::setContext to be static members of the class and add a debug context there. Add a standard method of enabling debug logging across the dwarf and PDB code. --- src/PEImage.cpp | 67 +++++++++++---------------------- src/PEImage.h | 108 +++++++++++++++++++++++++++++++++++++++++++++-------- src/cv2pdb.cpp | 20 ++++++++-- src/cv2pdb.h | 4 +- src/dwarf2pdb.cpp | 78 ++++++++++++++++++++++---------------- src/dwarflines.cpp | 50 +++++++++++-------------- src/main.cpp | 20 +++++++--- src/readDwarf.cpp | 85 +++++++++++++++++++++++------------------ src/readDwarf.h | 30 +++++++++++++-- 9 files changed, 291 insertions(+), 171 deletions(-) diff --git a/src/PEImage.cpp b/src/PEImage.cpp index 99c6776..247d514 100644 --- a/src/PEImage.cpp +++ b/src/PEImage.cpp @@ -35,19 +35,6 @@ PEImage::PEImage(const TCHAR* iname) , hdr32(0) , hdr64(0) , fd(-1) -, debug_aranges(0) -, debug_pubnames(0) -, debug_pubtypes(0) -, debug_info(0), debug_info_length(0) -, debug_abbrev(0), debug_abbrev_length(0) -, debug_line(0), debug_line_length(0) -, debug_frame(0), debug_frame_length(0) -, debug_str(0) -, debug_loc(0), debug_loc_length(0) -, debug_ranges(0), debug_ranges_length(0) -, codeSegment(0) -, linesSegment(-1) -, reloc(0), reloc_length(0) , nsec(0) , nsym(0) , symtable(0) @@ -470,6 +457,15 @@ static DWORD sizeInImage(const IMAGE_SECTION_HEADER& sec) return sec.SizeOfRawData < sec.Misc.VirtualSize ? 
sec.SizeOfRawData : sec.Misc.VirtualSize; } +void PEImage::initSec(PESection& peSec, int secNo) const +{ + auto &imgSec = sec[secNo]; + + peSec.length = sizeInImage(imgSec); + peSec.base = DPV(imgSec.PointerToRawData, peSec.length); + peSec.secNo = secNo; +} + void PEImage::initDWARFSegments() { for(int s = 0; s < nsec; s++) @@ -480,49 +476,30 @@ void PEImage::initDWARFSegments() int off = strtol(name + 1, 0, 10); name = strtable + off; } - if(strcmp(name, ".debug_aranges") == 0) - debug_aranges = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_pubnames") == 0) - debug_pubnames = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_pubtypes") == 0) - debug_pubtypes = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_info") == 0) - debug_info = DPV(sec[s].PointerToRawData, debug_info_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_abbrev") == 0) - debug_abbrev = DPV(sec[s].PointerToRawData, debug_abbrev_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_line") == 0) - debug_line = DPV(sec[linesSegment = s].PointerToRawData, debug_line_length = sizeInImage(sec[s])); - if (strcmp(name, ".debug_line_str") == 0) - debug_line_str = DPV(sec[s].PointerToRawData, debug_line_str_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_frame") == 0) - debug_frame = DPV(sec[s].PointerToRawData, debug_frame_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_str") == 0) - debug_str = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_loc") == 0) - debug_loc = DPV(sec[s].PointerToRawData, debug_loc_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_ranges") == 0) - debug_ranges = DPV(sec[s].PointerToRawData, debug_ranges_length = sizeInImage(sec[s])); - if(strcmp(name, ".reloc") == 0) - reloc = DPV(sec[s].PointerToRawData, reloc_length = sizeInImage(sec[s])); - if(strcmp(name, ".text") == 0) - codeSegment = s; + + for (const SectionDescriptor 
*sec_desc : sec_descriptors) { + if (!strcmp(name, sec_desc->name)) { + PESection& peSec = this->*(sec_desc->pSec); + initSec(peSec, s); + } + } } } bool PEImage::relocateDebugLineInfo(unsigned int img_base) { - if(!reloc || !reloc_length) + if(!reloc.isPresent()) return true; - char* relocbase = reloc; - char* relocend = reloc + reloc_length; + byte* relocbase = reloc.startByte(); + byte* relocend = reloc.endByte(); while(relocbase < relocend) { unsigned int virtadr = *(unsigned int *) relocbase; unsigned int chksize = *(unsigned int *) (relocbase + 4); char* p = RVA (virtadr, 1); - if(p >= debug_line && p < debug_line + debug_line_length) + if(debug_line.isPtrInside(p)) { for (unsigned int w = 8; w < chksize; w += 2) { @@ -536,7 +513,7 @@ bool PEImage::relocateDebugLineInfo(unsigned int img_base) } } } - if(chksize == 0 || chksize >= reloc_length) + if(chksize == 0 || chksize >= reloc.length) break; relocbase += chksize; } @@ -545,7 +522,7 @@ bool PEImage::relocateDebugLineInfo(unsigned int img_base) int PEImage::getRelocationInLineSegment(unsigned int offset) const { - return getRelocationInSegment(linesSegment, offset); + return getRelocationInSegment(debug_line.secNo, offset); } int PEImage::getRelocationInSegment(int segment, unsigned int offset) const diff --git a/src/PEImage.h b/src/PEImage.h index dd170bf..a3e3cd7 100644 --- a/src/PEImage.h +++ b/src/PEImage.h @@ -15,6 +15,8 @@ struct OMFDirHeader; struct OMFDirEntry; +typedef unsigned char byte; + struct SymbolInfo { int seg; @@ -22,6 +24,72 @@ struct SymbolInfo bool dllimport; }; +struct PESection +{ + byte* base; + unsigned long length; + unsigned int secNo; + + PESection() + : base(0) + , length(0) + , secNo(0) + { + } + + byte* byteAt(unsigned int off) const + { + return base + off; + } + + byte* startByte() const + { + return byteAt(0); + } + + byte* endByte() const + { + return byteAt(0) + length; + } + + bool isPresent() const + { + return base && length; + } + + bool isPtrInside(const void *p) 
const + { + auto pInt = (uintptr_t)p; + return (pInt >= (uintptr_t)base && pInt < (uintptr_t)base + length); + } + + unsigned int sectOff(void *p) const + { + return (unsigned int)((uintptr_t)p - (uintptr_t)base); + } +}; + +// Define the list of interesting PE sections in one place so that we can +// generate definitions needed to populate our pointers and reference each +// section. + +#define SECTION_LIST() \ + EXPANDSEC(debug_addr) \ + EXPANDSEC(debug_info) \ + EXPANDSEC(debug_abbrev) \ + EXPANDSEC(debug_line) \ + EXPANDSEC(debug_line_str) \ + EXPANDSEC(debug_frame) \ + EXPANDSEC(debug_str) \ + EXPANDSEC(debug_str_offsets) \ + EXPANDSEC(debug_loc) \ + EXPANDSEC(debug_loclists) \ + EXPANDSEC(debug_ranges) \ + EXPANDSEC(debug_rnglists) \ + EXPANDSEC(reloc) \ + EXPANDSEC(text) + + #define IMGHDR(x) (hdr32 ? hdr32->x : hdr64->x) class PEImage : public LastError @@ -70,6 +138,7 @@ public: bool save(const TCHAR* oname); bool replaceDebugSection (const void* data, int datalen, bool initCV); + void initSec(PESection& peSec, int secNo) const; bool initCVPtr(bool initDbgDir); bool initDbgPtr(bool initDbgDir); bool initDWARFPtr(bool initDbgDir); @@ -77,7 +146,7 @@ public: void initDWARFSegments(); bool relocateDebugLineInfo(unsigned int img_base); - bool hasDWARF() const { return debug_line != 0; } + bool hasDWARF() const { return debug_line.isPresent(); } bool isX64() const { return x64; } bool isDBG() const { return dbgfile; } @@ -131,23 +200,30 @@ private: public: //dwarf - char* debug_aranges; - char* debug_pubnames; - char* debug_pubtypes; - char* debug_info; unsigned long debug_info_length; - char* debug_abbrev; unsigned long debug_abbrev_length; - char* debug_line; unsigned long debug_line_length; - char* debug_line_str; unsigned long debug_line_str_length; - char* debug_frame; unsigned long debug_frame_length; - char* debug_str; - char* debug_loc; unsigned long debug_loc_length; - char* debug_ranges; unsigned long debug_ranges_length; - char* reloc; unsigned long 
reloc_length; - - int linesSegment; - int codeSegment; +#define EXPANDSEC(name) PESection name; + SECTION_LIST() +#undef EXPANDSEC + int cv_base; }; +struct SectionDescriptor { + const char *name; + PESection PEImage::* pSec; +}; + +#define EXPANDSEC(name) constexpr SectionDescriptor sec_desc_##name { "." #name, &PEImage::name }; +SECTION_LIST() +#undef EXPANDSEC + +constexpr const SectionDescriptor *sec_descriptors[] = +{ +#define EXPANDSEC(name) &sec_desc_##name, + SECTION_LIST() +#undef EXPANDSEC +}; + + +#undef SECTION_LIST #endif //__PEIMAGE_H__ diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp index bb4727b..696cb74 100644 --- a/src/cv2pdb.cpp +++ b/src/cv2pdb.cpp @@ -17,7 +17,7 @@ static const int typePrefix = 4; -CV2PDB::CV2PDB(PEImage& image) +CV2PDB::CV2PDB(PEImage& image, DebugLevel debug_) : img(image), pdb(0), dbi(0), tpi(0), ipi(0), libraries(0), rsds(0), rsdsLen(0), modules(0), globmod(0) , segMap(0), segMapDesc(0), segFrame2Index(0), globalTypeHeader(0) , globalTypes(0), cbGlobalTypes(0), allocGlobalTypes(0) @@ -28,7 +28,7 @@ CV2PDB::CV2PDB(PEImage& image) , srcLineStart(0), srcLineSections(0) , pointerTypes(0) , Dversion(2) -, debug(false) +, debug(debug_) , classEnumType(0), ifaceEnumType(0), cppIfaceEnumType(0), structEnumType(0) , classBaseType(0), ifaceBaseType(0), cppIfaceBaseType(0), structBaseType(0) , emptyFieldListType(0) @@ -149,7 +149,7 @@ bool CV2PDB::openPDB(const TCHAR* pdbname, const TCHAR* pdbref) if (!initMsPdb ()) return setError("cannot load PDB helper DLL"); - if (debug) + if (debug & DbgBasic) { extern HMODULE modMsPdb; char modpath[260]; @@ -737,6 +737,9 @@ int CV2PDB::countNestedTypes(const codeview_reftype* fieldlist, int type) int CV2PDB::addAggregate(codeview_type* dtype, bool clss, int n_element, int fieldlist, int property, int derived, int vshape, int structlen, const char* name, const char* uniquename) { + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding aggregate %s -> fieldlist:%d\n", __FUNCTION__, __LINE__, 
name, fieldlist); + dtype->struct_v2.id = clss ? (v3 ? LF_CLASS_V3 : LF_CLASS_V2) : (v3 ? LF_STRUCTURE_V3 : LF_STRUCTURE_V2); dtype->struct_v2.n_element = n_element; dtype->struct_v2.fieldlist = fieldlist; @@ -771,6 +774,9 @@ int CV2PDB::addStruct(codeview_type* dtype, int n_element, int fieldlist, int pr int CV2PDB::addEnum(codeview_type* dtype, int count, int fieldlist, int property, int type, const char*name) { + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding enum %s -> fieldlist:%d\n", __FUNCTION__, __LINE__, name, fieldlist); + dtype->enumeration_v2.id = (v3 ? LF_ENUM_V3 : LF_ENUM_V2); dtype->enumeration_v2.count = count; dtype->enumeration_v2.fieldlist = fieldlist; @@ -2074,6 +2080,9 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation) if(type == 0x78) basetype = 0x75; // dchar type not understood by debugger, use uint instead + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding typedef %s -> %d\n", __FUNCTION__, __LINE__, name, type); + int typedefType; if(useTypedefEnum) { @@ -2981,6 +2990,9 @@ bool CV2PDB::addPublics() char symname[kMaxNameLen]; dsym2c((BYTE*)sym->data_v1.p_name.name, sym->data_v1.p_name.namelen, symname, sizeof(symname)); int type = translateType(sym->data_v1.symtype); + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: AddPublic2 %s\n", __FUNCTION__, __LINE__, (const char *)symname); + if (mod) rc = mod->AddPublic2(symname, sym->data_v1.segment, sym->data_v1.offset, type); else @@ -2997,6 +3009,8 @@ bool CV2PDB::addPublics() bool CV2PDB::initGlobalSymbols() { + if (debug & DbgBasic) + fprintf(stderr, "%s:%d, countEntries: %d\n", __FUNCTION__, __LINE__, (int)countEntries); for (int m = 0; m < countEntries; m++) { OMFDirEntry* entry = img.getCVEntry(m); diff --git a/src/cv2pdb.h b/src/cv2pdb.h index ae72ca1..26b2143 100644 --- a/src/cv2pdb.h +++ b/src/cv2pdb.h @@ -30,7 +30,7 @@ class CFIIndex; class CV2PDB : public LastError { public: - CV2PDB(PEImage& image); + CV2PDB(PEImage& image, DebugLevel 
debug); ~CV2PDB(); bool cleanup(bool commit); @@ -265,7 +265,7 @@ public: bool useGlobalMod; bool thisIsNotRef; bool v3; - bool debug; + DebugLevel debug; const char* lastError; int srcLineSections; diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index bc030e6..f58067d 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -230,8 +230,8 @@ class CFICursor { public: CFICursor(const PEImage& img) - : beg((byte*)img.debug_frame) - , end((byte*)img.debug_frame + img.debug_frame_length) + : beg(img.debug_frame.startByte()) + , end(img.debug_frame.endByte()) , ptr(beg) { default_address_size = img.isX64() ? 8 : 4; @@ -489,7 +489,7 @@ Location findBestCFA(const PEImage& img, const CFIIndex* index, unsigned int pcl { bool x64 = img.isX64(); Location ebp = { Location::RegRel, x64 ? 6 : 5, x64 ? 16 : 8 }; - if (!img.debug_frame) + if (!img.debug_frame.isPresent()) return ebp; byte *fde_ptr = index->lookup(pclo, pchi); @@ -515,7 +515,6 @@ Location findBestCFA(const PEImage& img, const CFIIndex* index, unsigned int pcl class LOCEntry { public: - byte* ptr; unsigned long beg_offset; unsigned long end_offset; Location loc; @@ -529,8 +528,8 @@ class LOCCursor public: LOCCursor(const PEImage& image, unsigned long off) : img (image) - , end((byte*)img.debug_loc + img.debug_loc_length) - , ptr((byte*)img.debug_loc + off) + , end(img.debug_loc.endByte()) + , ptr(img.debug_loc.byteAt(off)) { default_address_size = img.isX64() ? 
8 : 4; } @@ -682,7 +681,7 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo) dsym->block_v3.end = 0; // destSize + sizeof(dsym->block_v3) + 12; dsym->block_v3.length = id.pchi - id.pclo; dsym->block_v3.offset = id.pclo - codeSegOff; - dsym->block_v3.segment = img.codeSegment + 1; + dsym->block_v3.segment = img.text.secNo + 1; dsym->block_v3.name[0] = 0; int len = sizeof(dsym->block_v3); for (; len & 3; len++) @@ -701,6 +700,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE checkUdtSymbolAlloc(100 + kMaxNameLen); + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: Adding a proc: %s at %x\n", __FUNCTION__, __LINE__, procid.name, pclo); + // GLOBALPROC codeview_symbol*cvs = (codeview_symbol*) (udtSymbols + cbUdtSymbols); cvs->proc_v2.id = v3 ? S_GPROC_V3 : S_GPROC_V2; @@ -711,7 +713,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE cvs->proc_v2.debug_start = pclo - pclo; cvs->proc_v2.debug_end = pchi - pclo; cvs->proc_v2.offset = pclo; - cvs->proc_v2.segment = img.codeSegment + 1; + cvs->proc_v2.segment = img.text.secNo + 1; cvs->proc_v2.proctype = 0; // translateType(sym->proc_v1.proctype); cvs->proc_v2.flags = 0; @@ -793,8 +795,8 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE id.pchi = 0; // TODO: handle base address selection - byte *r = (byte *)img.debug_ranges + id.ranges; - byte *rend = (byte *)img.debug_ranges + img.debug_ranges_length; + byte *r = img.debug_ranges.byteAt(id.ranges); + byte *rend = img.debug_ranges.endByte(); while (r < rend) { uint64_t pclo, pchi; @@ -1122,7 +1124,7 @@ bool CV2PDB::addDWARFTypes() checkUdtSymbolAlloc(100); int prefix = 4; - DWORD* ddata = new DWORD [img.debug_info_length/4]; // large enough + DWORD* ddata = new DWORD [img.debug_info.length/4]; // large enough unsigned char *data = (unsigned char*) (ddata + prefix); unsigned int off = 0; unsigned int len; @@ -1131,7 +1133,7 @@ bool 
CV2PDB::addDWARFTypes() // SSEARCH codeview_symbol* cvs = (codeview_symbol*) (data + off); cvs->ssearch_v1.id = S_SSEARCH_V1; - cvs->ssearch_v1.segment = img.codeSegment + 1; + cvs->ssearch_v1.segment = img.text.secNo + 1; cvs->ssearch_v1.offset = 0; len = sizeof(cvs->ssearch_v1); for (; len & (align-1); len++) @@ -1350,9 +1352,9 @@ bool CV2PDB::mapTypes() { int typeID = nextUserType; unsigned long off = 0; - while (off < img.debug_info_length) + while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)(img.debug_info + off); + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); DWARF_InfoData id; @@ -1396,6 +1398,9 @@ bool CV2PDB::mapTypes() off += sizeof(cu->unit_length) + cu->unit_length; } + if (debug & DbgBasic) + fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size()); + nextDwarfType = typeID; return true; } @@ -1407,17 +1412,21 @@ bool CV2PDB::createTypes() int typeID = nextUserType; int pointerAttr = img.isX64() ? 
0x1000C : 0x800A; + if (debug & DbgBasic) + fprintf(stderr, "%s:%d: createTypes()\n", __FUNCTION__, __LINE__); + unsigned long off = 0; - while (off < img.debug_info_length) + while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)(img.debug_info + off); + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); DWARF_InfoData id; while (cursor.readNext(id)) { - //printf("0x%08x, level = %d, id.code = %d, id.tag = %d\n", - // (unsigned char*)cu + id.entryOff - (unsigned char*)img.debug_info, cursor.level, id.code, id.tag); + if (debug & DbgDwarfTagRead) + fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__, + cursor.entryOff, cursor.level, id.code, id.tag); if (id.abstract_origin) mergeAbstractOrigin(id, cu); @@ -1497,8 +1506,8 @@ bool CV2PDB::createTypes() else if (id.ranges != ~0) { entry_point = ~0; - byte* r = (byte*)img.debug_ranges + id.ranges; - byte* rend = (byte*)img.debug_ranges + img.debug_ranges_length; + byte* r = (byte*)img.debug_ranges.byteAt(id.ranges); + byte* rend = (byte*)img.debug_ranges.endByte(); while (r < rend) { uint64_t pclo, pchi; @@ -1524,7 +1533,12 @@ bool CV2PDB::createTypes() } if (entry_point) - mod->AddPublic2(id.name, img.codeSegment + 1, entry_point - codeSegOff, 0); + { + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: Adding a public: %s at %x\n", __FUNCTION__, __LINE__, id.name, entry_point); + + mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0); + } } if (id.pclo && id.pchi) @@ -1555,10 +1569,10 @@ bool CV2PDB::createTypes() #if !FULL_CONTRIB if (id.dir && id.name) { - if (id.ranges > 0 && id.ranges < img.debug_ranges_length) + if (id.ranges > 0 && id.ranges < img.debug_ranges.length) { - unsigned char* r = (unsigned char*)img.debug_ranges + id.ranges; - unsigned char* rend = (unsigned char*)img.debug_ranges + img.debug_ranges_length; 
+ unsigned char* r = img.debug_ranges.byteAt(id.ranges); + unsigned char* rend = img.debug_ranges.endByte(); while (r < rend) { unsigned long pclo = RD4(r); @@ -1644,10 +1658,10 @@ bool CV2PDB::createTypes() bool CV2PDB::createDWARFModules() { - if(!img.debug_info) + if(!img.debug_info.isPresent()) return setError("no .debug_info section found"); - codeSegOff = img.getImageBase() + img.getSection(img.codeSegment).VirtualAddress; + codeSegOff = img.getImageBase() + img.getSection(img.text.secNo).VirtualAddress; mspdb::Mod* mod = globalMod(); for (int s = 0; s < img.countSections(); s++) @@ -1662,7 +1676,7 @@ bool CV2PDB::createDWARFModules() #if FULL_CONTRIB // we use a single global module, so we can simply add the whole text segment int segFlags = 0x60101020; // 0x40401040, 0x60500020; // TODO - int s = img.codeSegment; + int s = img.text.secNo; int pclo = 0; // img.getImageBase() + img.getSection(s).VirtualAddress; int pchi = pclo + img.getSection(s).Misc.VirtualSize; int rc = mod->AddSecContrib(s + 1, pclo, pchi - pclo, segFlags); @@ -1682,7 +1696,7 @@ bool CV2PDB::createDWARFModules() appendComplex(0x52, 0x42, 12, "creal"); } - DIECursor::setContext(&img); + DIECursor::setContext(&img, debug); countEntries = 0; if (!mapTypes()) @@ -1725,10 +1739,10 @@ bool CV2PDB::createDWARFModules() bool CV2PDB::addDWARFLines() { - if(!img.debug_line) + if(!img.debug_line.isPresent()) return setError("no .debug_line section found"); - if (!interpretDWARFLines(img, globalMod())) + if (!interpretDWARFLines(img, globalMod(), debug)) return setError("cannot add line number info to module"); return true; @@ -1739,7 +1753,7 @@ bool CV2PDB::addDWARFPublics() mspdb::Mod* mod = globalMod(); int type = 0; - int rc = mod->AddPublic2("public_all", img.codeSegment + 1, 0, 0x1000); + int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, 0x1000); if (rc <= 0) return setError("cannot add public"); return true; @@ -1759,7 +1773,7 @@ bool CV2PDB::writeDWARFImage(const TCHAR* opath) 
void CV2PDB::build_cfi_index() { - if (img.debug_frame == NULL) + if (!img.debug_frame.isPresent()) return; cfi_index = new CFIIndex(img); } diff --git a/src/dwarflines.cpp b/src/dwarflines.cpp index b061f79..2e0570d 100644 --- a/src/dwarflines.cpp +++ b/src/dwarflines.cpp @@ -10,6 +10,8 @@ #include "dwarf.h" #include "readDwarf.h" +static DebugLevel debug; + bool isRelativePath(const std::string& s) { if(s.length() < 1) @@ -57,12 +59,8 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat // throw away invalid lines (mostly due to "set address to 0") state.lineInfo.resize(0); return true; - //return false; } -// if(saddr >= 0x4000) -// return true; - const DWARF_FileName* dfn; if(state.lineInfo_file == 0) dfn = state.file_ptr; @@ -92,14 +90,8 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat return true; } #if 1 - bool dump = false; // (fname == "cvtest.d"); //qsort(&state.lineInfo[0], state.lineInfo.size(), sizeof(state.lineInfo[0]), cmpAdr); -#if 0 - printf("%s:\n", fname.c_str()); - for(size_t ln = 0; ln < state.lineInfo.size(); ln++) - printf(" %08x: %4d\n", state.lineInfo[ln].offset + 0x401000, state.lineInfo[ln].line); -#endif - + int rc = 1; unsigned int low_offset = state.lineInfo[0].offset; unsigned short low_line = state.lineInfo[0].line; @@ -120,9 +112,11 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat // This subtraction can underflow to (unsigned)-1 if this info is only for a single instruction, but AddLines will immediately increment it to 0, so this is fine. Not underflowing this can cause the debugger to ignore other line info for address ranges that include this address. 
unsigned int address_range_length = high_offset - low_offset; - if (dump) - printf("AddLines(%08x+%04x, Line=%4d+%3d, %s)\n", low_offset, address_range_length, low_line, - state.lineInfo.size(), fname.c_str()); + if (debug & DbgPdbLines) + fprintf(stderr, "%s:%d: AddLines(%08x+%04x, Line=%4d+%3d, %s)\n", __FUNCTION__, __LINE__, + low_offset, address_range_length, low_line, + (unsigned int)state.lineInfo.size(), fname.c_str()); + rc = mod->AddLines(fname.c_str(), segIndex + 1, low_offset, address_range_length, low_offset, low_line, (unsigned char*)&state.lineInfo[0], state.lineInfo.size() * sizeof(state.lineInfo[0])); @@ -147,10 +141,6 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) if (state.end_sequence) return _flushDWARFLines(img, mod, state); -#if 0 - const char* fname = (state.file == 0 ? state.file_ptr->file_name : state.files[state.file - 1].file_name); - printf("Adr:%08x Line: %5d File: %s\n", state.address, state.line, fname); -#endif if (state.address < state.seg_offset) return true; mspdb::LineInfoEntry entry; @@ -177,15 +167,17 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) return true; } -bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) +bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info; + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.startByte(); int ptrsize = cu ? 
cu->address_size : 4; + debug = debug_; + DWARF_LineNumberProgramHeader hdr5; - for(unsigned long off = 0; off < img.debug_line_length; ) + for(unsigned long off = 0; off < img.debug_line.length; ) { - DWARF_LineNumberProgramHeader* hdrver = (DWARF_LineNumberProgramHeader*) (img.debug_line + off); + DWARF_LineNumberProgramHeader* hdrver = (DWARF_LineNumberProgramHeader*)img.debug_line.byteAt(off); int length = hdrver->unit_length; if(length < 0) break; @@ -226,6 +218,10 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) unsigned char* p = (unsigned char*) hdrver + hdrlength; unsigned char* end = (unsigned char*) hdrver + length; + if (debug & DbgDwarfLines) + fprintf(stderr, "%s:%d: LineNumberProgramHeader offs=%x ver=%d\n", __FUNCTION__, __LINE__, + off, hdr->version); + std::vector opcode_lengths; opcode_lengths.resize(hdr->opcode_base); if (hdr->opcode_base > 0) @@ -236,7 +232,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) } DWARF_LineState state; - state.seg_offset = img.getImageBase() + img.getSection(img.codeSegment).VirtualAddress; + state.seg_offset = img.getImageBase() + img.getSection(img.text.secNo).VirtualAddress; DWARF_FileName fname; if (hdr->version <= 4) @@ -285,7 +281,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_FORM_line_strp: { size_t offset = cu->isDWARF64() ? RD8(p) : RD4(p); - state.include_dirs.push_back(img.debug_line_str + offset); + state.include_dirs.push_back((const char*)img.debug_line_str.byteAt(offset)); break; } case DW_FORM_string: @@ -327,7 +323,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_FORM_line_strp: { size_t offset = cu->isDWARF64() ? 
RD8(p) : RD4(p); - fname.file_name = img.debug_line_str + offset; + fname.file_name = (const char*)img.debug_line_str.byteAt(offset); break; } case DW_FORM_string: @@ -387,8 +383,6 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) switch(excode) { case DW_LNE_end_sequence: - if((char*)p - img.debug_line >= 0xe4e0) - p = p; state.end_sequence = true; state.last_addr = state.address; if(!addLineInfo(img, mod, state)) @@ -398,7 +392,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_LNE_set_address: { if (!mod && state.section == -1) - state.section = img.getRelocationInLineSegment((char*)p - img.debug_line); + state.section = img.getRelocationInLineSegment(img.debug_line.sectOff(p)); unsigned long adr = ptrsize == 8 ? RD8(p) : RD4(p); state.address = adr; state.op_index = 0; diff --git a/src/main.cpp b/src/main.cpp index ced7aac..91b300a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,6 +21,8 @@ double #define T_strcpy wcscpy #define T_strcat wcscat #define T_strstr wcsstr +#define T_strncmp wcsncmp +#define T_strtoul wcstoul #define T_strtod wcstod #define T_strrchr wcsrchr #define T_unlink _wremove @@ -34,6 +36,8 @@ double #define T_strcpy strcpy #define T_strcat strcat #define T_strstr strstr +#define T_strncmp strncmp +#define T_strtoul strtoul #define T_strtod strtod #define T_strrchr strrchr #define T_unlink unlink @@ -120,7 +124,7 @@ int T_main(int argc, TCHAR* argv[]) { double Dversion = 2.072; const TCHAR* pdbref = 0; - bool debug = false; + DebugLevel debug = DebugLevel{}; CoInitialize(nullptr); @@ -138,8 +142,15 @@ int T_main(int argc, TCHAR* argv[]) demangleSymbols = false; else if (argv[0][1] == 'e') useTypedefEnum = true; - else if (argv[0][1] == 'd' && argv[0][2] == 'e' && argv[0][3] == 'b') // deb[ug] - debug = true; + else if (!T_strncmp(&argv[0][1], TEXT("debug"), 5)) // debug[level] + { + debug = (DebugLevel)T_strtoul(&argv[0][6], 0, 0); + if (!debug) { + debug = DbgBasic; + } + + fprintf(stderr, "Debug 
set to %x\n", debug); + } else if (argv[0][1] == 's' && argv[0][2]) dotReplacementChar = (char)argv[0][2]; else if (argv[0][1] == 'p' && argv[0][2]) @@ -182,9 +193,8 @@ int T_main(int argc, TCHAR* argv[]) img = &dbg; } - CV2PDB cv2pdb(*img); + CV2PDB cv2pdb(*img, debug); cv2pdb.Dversion = Dversion; - cv2pdb.debug = debug; cv2pdb.initLibraries(); TCHAR* outname = argv[1]; diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 652e063..4d35cf2 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -1,8 +1,6 @@ #include "readDwarf.h" #include -#include #include -#include #include "PEImage.h" #include "dwarf.h" @@ -11,6 +9,31 @@ extern "C" { #include "mscvpdb.h" } + +// declare hasher for pair +namespace std +{ +template +struct hash> +{ + size_t operator()(const std::pair& t) const + { + return std::hash()(t.first) ^ std::hash()(t.second); + } +}; +} + +PEImage* DIECursor::img; +abbrevMap_t DIECursor::abbrevMap; +DebugLevel DIECursor::debug; + +void DIECursor::setContext(PEImage* img_, DebugLevel debug_) +{ + img = img_; + abbrevMap.clear(); + debug = debug_; +} + static Location mkInReg(unsigned reg) { Location l; @@ -320,31 +343,6 @@ void mergeSpecification(DWARF_InfoData& id, DWARF_CompilationUnit* cu) id.merge(idspec); } -// declare hasher for pair -namespace std -{ - template - struct hash> - { - size_t operator()(const std::pair& t) const - { - return std::hash()(t.first) ^ std::hash()(t.second); - } - }; -} - -typedef std::unordered_map, byte*> abbrevMap_t; - -static PEImage* img; -static abbrevMap_t abbrevMap; - -void DIECursor::setContext(PEImage* img_) -{ - img = img_; - abbrevMap.clear(); -} - - DIECursor::DIECursor(DWARF_CompilationUnit* cu_, byte* ptr_) { cu = cu_; @@ -415,7 +413,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) return false; // root of the tree does not have a null terminator, but we know the length id.entryPtr = ptr; - id.entryOff = ptr - (byte*)cu; + entryOff = img->debug_info.sectOff(ptr); id.code = LEB128(ptr); 
if (id.code == 0) { @@ -432,13 +430,20 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) } byte* abbrev = getDWARFAbbrev(cu->debug_abbrev_offset, id.code); - assert(abbrev); - if (!abbrev) + if (!abbrev) { + fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__, + id.code, entryOff); + assert(abbrev); return false; + } id.abbrev = abbrev; id.tag = LEB128(abbrev); id.hasChild = *abbrev++; + + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: offs=%d level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__, + entryOff, level, id.tag, id.code); int attr, form; for (;;) @@ -449,8 +454,16 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) if (attr == 0 && form == 0) break; - while (form == DW_FORM_indirect) + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: offs=%x, attr=%d, form=%d\n", __FUNCTION__, __LINE__, + img->debug_info.sectOff(ptr), attr, form); + + while (form == DW_FORM_indirect) { form = LEB128(ptr); + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: attr=%d, form=%d\n", __FUNCTION__, __LINE__, + attr, form); + } DWARF_Attribute a; switch (form) @@ -467,7 +480,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_sdata: a.type = Const; a.cons = SLEB128(ptr); break; case DW_FORM_udata: a.type = Const; a.cons = LEB128(ptr); break; case DW_FORM_string: a.type = String; a.string = (const char*)ptr; ptr += strlen(a.string) + 1; break; - case DW_FORM_strp: a.type = String; a.string = (const char*)(img->debug_str + RDsize(ptr, cu->isDWARF64() ? 8 : 4)); break; + case DW_FORM_strp: a.type = String; a.string = (const char*)img->debug_str.byteAt(RDsize(ptr, cu->isDWARF64() ? 
8 : 4)); break; case DW_FORM_flag: a.type = Flag; a.flag = (*ptr++ != 0); break; case DW_FORM_flag_present: a.type = Flag; a.flag = true; break; case DW_FORM_ref1: a.type = Ref; a.ref = (byte*)cu + *ptr++; break; @@ -475,7 +488,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_ref4: a.type = Ref; a.ref = (byte*)cu + RD4(ptr); break; case DW_FORM_ref8: a.type = Ref; a.ref = (byte*)cu + RD8(ptr); break; case DW_FORM_ref_udata: a.type = Ref; a.ref = (byte*)cu + LEB128(ptr); break; - case DW_FORM_ref_addr: a.type = Ref; a.ref = (byte*)img->debug_info + (cu->isDWARF64() ? RD8(ptr) : RD4(ptr)); break; + case DW_FORM_ref_addr: a.type = Ref; a.ref = img->debug_info.byteAt(cu->isDWARF64() ? RD8(ptr) : RD4(ptr)); break; case DW_FORM_ref_sig8: a.type = Invalid; ptr += 8; break; case DW_FORM_exprloc: a.type = ExprLoc; a.expr.len = LEB128(ptr); a.expr.ptr = ptr; ptr += a.expr.len; break; case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = cu->isDWARF64() ? RD8(ptr) : RD4(ptr); break; @@ -578,7 +591,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode) { - if (!img->debug_abbrev) + if (!img->debug_abbrev.isPresent()) return 0; std::pair key = std::make_pair(off, findcode); @@ -588,8 +601,8 @@ byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode) return it->second; } - byte* p = (byte*)img->debug_abbrev + off; - byte* end = (byte*)img->debug_abbrev + img->debug_abbrev_length; + byte* p = img->debug_abbrev.byteAt(off); + byte* end = img->debug_abbrev.endByte(); while (p < end) { int code = LEB128(p); diff --git a/src/readDwarf.h b/src/readDwarf.h index 7ad56e8..41724f6 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -1,13 +1,30 @@ #ifndef __READDWARF_H__ #define __READDWARF_H__ +#include #include #include #include +#include #include "mspdb.h" +class PEImage; + typedef unsigned char byte; +enum DebugLevel : unsigned { + DbgBasic = 0x1, + DbgPdbTypes = 
0x2, + DbgPdbSyms = 0x4, + DbgPdbLines = 0x8, + DbgDwarfTagRead = 0x10, + DbgDwarfAttrRead = 0x20, + DbgDwarfLocLists = 0x40, + DbgDwarfLines = 0x80 +}; + +DEFINE_ENUM_FLAG_OPERATORS(DebugLevel); + inline unsigned int LEB128(byte* &p) { unsigned int x = 0; @@ -138,7 +155,6 @@ struct DWARF_FileName struct DWARF_InfoData { byte* entryPtr; - unsigned entryOff; // offset in the cu int code; byte* abbrev; int tag; @@ -382,7 +398,8 @@ struct Location bool is_regrel() const { return type == RegRel; } }; -class PEImage; +typedef std::unordered_map, byte*> abbrevMap_t; + // Attempts to partially evaluate DWARF location expressions. // The only supported expressions are those, whose result may be represented @@ -398,15 +415,20 @@ class DIECursor public: DWARF_CompilationUnit* cu; byte* ptr; + unsigned int entryOff; int level; bool hasChild; // indicates whether the last read DIE has children byte* sibling; + static PEImage *img; + static abbrevMap_t abbrevMap; + static DebugLevel debug; + byte* getDWARFAbbrev(unsigned off, unsigned findcode); public: - static void setContext(PEImage* img_); + static void setContext(PEImage* img_, DebugLevel debug_); // Create a new DIECursor DIECursor(DWARF_CompilationUnit* cu_, byte* ptr); @@ -429,6 +451,6 @@ public: // iterate over DWARF debug_line information // if mod is null, print them out, otherwise add to module -bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod); +bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug = DebugLevel{}); #endif -- cgit v0.12 From 594cf2b96458448b7bfe721bb1d9786fcf90bc0d Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Mon, 29 Nov 2021 21:03:58 -0800 Subject: carry contextual information with the DIECursor DWARF5 has more contextual information that is associated with the compilation unit. As a preparation for using such information, carry it with the DIECursor and eliminate places where we're passing in the parent compilation unit. 
Also add the RDAddr helper to read a target-address according to the specification in the compilation unit. --- src/cv2pdb.h | 19 +++++----- src/dwarf2pdb.cpp | 108 +++++++++++++++++++++++++++--------------------------- src/readDwarf.cpp | 21 +++++++---- src/readDwarf.h | 20 ++++++++-- 4 files changed, 91 insertions(+), 77 deletions(-) diff --git a/src/cv2pdb.h b/src/cv2pdb.h index 26b2143..1c0b3b2 100644 --- a/src/cv2pdb.h +++ b/src/cv2pdb.h @@ -15,7 +15,6 @@ #include #include -#include extern "C" { #include "mscvpdb.h" @@ -172,17 +171,17 @@ public: bool writeDWARFImage(const TCHAR* opath); bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi); - bool addDWARFProc(DWARF_InfoData& id, DWARF_CompilationUnit* cu, DIECursor cursor); - int addDWARFStructure(DWARF_InfoData& id, DWARF_CompilationUnit* cu, DIECursor cursor); - int addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, DIECursor cursor, int off); - int addDWARFArray(DWARF_InfoData& arrayid, DWARF_CompilationUnit* cu, DIECursor cursor); + bool addDWARFProc(DWARF_InfoData& id, DIECursor cursor); + int addDWARFStructure(DWARF_InfoData& id, DIECursor cursor); + int addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int off); + int addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor); int addDWARFBasicType(const char*name, int encoding, int byte_size); - int addDWARFEnum(DWARF_InfoData& enumid, DWARF_CompilationUnit* cu, DIECursor cursor); - int getTypeByDWARFPtr(DWARF_CompilationUnit* cu, byte* ptr); - int getDWARFTypeSize(DWARF_CompilationUnit* cu, byte* ptr); - void getDWARFArrayBounds(DWARF_InfoData& arrayid, DWARF_CompilationUnit* cu, DIECursor cursor, + int addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor); + int getTypeByDWARFPtr(byte* ptr); + int getDWARFTypeSize(const DIECursor& parent, byte* ptr); + void getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound); - void 
getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, DWARF_CompilationUnit* cu, + void getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent, int& basetype, int& lowerBound, int& upperBound); int getDWARFBasicType(int encoding, int byte_size); diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index f58067d..88670e2 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -558,10 +558,10 @@ public: } }; -Location findBestFBLoc(const PEImage& img, unsigned long fblocoff) +Location findBestFBLoc(const DIECursor& parent, unsigned long fblocoff) { - int regebp = img.isX64() ? 6 : 5; - LOCCursor cursor(img, fblocoff); + int regebp = parent.img->isX64() ? 6 : 5; + LOCCursor cursor(*parent.img, fblocoff); LOCEntry entry; Location longest = { Location::RegRel, DW_REG_CFA, 0 }; unsigned long longest_range = 0; @@ -690,7 +690,7 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo) cbUdtSymbols += len; } -bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIECursor cursor) +bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DIECursor cursor) { unsigned int pclo = procid.pclo - codeSegOff; unsigned int pchi = procid.pchi - codeSegOff; @@ -747,11 +747,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE Location frameBase = decodeLocation(img, procid.frame_base, 0, DW_AT_frame_base); if (frameBase.is_abs()) // pointer into location list in .debug_loc? assume CFA - frameBase = findBestFBLoc(img, frameBase.off); + frameBase = findBestFBLoc(cursor, frameBase.off); Location cfa = findBestCFA(img, cfi_index, procid.pclo, procid.pchi); - if (cu) + if (cursor.cu) { bool endarg = false; DWARF_InfoData id; @@ -764,10 +764,10 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE { if (id.location.type == ExprLoc || id.location.type == Block || id.location.type == SecOffset) { - Location loc = id.location.type == SecOffset ? 
findBestFBLoc(img, id.location.sec_offset) + Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset) : decodeLocation(img, id.location, &frameBase); if (loc.is_regrel()) - appendStackVar(id.name, getTypeByDWARFPtr(cu, id.type), loc, cfa); + appendStackVar(id.name, getTypeByDWARFPtr(id.type), loc, cfa); } } } @@ -835,10 +835,10 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE { if (id.name && (id.location.type == ExprLoc || id.location.type == Block)) { - Location loc = id.location.type == SecOffset ? findBestFBLoc(img, id.location.sec_offset) + Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset) : decodeLocation(img, id.location, &frameBase); if (loc.is_regrel()) - appendStackVar(id.name, getTypeByDWARFPtr(cu, id.type), loc, cfa); + appendStackVar(id.name, getTypeByDWARFPtr(id.type), loc, cfa); } } cursor.gotoSibling(); @@ -855,7 +855,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE return true; } -int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, DIECursor cursor, int baseoff) +int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff) { bool isunion = structid.tag == DW_TAG_union_type; int nfields = 0; @@ -886,20 +886,20 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, { checkDWARFTypeAlloc(kMaxNameLen + 100); codeview_fieldtype* dfieldtype = (codeview_fieldtype*)(dwarfTypes + cbDwarfTypes); - cbDwarfTypes += addFieldMember(dfieldtype, 0, baseoff + off, getTypeByDWARFPtr(cu, id.type), id.name); + cbDwarfTypes += addFieldMember(dfieldtype, 0, baseoff + off, getTypeByDWARFPtr(id.type), id.name); nfields++; } else if (id.type) { // if it doesn't have a name, and it's a struct or union, embed it directly - DIECursor membercursor(cu, id.type); + DIECursor membercursor(cursor, id.type); DWARF_InfoData memberid; if 
(membercursor.readNext(memberid)) { if (memberid.abstract_origin) - mergeAbstractOrigin(memberid, cu); + mergeAbstractOrigin(memberid, cursor); if (memberid.specification) - mergeSpecification(memberid, cu); + mergeSpecification(memberid, cursor); int cvtype = -1; switch (memberid.tag) @@ -907,7 +907,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, case DW_TAG_class_type: case DW_TAG_structure_type: case DW_TAG_union_type: - nfields += addDWARFFields(memberid, cu, membercursor, baseoff + off); + nfields += addDWARFFields(memberid, membercursor, baseoff + off); break; } } @@ -929,7 +929,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, codeview_fieldtype* bc = (codeview_fieldtype*)(dwarfTypes + cbDwarfTypes); bc->bclass_v2.id = LF_BCLASS_V2; bc->bclass_v2.offset = baseoff + off; - bc->bclass_v2.type = getTypeByDWARFPtr(cu, id.type); + bc->bclass_v2.type = getTypeByDWARFPtr(id.type); bc->bclass_v2.attribute = 3; // public cbDwarfTypes += sizeof(bc->bclass_v2); for (; cbDwarfTypes & 3; cbDwarfTypes++) @@ -942,13 +942,13 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, return nfields; } -int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DWARF_CompilationUnit* cu, DIECursor cursor) +int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor) { //printf("Adding struct %s, entryoff %d, abbrev %d\n", structid.name, structid.entryOff, structid.abbrev); int fieldlistType = 0; int nfields = 0; - if (cu) + if (cursor.cu) { checkDWARFTypeAlloc(100); codeview_reftype* fl = (codeview_reftype*) (dwarfTypes + cbDwarfTypes); @@ -970,7 +970,7 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DWARF_CompilationUnit* c nfields++; } #endif - nfields += addDWARFFields(structid, cu, cursor, 0); + nfields += addDWARFFields(structid, cursor, 0); fl = (codeview_reftype*) (dwarfTypes + flbegin); fl->fieldlist.len = cbDwarfTypes - flbegin - 2; fieldlistType = 
nextDwarfType++; @@ -990,19 +990,18 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DWARF_CompilationUnit* c return cvtype; } -void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DWARF_CompilationUnit* cu, - DIECursor cursor, int& basetype, int& lowerBound, int& upperBound) +void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound) { DWARF_InfoData id; // TODO: handle multi-dimensional arrays - if (cu) + if (cursor.cu) { while (cursor.readNext(id, true)) { if (id.tag == DW_TAG_subrange_type) { - getDWARFSubrangeInfo(id, cu, basetype, lowerBound, upperBound); + getDWARFSubrangeInfo(id, cursor, basetype, lowerBound, upperBound); return; } cursor.gotoSibling(); @@ -1010,10 +1009,10 @@ void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DWARF_CompilationUnit* } // In case of error, return plausible defaults - getDWARFSubrangeInfo(id, NULL, basetype, lowerBound, upperBound); + getDWARFSubrangeInfo(id, cursor, basetype, lowerBound, upperBound); } -void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, DWARF_CompilationUnit* cu, +void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent, int& basetype, int& lowerBound, int& upperBound) { // In case of error, return plausible defaults. 
Assume the array @@ -1022,10 +1021,10 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, DWARF_CompilationU lowerBound = currentDefaultLowerBound; upperBound = lowerBound; - if (!cu || subrangeid.tag != DW_TAG_subrange_type) + if (!parent.cu || subrangeid.tag != DW_TAG_subrange_type) return; - basetype = getTypeByDWARFPtr(cu, subrangeid.type); + basetype = getTypeByDWARFPtr(subrangeid.type); if (subrangeid.has_lower_bound) lowerBound = subrangeid.lower_bound; upperBound = subrangeid.upper_bound; @@ -1093,20 +1092,19 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size) return translateType(t); } -int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DWARF_CompilationUnit* cu, - DIECursor cursor) +int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor) { int basetype, upperBound, lowerBound; - getDWARFArrayBounds(arrayid, cu, cursor, basetype, lowerBound, upperBound); + getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound); checkUserTypeAlloc(kMaxNameLen + 100); codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes); cvt->array_v2.id = v3 ? 
LF_ARRAY_V3 : LF_ARRAY_V2; - cvt->array_v2.elemtype = getTypeByDWARFPtr(cu, arrayid.type); + cvt->array_v2.elemtype = getTypeByDWARFPtr(arrayid.type); cvt->array_v2.idxtype = basetype; int len = (BYTE*)&cvt->array_v2.arrlen - (BYTE*)cvt; - int size = (upperBound - lowerBound + 1) * getDWARFTypeSize(cu, arrayid.type); + int size = (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, arrayid.type); len += write_numeric_leaf(size, &cvt->array_v2.arrlen); ((BYTE*)cvt)[len++] = 0; // empty name for (; len & 3; len++) @@ -1190,7 +1188,7 @@ int CV2PDB::addDWARFBasicType(const char*name, int encoding, int byte_size) return cvtype; } -int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DWARF_CompilationUnit* cu, DIECursor cursor) +int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor) { /* Enumerated types are described in CodeView with two components: @@ -1298,7 +1296,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DWARF_CompilationUnit* cu, DIEC /* Now the LF_FIELDLIST is ready, create the LF_ENUM type record itself. */ checkUserTypeAlloc(); int basetype = (enumid.type != 0) - ? getTypeByDWARFPtr(cu, enumid.type) + ? getTypeByDWARFPtr(enumid.type) : getDWARFBasicType(enumid.encoding, enumid.byte_size); dtype = (codeview_type*)(userTypes + cbUserTypes); const char* name = (enumid.name ? 
enumid.name : "__noname"); @@ -1309,7 +1307,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DWARF_CompilationUnit* cu, DIEC return enumType; } -int CV2PDB::getTypeByDWARFPtr(DWARF_CompilationUnit* cu, byte* ptr) +int CV2PDB::getTypeByDWARFPtr(byte* ptr) { std::unordered_map::iterator it = mapOffsetToType.find(ptr); if(it == mapOffsetToType.end()) @@ -1317,10 +1315,10 @@ int CV2PDB::getTypeByDWARFPtr(DWARF_CompilationUnit* cu, byte* ptr) return it->second; } -int CV2PDB::getDWARFTypeSize(DWARF_CompilationUnit* cu, byte* typePtr) +int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr) { DWARF_InfoData id; - DIECursor cursor(cu, typePtr); + DIECursor cursor(parent, typePtr); if (!cursor.readNext(id)) return 0; @@ -1333,16 +1331,16 @@ int CV2PDB::getDWARFTypeSize(DWARF_CompilationUnit* cu, byte* typePtr) case DW_TAG_ptr_to_member_type: case DW_TAG_reference_type: case DW_TAG_pointer_type: - return cu->address_size; + return cursor.cu->address_size; case DW_TAG_array_type: { int basetype, upperBound, lowerBound; - getDWARFArrayBounds(id, cu, cursor, basetype, lowerBound, upperBound); - return (upperBound - lowerBound + 1) * getDWARFTypeSize(cu, id.type); + getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound); + return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type); } default: if(id.type) - return getDWARFTypeSize(cu, id.type); + return getDWARFTypeSize(cursor, id.type); break; } return 0; @@ -1356,7 +1354,7 @@ bool CV2PDB::mapTypes() { DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); - DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); + DIECursor cursor(cu, (byte*)cu + sizeof(*cu)); DWARF_InfoData id; while (cursor.readNext(id)) { @@ -1429,9 +1427,9 @@ bool CV2PDB::createTypes() cursor.entryOff, cursor.level, id.code, id.tag); if (id.abstract_origin) - mergeAbstractOrigin(id, cu); + mergeAbstractOrigin(id, cursor); if (id.specification) - mergeSpecification(id, cu); + 
mergeSpecification(id, cursor); int cvtype = -1; switch (id.tag) @@ -1440,36 +1438,36 @@ bool CV2PDB::createTypes() cvtype = addDWARFBasicType(id.name, id.encoding, id.byte_size); break; case DW_TAG_typedef: - cvtype = appendModifierType(getTypeByDWARFPtr(cu, id.type), 0); + cvtype = appendModifierType(getTypeByDWARFPtr(id.type), 0); addUdtSymbol(cvtype, id.name); break; case DW_TAG_pointer_type: - cvtype = appendPointerType(getTypeByDWARFPtr(cu, id.type), pointerAttr); + cvtype = appendPointerType(getTypeByDWARFPtr(id.type), pointerAttr); break; case DW_TAG_const_type: - cvtype = appendModifierType(getTypeByDWARFPtr(cu, id.type), 1); + cvtype = appendModifierType(getTypeByDWARFPtr(id.type), 1); break; case DW_TAG_reference_type: - cvtype = appendPointerType(getTypeByDWARFPtr(cu, id.type), pointerAttr | 0x20); + cvtype = appendPointerType(getTypeByDWARFPtr(id.type), pointerAttr | 0x20); break; case DW_TAG_subrange_type: // It seems we cannot materialize bounds for scalar types in // CodeView, so just redirect to a mere base type. 
- cvtype = appendModifierType(getTypeByDWARFPtr(cu, id.type), 0); + cvtype = appendModifierType(getTypeByDWARFPtr(id.type), 0); break; case DW_TAG_class_type: case DW_TAG_structure_type: case DW_TAG_union_type: - cvtype = addDWARFStructure(id, cu, cursor.getSubtreeCursor()); + cvtype = addDWARFStructure(id, cursor.getSubtreeCursor()); break; case DW_TAG_array_type: - cvtype = addDWARFArray(id, cu, cursor.getSubtreeCursor()); + cvtype = addDWARFArray(id, cursor.getSubtreeCursor()); break; case DW_TAG_enumeration_type: - cvtype = addDWARFEnum(id, cu, cursor.getSubtreeCursor()); + cvtype = addDWARFEnum(id, cursor.getSubtreeCursor()); break; case DW_TAG_subroutine_type: @@ -1542,7 +1540,7 @@ bool CV2PDB::createTypes() } if (id.pclo && id.pchi) - addDWARFProc(id, cu, cursor.getSubtreeCursor()); + addDWARFProc(id, cursor.getSubtreeCursor()); } break; @@ -1621,7 +1619,7 @@ bool CV2PDB::createTypes() } if (seg >= 0) { - int type = getTypeByDWARFPtr(cu, id.type); + int type = getTypeByDWARFPtr(id.type); if (dllimport) { checkDWARFTypeAlloc(100); diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 4d35cf2..f712b29 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -315,31 +315,31 @@ Location decodeLocation(const PEImage& img, const DWARF_Attribute& attr, const L return stack[0]; } -void mergeAbstractOrigin(DWARF_InfoData& id, DWARF_CompilationUnit* cu) +void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent) { - DIECursor specCursor(cu, id.abstract_origin); + DIECursor specCursor(parent, id.abstract_origin); DWARF_InfoData idspec; specCursor.readNext(idspec); // assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild // assert(id.tag == idspec.tag); if (idspec.abstract_origin) - mergeAbstractOrigin(idspec, cu); + mergeAbstractOrigin(idspec, parent); if (idspec.specification) - mergeSpecification(idspec, cu); + mergeSpecification(idspec, parent); id.merge(idspec); } -void mergeSpecification(DWARF_InfoData& id, 
DWARF_CompilationUnit* cu) +void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent) { - DIECursor specCursor(cu, id.specification); + DIECursor specCursor(parent, id.specification); DWARF_InfoData idspec; specCursor.readNext(idspec); //assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild //assert(id.tag == idspec.tag); if (idspec.abstract_origin) - mergeAbstractOrigin(idspec, cu); + mergeAbstractOrigin(idspec, parent); if (idspec.specification) - mergeSpecification(idspec, cu); + mergeSpecification(idspec, parent); id.merge(idspec); } @@ -352,6 +352,11 @@ DIECursor::DIECursor(DWARF_CompilationUnit* cu_, byte* ptr_) sibling = 0; } +DIECursor::DIECursor(const DIECursor& parent, byte* ptr_) + : DIECursor(parent) +{ + ptr = ptr_; +} void DIECursor::gotoSibling() { diff --git a/src/readDwarf.h b/src/readDwarf.h index 41724f6..bb78285 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -8,9 +8,9 @@ #include #include "mspdb.h" -class PEImage; - typedef unsigned char byte; +class PEImage; +class DIECursor; enum DebugLevel : unsigned { DbgBasic = 0x1, @@ -406,8 +406,8 @@ typedef std::unordered_map, byte*> abbrevMap_t; // as either an absolute value, a register, or a register-relative address. Location decodeLocation(const PEImage& img, const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0); -void mergeAbstractOrigin(DWARF_InfoData& id, DWARF_CompilationUnit* cu); -void mergeSpecification(DWARF_InfoData& id, DWARF_CompilationUnit* cu); +void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent); +void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent); // Debug Information Entry Cursor class DIECursor @@ -433,6 +433,9 @@ public: // Create a new DIECursor DIECursor(DWARF_CompilationUnit* cu_, byte* ptr); + // Create a child DIECursor + DIECursor(const DIECursor& parent, byte* ptr_); + // Goto next sibling DIE. If the last read DIE had any children, they will be skipped over. 
void gotoSibling(); @@ -447,6 +450,15 @@ public: // If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level). // Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created. bool readNext(DWARF_InfoData& id, bool stopAtNull = false); + + // Read an address from p according to the ambient pointer size. + uint64_t RDAddr(byte* &p) const + { + if (cu->address_size == 4) + return RD4(p); + + return RD8(p); + } }; // iterate over DWARF debug_line information -- cgit v0.12 From 27a6223aecfc6914f49c13ee737c8ce59b09c55b Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Tue, 30 Nov 2021 00:01:51 -0800 Subject: more pre-DWARF5 refactoring: unit headers, location cursor, range cursor Read the compilation unit header byte-by-byte rather than by casting the data to a structure. Add the currentBaseAddress contextual info to the compilation unit data. Move the LOCCursor into readDwarf.cpp. Implement a RangeCursor similar to the LOCCursor. Add more debug printing. --- src/cv2pdb.h | 5 -- src/dwarf2pdb.cpp | 165 ++++++++++++++++------------------------------------- src/dwarflines.cpp | 13 +++-- src/readDwarf.cpp | 112 +++++++++++++++++++++++++++++++++--- src/readDwarf.h | 100 ++++++++++++++++++++++++++------ 5 files changed, 243 insertions(+), 152 deletions(-) diff --git a/src/cv2pdb.h b/src/cv2pdb.h index 1c0b3b2..4b58140 100644 --- a/src/cv2pdb.h +++ b/src/cv2pdb.h @@ -279,11 +279,6 @@ public: // Default lower bound for the current compilation unit. This depends on // the language of the current unit. unsigned currentDefaultLowerBound; - - // Value of the DW_AT_low_pc attribute for the current compilation unit. - // Specify the default base address for use in location lists and range - // lists. 
- uint32_t currentBaseAddress; }; #endif //__CV2PDB_H__ diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index 88670e2..3750702 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -419,7 +419,7 @@ public: attr.type = ExprLoc; attr.expr.len = LEB128(ptr); attr.expr.ptr = ptr; - cfa = decodeLocation(img, attr); + cfa = decodeLocation(attr); ptr += attr.expr.len; break; } @@ -458,7 +458,7 @@ public: attr.type = Block; attr.block.len = LEB128(ptr); attr.block.ptr = ptr; - cfa = decodeLocation(img, attr); // TODO: push cfa on stack + cfa = decodeLocation(attr); // TODO: push cfa on stack ptr += attr.expr.len; break; } @@ -511,61 +511,14 @@ Location findBestCFA(const PEImage& img, const CFIIndex* index, unsigned int pcl return ebp; } -// Location list entry -class LOCEntry -{ -public: - unsigned long beg_offset; - unsigned long end_offset; - Location loc; - - bool eol() const { return beg_offset == 0 && end_offset == 0; } -}; - -// Location list cursor -class LOCCursor -{ -public: - LOCCursor(const PEImage& image, unsigned long off) - : img (image) - , end(img.debug_loc.endByte()) - , ptr(img.debug_loc.byteAt(off)) - { - default_address_size = img.isX64() ? 8 : 4; - } - - const PEImage& img; - byte* end; - byte* ptr; - byte default_address_size; - - bool readNext(LOCEntry& entry) - { - if(ptr >= end) - return false; - entry.beg_offset = (unsigned long) RDsize(ptr, default_address_size); - entry.end_offset = (unsigned long) RDsize(ptr, default_address_size); - if (entry.eol()) - return true; - - DWARF_Attribute attr; - attr.type = Block; - attr.block.len = RD2(ptr); - attr.block.ptr = ptr; - entry.loc = decodeLocation(img, attr); - ptr += attr.expr.len; - return true; - } -}; - Location findBestFBLoc(const DIECursor& parent, unsigned long fblocoff) { int regebp = parent.img->isX64() ? 
6 : 5; - LOCCursor cursor(*parent.img, fblocoff); + LOCCursor cursor(parent, fblocoff); LOCEntry entry; Location longest = { Location::RegRel, DW_REG_CFA, 0 }; unsigned long longest_range = 0; - while(cursor.readNext(entry) && !entry.eol()) + while(cursor.readNext(entry)) { if(entry.loc.is_regrel() && entry.loc.reg == regebp) return entry.loc; @@ -745,7 +698,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DIECursor cursor) addStackVar("local_var", 0x1001, 8); #endif - Location frameBase = decodeLocation(img, procid.frame_base, 0, DW_AT_frame_base); + Location frameBase = decodeLocation(procid.frame_base, 0, DW_AT_frame_base); if (frameBase.is_abs()) // pointer into location list in .debug_loc? assume CFA frameBase = findBestFBLoc(cursor, frameBase.off); @@ -765,7 +718,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DIECursor cursor) if (id.location.type == ExprLoc || id.location.type == Block || id.location.type == SecOffset) { Location loc = id.location.type == SecOffset ? 
findBestFBLoc(cursor, id.location.sec_offset) - : decodeLocation(img, id.location, &frameBase); + : decodeLocation(id.location, &frameBase); if (loc.is_regrel()) appendStackVar(id.name, getTypeByDWARFPtr(id.type), loc, cfa); } @@ -795,28 +748,12 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DIECursor cursor) id.pchi = 0; // TODO: handle base address selection - byte *r = img.debug_ranges.byteAt(id.ranges); - byte *rend = img.debug_ranges.endByte(); - while (r < rend) + RangeEntry range; + RangeCursor rangeCursor(cursor, id.ranges); + while (rangeCursor.readNext(range)) { - uint64_t pclo, pchi; - - if (img.isX64()) - { - pclo = RD8(r); - pchi = RD8(r); - } - else - { - pclo = RD4(r); - pchi = RD4(r); - } - if (pclo == 0 && pchi == 0) - break; - if (pclo >= pchi) - continue; - id.pclo = min(id.pclo, pclo + currentBaseAddress); - id.pchi = max(id.pchi, pchi + currentBaseAddress); + id.pclo = min(id.pclo, range.pclo); + id.pchi = max(id.pchi, range.pchi); } } @@ -836,7 +773,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DIECursor cursor) if (id.name && (id.location.type == ExprLoc || id.location.type == Block)) { Location loc = id.location.type == SecOffset ? 
findBestFBLoc(cursor, id.location.sec_offset) - : decodeLocation(img, id.location, &frameBase); + : decodeLocation(id.location, &frameBase); if (loc.is_regrel()) appendStackVar(id.name, getTypeByDWARFPtr(id.type), loc, cfa); } @@ -872,7 +809,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo int off = 0; if (!isunion) { - Location loc = decodeLocation(img, id.member_location, 0, DW_AT_data_member_location); + Location loc = decodeLocation(id.member_location, 0, DW_AT_data_member_location); if (loc.is_abs()) { off = loc.off; @@ -917,7 +854,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo else if (id.tag == DW_TAG_inheritance) { int off = 0; - Location loc = decodeLocation(img, id.member_location, 0, DW_AT_data_member_location); + Location loc = decodeLocation(id.member_location, 0, DW_AT_data_member_location); if (loc.is_abs()) { cvid = S_CONSTANT_V2; @@ -1350,11 +1287,18 @@ bool CV2PDB::mapTypes() { int typeID = nextUserType; unsigned long off = 0; + + if (debug & DbgBasic) + fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__); + while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); + DWARF_CompilationUnitInfo cu{}; + byte* ptr = cu.read(img, &off); + if (!ptr) + continue; - DIECursor cursor(cu, (byte*)cu + sizeof(*cu)); + DIECursor cursor(&cu, ptr); DWARF_InfoData id; while (cursor.readNext(id)) { @@ -1392,8 +1336,6 @@ bool CV2PDB::mapTypes() typeID++; } } - - off += sizeof(cu->unit_length) + cu->unit_length; } if (debug & DbgBasic) @@ -1416,9 +1358,12 @@ bool CV2PDB::createTypes() unsigned long off = 0; while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); + DWARF_CompilationUnitInfo cu{}; + byte* ptr = cu.read(img, &off); + if (!ptr) + continue; - DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); + DIECursor cursor(&cu, ptr); 
DWARF_InfoData id; while (cursor.readNext(id)) { @@ -1504,28 +1449,13 @@ bool CV2PDB::createTypes() else if (id.ranges != ~0) { entry_point = ~0; - byte* r = (byte*)img.debug_ranges.byteAt(id.ranges); - byte* rend = (byte*)img.debug_ranges.endByte(); - while (r < rend) + RangeEntry range; + RangeCursor rangeCursor(cursor, id.ranges); + while (rangeCursor.readNext(range)) { - uint64_t pclo, pchi; - - if (img.isX64()) - { - pclo = RD8(r); - pchi = RD8(r); - } - else - { - pclo = RD4(r); - pchi = RD4(r); - } - if (pclo == 0 && pchi == 0) - break; - if (pclo >= pchi) - continue; - entry_point = min(entry_point, pclo + currentBaseAddress); + entry_point = min(entry_point, range.pclo); } + if (entry_point == ~0) entry_point = 0; } @@ -1545,7 +1475,8 @@ bool CV2PDB::createTypes() break; case DW_TAG_compile_unit: - currentBaseAddress = id.pclo; + // Set the implicit base address for range lists. + cu.base_address = id.pclo; switch (id.language) { case DW_LANG_Ada83: @@ -1569,22 +1500,24 @@ bool CV2PDB::createTypes() { if (id.ranges > 0 && id.ranges < img.debug_ranges.length) { - unsigned char* r = img.debug_ranges.byteAt(id.ranges); - unsigned char* rend = img.debug_ranges.endByte(); - while (r < rend) + RangeEntry range; + RangeCursor rangeCursor(cursor, id.ranges); + while (rangeCursor.readNext(range)) { - unsigned long pclo = RD4(r); - unsigned long pchi = RD4(r); - if (pclo == 0 && pchi == 0) - break; - //printf("%s %s %x - %x\n", dir, name, pclo, pchi); - if (!addDWARFSectionContrib(mod, pclo, pchi)) + if (debug & DbgPdbContrib) + fprintf(stderr, "%s:%d: Adding a section contrib: %I64x-%I64x\n", __FUNCTION__, __LINE__, + range.pclo, range.pchi); + + if (!addDWARFSectionContrib(mod, range.pclo, range.pchi)) return false; } } else { - //printf("%s %s %x - %x\n", dir, name, pclo, pchi); + if (debug & DbgPdbContrib) + fprintf(stderr, "%s:%d: Adding a section contrib: %x-%x\n", __FUNCTION__, __LINE__, + id.pclo, id.pchi); + if (!addDWARFSectionContrib(mod, id.pclo, 
id.pchi)) return false; } @@ -1608,7 +1541,7 @@ bool CV2PDB::createTypes() } else { - Location loc = decodeLocation(img, id.location); + Location loc = decodeLocation(id.location); if (loc.is_abs()) { segOff = loc.off; @@ -1647,8 +1580,6 @@ bool CV2PDB::createTypes() assert(mapOffsetToType[id.entryPtr] == cvtype); } } - - off += sizeof(cu->unit_length) + cu->unit_length; } return true; diff --git a/src/dwarflines.cpp b/src/dwarflines.cpp index 2e0570d..dd48995 100644 --- a/src/dwarflines.cpp +++ b/src/dwarflines.cpp @@ -169,8 +169,13 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.startByte(); - int ptrsize = cu ? cu->address_size : 4; + DWARF_CompilationUnitInfo cu{}; + + if (!cu.read(img, 0)) { + return false; + } + + int ptrsize = cu.address_size; debug = debug_; @@ -280,7 +285,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { case DW_FORM_line_strp: { - size_t offset = cu->isDWARF64() ? RD8(p) : RD4(p); + size_t offset = cu.isDWARF64() ? RD8(p) : RD4(p); state.include_dirs.push_back((const char*)img.debug_line_str.byteAt(offset)); break; } @@ -322,7 +327,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { case DW_FORM_line_strp: { - size_t offset = cu->isDWARF64() ? RD8(p) : RD4(p); + size_t offset = cu.isDWARF64() ? 
RD8(p) : RD4(p); fname.file_name = (const char*)img.debug_line_str.byteAt(offset); break; } diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index f712b29..12fc84d 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -34,6 +34,41 @@ void DIECursor::setContext(PEImage* img_, DebugLevel debug_) debug = debug_; } +byte* DWARF_CompilationUnitInfo::read(const PEImage& img, unsigned long *off) +{ + byte* ptr = img.debug_info.byteAt(*off); + + start_ptr = ptr; + cu_offset = *off; + is_dwarf64 = false; + base_address = 0; + unit_length = RD4(ptr); + if (unit_length == ~0) { + // DWARF64 doesn't make sense in the context of the PE format since the + // section size is limited to 32 bits. + fprintf(stderr, "%s:%d: WARNING: DWARF64 compilation unit at offset=%x is not supported\n", __FUNCTION__, __LINE__, + cu_offset); + + uint64_t len64 = RD8(ptr); + *off = img.debug_info.sectOff(ptr + (intptr_t)len64); + return nullptr; + } + + end_ptr = ptr + unit_length; + *off = img.debug_info.sectOff(end_ptr); + version = RD2(ptr); + if (version >= 5) { + fprintf(stderr, "%s:%d: WARNING: Unsupported dwarf version %d for compilation unit at offset=%x\n", __FUNCTION__, __LINE__, + version, cu_offset); + + return nullptr; + } + + debug_abbrev_offset = RD4(ptr); + address_size = *ptr++; + return ptr; +} + static Location mkInReg(unsigned reg) { Location l; @@ -61,7 +96,7 @@ static Location mkRegRel(int reg, int off) return l; } -Location decodeLocation(const PEImage& img, const DWARF_Attribute& attr, const Location* frameBase, int at) +Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase, int at) { static Location invalid = { Location::Invalid }; @@ -343,7 +378,66 @@ void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent) id.merge(idspec); } -DIECursor::DIECursor(DWARF_CompilationUnit* cu_, byte* ptr_) +LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off) + : parent(parent) + , end(parent.img->debug_loc.endByte()) + , 
ptr(parent.img->debug_loc.byteAt(off)) +{ +} + +bool LOCCursor::readNext(LOCEntry& entry) +{ + if (ptr >= end) + return false; + + if (parent.debug & DbgDwarfLocLists) + fprintf(stderr, "%s:%d: loclist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_loc.sectOff(ptr), parent.entryOff); + + entry.beg_offset = (unsigned long) parent.RDAddr(ptr); + entry.end_offset = (unsigned long) parent.RDAddr(ptr); + if (!entry.beg_offset && !entry.end_offset) + return false; + + DWARF_Attribute attr; + attr.type = Block; + attr.block.len = RD2(ptr); + attr.block.ptr = ptr; + entry.loc = decodeLocation(attr); + ptr += attr.expr.len; + return true; +} + +RangeCursor::RangeCursor(const DIECursor& parent, unsigned long off) + : parent(parent) + , end(parent.img->debug_ranges.endByte()) + , ptr(parent.img->debug_ranges.byteAt(off)) +{ +} + +bool RangeCursor::readNext(RangeEntry& entry) +{ + while (ptr < end) { + if (parent.debug & DbgDwarfRangeLists) + fprintf(stderr, "%s:%d: rangelist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_ranges.sectOff(ptr), parent.entryOff); + + entry.pclo = parent.RDAddr(ptr); + entry.pchi = parent.RDAddr(ptr); + if (!entry.pclo && !entry.pchi) + return false; + + if (entry.pclo >= entry.pchi) + continue; + + entry.addBase(parent.cu->base_address); + return true; + } + + return false; +} + +DIECursor::DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr_) { cu = cu_; ptr = ptr_; @@ -414,7 +508,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) if (level == -1) return false; // we were already at the end of the subtree - if (ptr >= ((byte*)cu + sizeof(cu->unit_length) + cu->unit_length)) + if (ptr >= cu->end_ptr) return false; // root of the tree does not have a null terminator, but we know the length id.entryPtr = ptr; @@ -447,7 +541,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) id.hasChild = *abbrev++; if (debug & DbgDwarfAttrRead) - fprintf(stderr, "%s:%d: offs=%d level=%d tag=%d 
abbrev=%d\n", __FUNCTION__, __LINE__, + fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__, entryOff, level, id.tag, id.code); int attr, form; @@ -488,11 +582,11 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_strp: a.type = String; a.string = (const char*)img->debug_str.byteAt(RDsize(ptr, cu->isDWARF64() ? 8 : 4)); break; case DW_FORM_flag: a.type = Flag; a.flag = (*ptr++ != 0); break; case DW_FORM_flag_present: a.type = Flag; a.flag = true; break; - case DW_FORM_ref1: a.type = Ref; a.ref = (byte*)cu + *ptr++; break; - case DW_FORM_ref2: a.type = Ref; a.ref = (byte*)cu + RD2(ptr); break; - case DW_FORM_ref4: a.type = Ref; a.ref = (byte*)cu + RD4(ptr); break; - case DW_FORM_ref8: a.type = Ref; a.ref = (byte*)cu + RD8(ptr); break; - case DW_FORM_ref_udata: a.type = Ref; a.ref = (byte*)cu + LEB128(ptr); break; + case DW_FORM_ref1: a.type = Ref; a.ref = cu->start_ptr + *ptr++; break; + case DW_FORM_ref2: a.type = Ref; a.ref = cu->start_ptr + RD2(ptr); break; + case DW_FORM_ref4: a.type = Ref; a.ref = cu->start_ptr + RD4(ptr); break; + case DW_FORM_ref8: a.type = Ref; a.ref = cu->start_ptr + RD8(ptr); break; + case DW_FORM_ref_udata: a.type = Ref; a.ref = cu->start_ptr + LEB128(ptr); break; case DW_FORM_ref_addr: a.type = Ref; a.ref = img->debug_info.byteAt(cu->isDWARF64() ? 
RD8(ptr) : RD4(ptr)); break; case DW_FORM_ref_sig8: a.type = Invalid; ptr += 8; break; case DW_FORM_exprloc: a.type = ExprLoc; a.expr.len = LEB128(ptr); a.expr.ptr = ptr; ptr += a.expr.len; break; diff --git a/src/readDwarf.h b/src/readDwarf.h index bb78285..3bc7d09 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -17,10 +17,12 @@ enum DebugLevel : unsigned { DbgPdbTypes = 0x2, DbgPdbSyms = 0x4, DbgPdbLines = 0x8, - DbgDwarfTagRead = 0x10, - DbgDwarfAttrRead = 0x20, - DbgDwarfLocLists = 0x40, - DbgDwarfLines = 0x80 + DbgPdbContrib = 0x10, + DbgDwarfTagRead = 0x100, + DbgDwarfAttrRead = 0x200, + DbgDwarfLocLists = 0x400, + DbgDwarfRangeLists = 0x800, + DbgDwarfLines = 0x1000 }; DEFINE_ENUM_FLAG_OPERATORS(DebugLevel); @@ -57,7 +59,7 @@ inline int SLEB128(byte* &p) return x; } -inline unsigned int RD2(byte* &p) +inline unsigned short RD2(byte* &p) { unsigned int x = *p++; x |= *p++ << 8; @@ -122,17 +124,30 @@ struct DWARF_Attribute /////////////////////////////////////////////////////////////////////////////// -#include "pshpack1.h" - -struct DWARF_CompilationUnit +struct DWARF_CompilationUnitInfo { - unsigned int unit_length; // 12 byte in DWARF-64 + uint32_t unit_length; // 12 byte in DWARF-64 unsigned short version; - unsigned int debug_abbrev_offset; // 8 byte in DWARF-64 byte address_size; + byte unit_type; + unsigned int debug_abbrev_offset; // 8 byte in DWARF-64 - bool isDWARF64() const { return unit_length == ~0; } - int refSize() const { return unit_length == ~0 ? 8 : 4; } + // Value of the DW_AT_low_pc attribute for the current compilation unit. + // Specify the default base address for use in location lists and range + // lists. + uint32_t base_address; + + // Offset within the debug_info section + uint32_t cu_offset; + byte* start_ptr; + byte* end_ptr; + + bool is_dwarf64; + + byte* read(const PEImage& img, unsigned long *off); + + bool isDWARF64() const { return is_dwarf64; } + int refSize() const { return isDWARF64() ? 
8 : 4; } }; struct DWARF_FileName @@ -260,6 +275,8 @@ struct DWARF_TypeForm unsigned int type, form; }; +#include "pshpack1.h" + struct DWARF_LineNumberProgramHeader { unsigned int unit_length; // 12 byte in DWARF-64 @@ -310,6 +327,8 @@ struct DWARF2_LineNumberProgramHeader // DWARF_FileNames file_names[] // zero byte terminated }; +#include "poppack.h" + struct DWARF_LineState { // hdr info @@ -372,7 +391,6 @@ struct DWARF_LineState } }; -#include "poppack.h" /////////////////////////////////////////////////////////////////////////////// @@ -398,13 +416,61 @@ struct Location bool is_regrel() const { return type == RegRel; } }; -typedef std::unordered_map, byte*> abbrevMap_t; +// Location list entry +class LOCEntry +{ +public: + unsigned long beg_offset; + unsigned long end_offset; + Location loc; +}; +// Location list cursor +class LOCCursor +{ +public: + LOCCursor(const DIECursor& parent, unsigned long off); + + const DIECursor& parent; + byte* end; + byte* ptr; + + bool readNext(LOCEntry& entry); +}; + +// Range list entry +class RangeEntry +{ +public: + uint64_t pclo; + uint64_t pchi; + + void addBase(uint64_t base) + { + pclo += base; + pchi += base; + } +}; + +// Range list cursor +class RangeCursor +{ +public: + RangeCursor(const DIECursor& parent, unsigned long off); + + const DIECursor& parent; + byte *end; + byte *ptr; + + bool readNext(RangeEntry& entry); +}; + +typedef std::unordered_map, byte*> abbrevMap_t; // Attempts to partially evaluate DWARF location expressions. // The only supported expressions are those, whose result may be represented // as either an absolute value, a register, or a register-relative address. 
-Location decodeLocation(const PEImage& img, const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0); +Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0); void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent); void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent); @@ -413,7 +479,7 @@ void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent); class DIECursor { public: - DWARF_CompilationUnit* cu; + DWARF_CompilationUnitInfo* cu; byte* ptr; unsigned int entryOff; int level; @@ -431,7 +497,7 @@ public: static void setContext(PEImage* img_, DebugLevel debug_); // Create a new DIECursor - DIECursor(DWARF_CompilationUnit* cu_, byte* ptr); + DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr); // Create a child DIECursor DIECursor(const DIECursor& parent, byte* ptr_); -- cgit v0.12 From f26b66e9563cc92efd3ced6cfb2e940cebe596d0 Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Tue, 30 Nov 2021 16:47:00 -0800 Subject: Read DWARF5 header and add DWARF5 constants. Handle the new DWARF5 compilation unit header and add new constants to dwarf.h. We still don't decode the new forms though. 
--- src/dwarf.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dwarf2pdb.cpp | 26 +++++++++++++++++--- src/dwarflines.cpp | 4 ++- src/readDwarf.cpp | 25 ++++++++++++++----- src/readDwarf.h | 13 +++++----- 5 files changed, 123 insertions(+), 17 deletions(-) diff --git a/src/dwarf.h b/src/dwarf.h index 1565eb3..4641c78 100644 --- a/src/dwarf.h +++ b/src/dwarf.h @@ -229,10 +229,32 @@ extern "C" { #define DW_FORM_flag_present 0x19 /* DWARF4 */ #define DW_FORM_strx 0x1a /* DWARF5 */ #define DW_FORM_addrx 0x1b /* DWARF5 */ +#define DW_FORM_ref_sup4 0x1c /* DWARF5 */ #define DW_FORM_strp_sup 0x1d /* DWARF5 */ #define DW_FORM_data16 0x1e /* DWARF5 */ #define DW_FORM_line_strp 0x1f /* DWARF5 */ #define DW_FORM_ref_sig8 0x20 /* DWARF4 */ +#define DW_FORM_implicit_const 0x21 /* DWARF5 */ +#define DW_FORM_loclistx 0x22 /* DWARF5 */ +#define DW_FORM_rnglistx 0x23 /* DWARF5 */ +#define DW_FORM_ref_sup8 0x24 /* DWARF5 */ +#define DW_FORM_strx1 0x25 /* DWARF5 */ +#define DW_FORM_strx2 0x26 /* DWARF5 */ +#define DW_FORM_strx3 0x27 /* DWARF5 */ +#define DW_FORM_strx4 0x28 /* DWARF5 */ +#define DW_FORM_addrx1 0x29 /* DWARF5 */ +#define DW_FORM_addrx2 0x2a /* DWARF5 */ +#define DW_FORM_addrx3 0x2b /* DWARF5 */ +#define DW_FORM_addrx4 0x2c /* DWARF5 */ + +#define DW_UT_compile 0x01 /* DWARF5 */ +#define DW_UT_type 0x02 /* DWARF5 */ +#define DW_UT_partial 0x03 /* DWARF5 */ +#define DW_UT_skeleton 0x04 /* DWARF5 */ +#define DW_UT_split_compile 0x05 /* DWARF5 */ +#define DW_UT_split_type 0x06 /* DWARF5 */ +#define DW_UT_lo_user 0x80 /* DWARF5 */ +#define DW_UT_hi_user 0xff /* DWARF5 */ #define DW_AT_sibling 0x01 #define DW_AT_location 0x02 @@ -331,6 +353,35 @@ extern "C" { #define DW_AT_const_expr 0x6c /* DWARF4 */ #define DW_AT_enum_class 0x6d /* DWARF4 */ #define DW_AT_linkage_name 0x6e /* DWARF4 */ +#define DW_AT_string_length_bit_size 0x6f /* DWARF5 */ +#define DW_AT_string_length_byte_size 0x70 /* DWARF5 */ +#define DW_AT_rank 0x71 /* DWARF5 */ +#define 
DW_AT_str_offsets_base 0x72 /* DWARF5 */ +#define DW_AT_addr_base 0x73 /* DWARF5 */ +#define DW_AT_rnglists_base 0x74 /* DWARF5 */ +#define DW_AT_dwo_name 0x76 /* DWARF5 */ +#define DW_AT_reference 0x77 /* DWARF5 */ +#define DW_AT_rvalue_reference 0x78 /* DWARF5 */ +#define DW_AT_macros 0x79 /* DWARF5 */ +#define DW_AT_call_all_calls 0x7a /* DWARF5 */ +#define DW_AT_call_all_source_calls 0x7b /* DWARF5 */ +#define DW_AT_call_all_tail_calls 0x7c /* DWARF5 */ +#define DW_AT_call_return_pc 0x7d /* DWARF5 */ +#define DW_AT_call_value 0x7e /* DWARF5 */ +#define DW_AT_call_origin 0x7f /* DWARF5 */ +#define DW_AT_call_parameter 0x80 /* DWARF5 */ +#define DW_AT_call_pc 0x81 /* DWARF5 */ +#define DW_AT_call_tail_call 0x82 /* DWARF5 */ +#define DW_AT_call_target 0x83 /* DWARF5 */ +#define DW_AT_call_target_clobbered 0x84 /* DWARF5 */ +#define DW_AT_call_data_location 0x85 /* DWARF5 */ +#define DW_AT_call_data_value 0x86 /* DWARF5 */ +#define DW_AT_noreturn 0x87 /* DWARF5 */ +#define DW_AT_alignment 0x88 /* DWARF5 */ +#define DW_AT_export_symbols 0x89 /* DWARF5 */ +#define DW_AT_deleted 0x8a /* DWARF5 */ +#define DW_AT_defaulted 0x8b /* DWARF5 */ +#define DW_AT_loclists_base 0x8c /* DWARF5 */ /* In extensions, we attempt to include the vendor extension in the name even when the vendor leaves it out. 
*/ @@ -902,6 +953,27 @@ extern "C" { #define DW_LNE_lo_user 0x80 /* DWARF3 */ #define DW_LNE_hi_user 0xff /* DWARF3 */ +/* DWARF5 debug_rnglists entries */ +#define DW_RLE_end_of_list 0x00 +#define DW_RLE_base_addressx 0x01 +#define DW_RLE_startx_endx 0x02 +#define DW_RLE_startx_length 0x03 +#define DW_RLE_offset_pair 0x04 +#define DW_RLE_base_address 0x05 +#define DW_RLE_start_end 0x06 +#define DW_RLE_start_length 0x07 + +/* DWARF5 debug_loclists entries */ +#define DW_LLE_end_of_list 0x00 +#define DW_LLE_base_addressx 0x01 +#define DW_LLE_startx_endx 0x02 +#define DW_LLE_startx_length 0x03 +#define DW_LLE_offset_pair 0x04 +#define DW_LLE_default_location 0x05 +#define DW_LLE_base_address 0x06 +#define DW_LLE_start_end 0x07 +#define DW_LLE_start_length 0x08 + /* These are known values for DW_LNS_set_isa. */ #define DW_ISA_UNKNOWN 0 /* The following two are ARM specific. */ diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index 3750702..f53ae97 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -1294,16 +1294,26 @@ bool CV2PDB::mapTypes() while (off < img.debug_info.length) { DWARF_CompilationUnitInfo cu{}; - byte* ptr = cu.read(img, &off); + byte* ptr = cu.read(debug, img, &off); if (!ptr) continue; + if (cu.unit_type != DW_UT_compile) { + if (debug & DbgDwarfCompilationUnit) + fprintf(stderr, "%s:%d: skipping compilation unit offs=%x, unit_type=%d\n", __FUNCTION__, __LINE__, + cu.cu_offset, cu.unit_type); + + continue; + } + DIECursor cursor(&cu, ptr); DWARF_InfoData id; while (cursor.readNext(id)) { - //printf("0x%08x, level = %d, id.code = %d, id.tag = %d\n", - // (unsigned char*)cu + id.entryOff - (unsigned char*)img.debug_info, cursor.level, id.code, id.tag); + if (debug & DbgDwarfTagRead) + fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__, + cursor.entryOff, cursor.level, id.code, id.tag); + switch (id.tag) { case DW_TAG_base_type: @@ -1359,10 +1369,18 @@ bool CV2PDB::createTypes() while (off < 
img.debug_info.length) { DWARF_CompilationUnitInfo cu{}; - byte* ptr = cu.read(img, &off); + byte* ptr = cu.read(debug, img, &off); if (!ptr) continue; + if (cu.unit_type != DW_UT_compile) { + if (debug & DbgDwarfCompilationUnit) + fprintf(stderr, "%s:%d: skipping compilation unit offs=%x, unit_type=%d\n", __FUNCTION__, __LINE__, + cu.cu_offset, cu.unit_type); + + continue; + } + DIECursor cursor(&cu, ptr); DWARF_InfoData id; while (cursor.readNext(id)) diff --git a/src/dwarflines.cpp b/src/dwarflines.cpp index dd48995..13106e5 100644 --- a/src/dwarflines.cpp +++ b/src/dwarflines.cpp @@ -169,9 +169,11 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { + DWARF_CompilationUnitInfo cu{}; - if (!cu.read(img, 0)) { + unsigned long offs = 0; + if (!cu.read(debug_, img, &offs)) { return false; } diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 12fc84d..7f64ec9 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -34,7 +34,7 @@ void DIECursor::setContext(PEImage* img_, DebugLevel debug_) debug = debug_; } -byte* DWARF_CompilationUnitInfo::read(const PEImage& img, unsigned long *off) +byte* DWARF_CompilationUnitInfo::read(DebugLevel debug, const PEImage& img, unsigned long *off) { byte* ptr = img.debug_info.byteAt(*off); @@ -57,15 +57,25 @@ byte* DWARF_CompilationUnitInfo::read(const PEImage& img, unsigned long *off) end_ptr = ptr + unit_length; *off = img.debug_info.sectOff(end_ptr); version = RD2(ptr); - if (version >= 5) { + unit_type = DW_UT_compile; + if (version <= 4) { + debug_abbrev_offset = RD4(ptr); + address_size = *ptr++; + } else if (version == 5) { + unit_type = *ptr++; + address_size = *ptr++; + debug_abbrev_offset = RD4(ptr); + } else { fprintf(stderr, "%s:%d: WARNING: Unsupported dwarf version %d for compilation unit at offset=%x\n", __FUNCTION__, __LINE__, version, cu_offset); return nullptr; } - debug_abbrev_offset = RD4(ptr); - 
address_size = *ptr++; + if (debug & DbgDwarfCompilationUnit) + fprintf(stderr, "%s:%d: Reading compilation unit offs=%x, type=%d, ver=%d, addr_size=%d\n", __FUNCTION__, __LINE__, + cu_offset, unit_type, version, address_size); + return ptr; } @@ -539,7 +549,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) id.abbrev = abbrev; id.tag = LEB128(abbrev); id.hasChild = *abbrev++; - + if (debug & DbgDwarfAttrRead) fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__, entryOff, level, id.tag, id.code); @@ -591,7 +601,6 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_ref_sig8: a.type = Invalid; ptr += 8; break; case DW_FORM_exprloc: a.type = ExprLoc; a.expr.len = LEB128(ptr); a.expr.ptr = ptr; ptr += a.expr.len; break; case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = cu->isDWARF64() ? RD8(ptr) : RD4(ptr); break; - case DW_FORM_indirect: default: assert(false && "Unsupported DWARF attribute form"); return false; } @@ -722,6 +731,10 @@ byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode) { attr = LEB128(p); form = LEB128(p); + + // Implicit const forms have an extra constant value attached. 
+ if (form == DW_FORM_implicit_const) + LEB128(p); } while (attr || form); } return 0; diff --git a/src/readDwarf.h b/src/readDwarf.h index 3bc7d09..1978bf0 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -18,11 +18,12 @@ enum DebugLevel : unsigned { DbgPdbSyms = 0x4, DbgPdbLines = 0x8, DbgPdbContrib = 0x10, - DbgDwarfTagRead = 0x100, - DbgDwarfAttrRead = 0x200, - DbgDwarfLocLists = 0x400, - DbgDwarfRangeLists = 0x800, - DbgDwarfLines = 0x1000 + DbgDwarfCompilationUnit = 0x100, + DbgDwarfTagRead = 0x200, + DbgDwarfAttrRead = 0x400, + DbgDwarfLocLists = 0x800, + DbgDwarfRangeLists = 0x1000, + DbgDwarfLines = 0x2000 }; DEFINE_ENUM_FLAG_OPERATORS(DebugLevel); @@ -144,7 +145,7 @@ struct DWARF_CompilationUnitInfo bool is_dwarf64; - byte* read(const PEImage& img, unsigned long *off); + byte* read(DebugLevel debug, const PEImage& img, unsigned long *off); bool isDWARF64() const { return is_dwarf64; } int refSize() const { return isDWARF64() ? 8 : 4; } -- cgit v0.12 From c7c7dfe5c665dd78d6d5f027c79516c201421ee1 Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Tue, 30 Nov 2021 21:29:57 -0800 Subject: read all dwarf5 main DIE forms Fill in Dwarf_InfoData for all DWARF5 forms excluding the new rnglists and loclists representations. 
--- src/readDwarf.cpp | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/readDwarf.h | 23 ++++++++++++- 2 files changed, 115 insertions(+), 5 deletions(-) diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 7f64ec9..415dac8 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -506,6 +506,64 @@ DIECursor DIECursor::getSubtreeCursor() } } +const char Cv2PdbInvalidString[] = ""; + +const char* DIECursor::resolveIndirectString(uint32_t index) const +{ + if (!cu->str_offset_base) + { + fprintf(stderr, "ERROR: %s:%d: no string base for cu_offs=%x die_offs=%x\n", __FUNCTION__, __LINE__, + cu->cu_offset, entryOff); + return Cv2PdbInvalidString; + } + + byte* refAddr = cu->str_offset_base + index * refSize(); + return (const char*)img->debug_str.byteAt(RDref(refAddr)); +} + +uint32_t DIECursor::readIndirectAddr(uint32_t index) const +{ + if (!cu->addr_base) + { + fprintf(stderr, "ERROR: %s:%d: no addr base for cu_offs=%x die_offs=%x\n", __FUNCTION__, __LINE__, + cu->cu_offset, entryOff); + + return 0; + } + + byte* refAddr = cu->addr_base + index * refSize(); + return RDAddr(refAddr); +} + +uint32_t DIECursor::resolveIndirectSecPtr(uint32_t index, const SectionDescriptor &secDesc, byte *baseAddress) const +{ + if (!baseAddress) + { + fprintf(stderr, "ERROR: %s:%d: no base address in section %s for cu_offs=%x die_offs=%x\n", __FUNCTION__, __LINE__, + secDesc.name, cu->cu_offset, entryOff); + + return 0; + } + + byte* refAddr = baseAddress + index * refSize(); + byte* targetAddr = baseAddress + RDref(refAddr); + return (img->*(secDesc.pSec)).sectOff(targetAddr); +} + +static byte* getPointerInSection(const PEImage &img, const SectionDescriptor &secDesc, uint32_t offset) +{ + const PESection &peSec = img.*(secDesc.pSec); + + if (!peSec.isPresent() || offset >= peSec.length) + { + fprintf(stderr, "%s:%d: WARNING: offset %x is not valid in section %s\n", __FUNCTION__, __LINE__, + offset, secDesc.name); + return nullptr; + } + + return 
peSec.byteAt(offset); +} + bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) { id.clear(); @@ -577,7 +635,12 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) DWARF_Attribute a; switch (form) { - case DW_FORM_addr: a.type = Addr; a.addr = (unsigned long)RDsize(ptr, cu->address_size); break; + case DW_FORM_addr: a.type = Addr; a.addr = RDAddr(ptr); break; + case DW_FORM_addrx: a.type = Addr; a.addr = readIndirectAddr(LEB128(ptr)); break; + case DW_FORM_addrx1: + case DW_FORM_addrx2: + case DW_FORM_addrx3: + case DW_FORM_addrx4: a.type = Addr; a.addr = readIndirectAddr(RDsize(ptr, 1 + (form - DW_FORM_addrx1))); break; case DW_FORM_block: a.type = Block; a.block.len = LEB128(ptr); a.block.ptr = ptr; ptr += a.block.len; break; case DW_FORM_block1: a.type = Block; a.block.len = *ptr++; a.block.ptr = ptr; ptr += a.block.len; break; case DW_FORM_block2: a.type = Block; a.block.len = RD2(ptr); a.block.ptr = ptr; ptr += a.block.len; break; @@ -586,10 +649,19 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_data2: a.type = Const; a.cons = RD2(ptr); break; case DW_FORM_data4: a.type = Const; a.cons = RD4(ptr); break; case DW_FORM_data8: a.type = Const; a.cons = RD8(ptr); break; + case DW_FORM_data16: a.type = Block; a.block.len = 16; a.block.ptr = ptr; ptr += a.block.len; break; case DW_FORM_sdata: a.type = Const; a.cons = SLEB128(ptr); break; case DW_FORM_udata: a.type = Const; a.cons = LEB128(ptr); break; + case DW_FORM_implicit_const: a.type = Const; a.cons = LEB128(abbrev); break; case DW_FORM_string: a.type = String; a.string = (const char*)ptr; ptr += strlen(a.string) + 1; break; - case DW_FORM_strp: a.type = String; a.string = (const char*)img->debug_str.byteAt(RDsize(ptr, cu->isDWARF64() ? 
8 : 4)); break; + case DW_FORM_strp: a.type = String; a.string = (const char*)img->debug_str.byteAt(RDref(ptr)); break; + case DW_FORM_line_strp: a.type = String; a.string = (const char*)img->debug_line_str.byteAt(RDref(ptr)); break; + case DW_FORM_strx: a.type = String; a.string = resolveIndirectString(LEB128(ptr)); break; + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: a.type = String; a.string = resolveIndirectString(RDsize(ptr, 1 + (form - DW_FORM_strx1))); break; + case DW_FORM_strp_sup: a.type = Invalid; assert(false && "Unsupported supplementary object"); ptr += refSize(); break; case DW_FORM_flag: a.type = Flag; a.flag = (*ptr++ != 0); break; case DW_FORM_flag_present: a.type = Flag; a.flag = true; break; case DW_FORM_ref1: a.type = Ref; a.ref = cu->start_ptr + *ptr++; break; @@ -597,10 +669,14 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_ref4: a.type = Ref; a.ref = cu->start_ptr + RD4(ptr); break; case DW_FORM_ref8: a.type = Ref; a.ref = cu->start_ptr + RD8(ptr); break; case DW_FORM_ref_udata: a.type = Ref; a.ref = cu->start_ptr + LEB128(ptr); break; - case DW_FORM_ref_addr: a.type = Ref; a.ref = img->debug_info.byteAt(cu->isDWARF64() ? RD8(ptr) : RD4(ptr)); break; + case DW_FORM_ref_addr: a.type = Ref; a.ref = img->debug_info.byteAt(RDref(ptr)); break; case DW_FORM_ref_sig8: a.type = Invalid; ptr += 8; break; + case DW_FORM_ref_sup4: a.type = Invalid; assert(false && "Unsupported supplementary object"); ptr += 4; break; + case DW_FORM_ref_sup8: a.type = Invalid; assert(false && "Unsupported supplementary object"); ptr += 8; break; case DW_FORM_exprloc: a.type = ExprLoc; a.expr.len = LEB128(ptr); a.expr.ptr = ptr; ptr += a.expr.len; break; - case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = cu->isDWARF64() ? 
RD8(ptr) : RD4(ptr); break; + case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = RDref(ptr); break; + case DW_FORM_loclistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_loclists, cu->loclist_base); break; + case DW_FORM_rnglistx: a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_rnglists, cu->rnglist_base); break; default: assert(false && "Unsupported DWARF attribute form"); return false; } @@ -688,6 +764,19 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) id.has_artificial = true; id.is_artificial = true; break; + + case DW_AT_str_offsets_base: + cu->str_offset_base = getPointerInSection(*img, sec_desc_debug_str_offsets, a.sec_offset); + break; + case DW_AT_addr_base: + cu->addr_base = getPointerInSection(*img, sec_desc_debug_addr, a.sec_offset); + break; + case DW_AT_rnglists_base: + cu->rnglist_base = getPointerInSection(*img, sec_desc_debug_rnglists, a.sec_offset); + break; + case DW_AT_loclists_base: + cu->loclist_base = getPointerInSection(*img, sec_desc_debug_loclists, a.sec_offset); + break; } } diff --git a/src/readDwarf.h b/src/readDwarf.h index 1978bf0..67a5ed5 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -11,6 +11,7 @@ typedef unsigned char byte; class PEImage; class DIECursor; +struct SectionDescriptor; enum DebugLevel : unsigned { DbgBasic = 0x1, @@ -138,6 +139,18 @@ struct DWARF_CompilationUnitInfo // lists. 
uint32_t base_address; + // Indirect base address in .debug_addr for addrx forms + byte* addr_base; + + // Indirect base address in .debug_str_offsets for strx forms + byte* str_offset_base; + + // Indirect base address in .debug_loclists for loclistx forms + byte* loclist_base; + + // Indirect base address in the .debug_rnglists for rnglistx forms + byte* rnglist_base; + // Offset within the debug_info section uint32_t cu_offset; byte* start_ptr; @@ -148,7 +161,6 @@ struct DWARF_CompilationUnitInfo byte* read(DebugLevel debug, const PEImage& img, unsigned long *off); bool isDWARF64() const { return is_dwarf64; } - int refSize() const { return isDWARF64() ? 8 : 4; } }; struct DWARF_FileName @@ -526,6 +538,15 @@ public: return RD8(p); } + + unsigned long long RDref(byte* &ptr) const { return cu->isDWARF64() ? RD8(ptr) : RD4(ptr); } + int refSize() const { return cu->isDWARF64() ? 8 : 4; } + + // Obtain the address of a string for a strx form. + const char *resolveIndirectString(uint32_t index) const ; + uint32_t readIndirectAddr(uint32_t index) const ; + uint32_t resolveIndirectSecPtr(uint32_t index, const SectionDescriptor &secDesc, byte *baseAddress) const; + }; // iterate over DWARF debug_line information -- cgit v0.12 From 13e0840e109747378929bed7e5e59aa8a4eb9bcb Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Wed, 1 Dec 2021 13:38:29 -0800 Subject: DWARF5 rnglists and loclists Add support for the new opcode-based format for location lists and range lists. 
--- src/readDwarf.cpp | 189 +++++++++++++++++++++++++++++++++++++++++++++--------- src/readDwarf.h | 11 ++++ 2 files changed, 169 insertions(+), 31 deletions(-) diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 415dac8..b77a1d0 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -390,58 +390,185 @@ void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent) LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off) : parent(parent) - , end(parent.img->debug_loc.endByte()) - , ptr(parent.img->debug_loc.byteAt(off)) { + base = parent.cu->base_address; + isLocLists = (parent.cu->version >= 5); + + const PESection& sec = isLocLists ? parent.img->debug_loclists : parent.img->debug_loc; + ptr = sec.byteAt(off); + end = sec.endByte(); } bool LOCCursor::readNext(LOCEntry& entry) { - if (ptr >= end) - return false; + if (isLocLists) + { + if (parent.debug & DbgDwarfLocLists) + fprintf(stderr, "%s:%d: loclists off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_loclists.sectOff(ptr), parent.entryOff); + + auto readCountedLocation = [&entry](byte* &ptr) { + DWARF_Attribute attr; + attr.type = Block; + attr.block.len = LEB128(ptr); + attr.block.ptr = ptr; + ptr += attr.block.len; + entry.loc = decodeLocation(attr); + return true; + }; + + while (ptr < end) + { + byte type = *ptr++; + switch (type) + { + case DW_LLE_end_of_list: + return false; + case DW_LLE_base_addressx: + base = parent.readIndirectAddr(LEB128(ptr)); + continue; + case DW_LLE_startx_endx: + entry.beg_offset = parent.readIndirectAddr(LEB128(ptr)); + entry.end_offset = parent.readIndirectAddr(LEB128(ptr)); + return readCountedLocation(ptr); + case DW_LLE_startx_length: + entry.beg_offset = parent.readIndirectAddr(LEB128(ptr)); + entry.end_offset = entry.beg_offset + LEB128(ptr); + return readCountedLocation(ptr); + case DW_LLE_offset_pair: + entry.beg_offset = LEB128(ptr); + entry.end_offset = LEB128(ptr); + return readCountedLocation(ptr); + case 
DW_LLE_default_location: + entry = {}; + entry.isDefault = true; + return readCountedLocation(ptr); + case DW_LLE_base_address: + base = parent.RDAddr(ptr); + continue; + case DW_LLE_start_end: + entry.beg_offset = parent.RDAddr(ptr); + entry.end_offset = parent.RDAddr(ptr); + return readCountedLocation(ptr); + case DW_LLE_start_length: + entry.beg_offset = parent.RDAddr(ptr); + entry.end_offset = entry.beg_offset + LEB128(ptr); + return readCountedLocation(ptr); + default: + fprintf(stderr, "ERROR: %s:%d: unknown loclists entry %d at offs=%x die_offs=%x\n", __FUNCTION__, __LINE__, + type, parent.img->debug_loclists.sectOff(ptr - 1), parent.entryOff); - if (parent.debug & DbgDwarfLocLists) - fprintf(stderr, "%s:%d: loclist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, - parent.img->debug_loc.sectOff(ptr), parent.entryOff); + assert(false && "unknown rnglist opcode"); + return false; + } + } + } + else + { + if (ptr >= end) + return false; - entry.beg_offset = (unsigned long) parent.RDAddr(ptr); - entry.end_offset = (unsigned long) parent.RDAddr(ptr); - if (!entry.beg_offset && !entry.end_offset) - return false; + if (parent.debug & DbgDwarfLocLists) + fprintf(stderr, "%s:%d: loclist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_loc.sectOff(ptr), parent.entryOff); - DWARF_Attribute attr; - attr.type = Block; - attr.block.len = RD2(ptr); - attr.block.ptr = ptr; - entry.loc = decodeLocation(attr); - ptr += attr.expr.len; - return true; + entry.beg_offset = (unsigned long) parent.RDAddr(ptr); + entry.end_offset = (unsigned long) parent.RDAddr(ptr); + if (!entry.beg_offset && !entry.end_offset) + return false; + + DWARF_Attribute attr; + attr.type = Block; + attr.block.len = RD2(ptr); + attr.block.ptr = ptr; + entry.loc = decodeLocation(attr); + ptr += attr.expr.len; + return true; + } + + return false; } RangeCursor::RangeCursor(const DIECursor& parent, unsigned long off) : parent(parent) - , end(parent.img->debug_ranges.endByte()) - , 
ptr(parent.img->debug_ranges.byteAt(off)) { + base = parent.cu->base_address; + isRngLists = (parent.cu->version >= 5); + + const PESection& sec = isRngLists ? parent.img->debug_rnglists : parent.img->debug_ranges; + ptr = sec.byteAt(off); + end = sec.endByte(); } bool RangeCursor::readNext(RangeEntry& entry) { - while (ptr < end) { + if (isRngLists) + { if (parent.debug & DbgDwarfRangeLists) - fprintf(stderr, "%s:%d: rangelist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, - parent.img->debug_ranges.sectOff(ptr), parent.entryOff); + fprintf(stderr, "%s:%d: rnglists off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_rnglists.sectOff(ptr), parent.entryOff); - entry.pclo = parent.RDAddr(ptr); - entry.pchi = parent.RDAddr(ptr); - if (!entry.pclo && !entry.pchi) - return false; + while (ptr < end) + { + byte type = *ptr++; + switch (type) + { + case DW_RLE_end_of_list: + return false; + case DW_RLE_base_addressx: + base = parent.readIndirectAddr(LEB128(ptr)); + continue; + case DW_RLE_startx_endx: + entry.pclo = parent.readIndirectAddr(LEB128(ptr)); + entry.pchi = parent.readIndirectAddr(LEB128(ptr)); + return true; + case DW_RLE_startx_length: + entry.pclo = parent.readIndirectAddr(LEB128(ptr)); + entry.pchi = entry.pclo + LEB128(ptr); + return true; + case DW_RLE_offset_pair: + entry.pclo = LEB128(ptr); + entry.pchi = LEB128(ptr); + entry.addBase(base); + return true; + case DW_RLE_base_address: + base = parent.RDAddr(ptr); + continue; + case DW_RLE_start_end: + entry.pclo = parent.RDAddr(ptr); + entry.pchi = parent.RDAddr(ptr); + return true; + case DW_RLE_start_length: + entry.pclo = parent.RDAddr(ptr); + entry.pchi = entry.pclo + LEB128(ptr); + return true; + default: + fprintf(stderr, "ERROR: %s:%d: unknown rnglists entry %d at offs=%x die_offs=%x\n", __FUNCTION__, __LINE__, + type, parent.img->debug_rnglists.sectOff(ptr - 1), parent.entryOff); - if (entry.pclo >= entry.pchi) - continue; + assert(false && "unknown rnglist opcode"); + return false; + 
} + } + } + else + { + while (ptr < end) { + if (parent.debug & DbgDwarfRangeLists) + fprintf(stderr, "%s:%d: rangelist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__, + parent.img->debug_ranges.sectOff(ptr), parent.entryOff); + + entry.pclo = parent.RDAddr(ptr); + entry.pchi = parent.RDAddr(ptr); + if (!entry.pclo && !entry.pchi) + return false; - entry.addBase(parent.cu->base_address); - return true; + if (entry.pclo >= entry.pchi) + continue; + + entry.addBase(parent.cu->base_address); + return true; + } } return false; diff --git a/src/readDwarf.h b/src/readDwarf.h index 67a5ed5..f9593f9 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -435,7 +435,14 @@ class LOCEntry public: unsigned long beg_offset; unsigned long end_offset; + bool isDefault; Location loc; + + void addBase(uint32_t base) + { + beg_offset += base; + end_offset += base; + } }; // Location list cursor @@ -445,8 +452,10 @@ public: LOCCursor(const DIECursor& parent, unsigned long off); const DIECursor& parent; + uint32_t base; byte* end; byte* ptr; + bool isLocLists; bool readNext(LOCEntry& entry); }; @@ -472,8 +481,10 @@ public: RangeCursor(const DIECursor& parent, unsigned long off); const DIECursor& parent; + uint32_t base; byte *end; byte *ptr; + bool isRngLists; bool readNext(RangeEntry& entry); }; -- cgit v0.12 From 35c615b756322f7755a5fbe1846129be8b9fa936 Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Wed, 1 Dec 2021 20:55:09 -0800 Subject: Fix file names in dwarf5 line tables --- src/dwarflines.cpp | 90 ++++++++++++++++++++++++++++++++++++++++++------------ src/readDwarf.h | 9 +++--- 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/dwarflines.cpp b/src/dwarflines.cpp index 13106e5..11380ae 100644 --- a/src/dwarflines.cpp +++ b/src/dwarflines.cpp @@ -5,6 +5,7 @@ // see file LICENSE for further details // +#include #include "PEImage.h" #include "mspdb.h" #include "dwarf.h" @@ -25,6 +26,13 @@ bool isRelativePath(const std::string& s) return true; } +static void 
addTrailingSlash(std::string& dir) +{ + // Make sure dirs always end in a trailing slash + if (!dir.size() || (dir.back() != '\\' && dir.back() != '/')) + dir += '\\'; +} + static int cmpAdr(const void* s1, const void* s2) { const mspdb::LineInfoEntry* e1 = (const mspdb::LineInfoEntry*) s1; @@ -63,7 +71,7 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat const DWARF_FileName* dfn; if(state.lineInfo_file == 0) - dfn = state.file_ptr; + dfn = &state.cur_file; else if(state.lineInfo_file > 0 && state.lineInfo_file <= state.files.size()) dfn = &state.files[state.lineInfo_file - 1]; else @@ -74,8 +82,6 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat dfn->dir_index > 0 && dfn->dir_index <= state.include_dirs.size()) { std::string dir = state.include_dirs[dfn->dir_index - 1]; - if(dir.length() > 0 && dir[dir.length() - 1] != '/' && dir[dir.length() - 1] != '\\') - dir.append("\\"); fname = dir + fname; } for(size_t i = 0; i < fname.length(); i++) @@ -241,7 +247,6 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) DWARF_LineState state; state.seg_offset = img.getImageBase() + img.getSection(img.text.secNo).VirtualAddress; - DWARF_FileName fname; if (hdr->version <= 4) { // dirs @@ -249,16 +254,19 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { if(*p == 0) break; - state.include_dirs.push_back((const char*) p); - p += strlen((const char*) p) + 1; + state.include_dirs.emplace_back((const char*) p); + auto &dir = state.include_dirs.back(); + p += dir.size() + 1; + addTrailingSlash(dir); } p++; // files while(p < end && *p) { + DWARF_FileName fname; fname.read(p); - state.files.push_back(fname); + state.files.emplace_back(std::move(fname)); } p++; } @@ -278,31 +286,48 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) unsigned int directories_count = LEB128(p); for (int o = 0; o < directories_count; o++) { 
- for (int i = 0; i < directory_entry_format_count; i++) + for (const auto &typeForm : directory_entry_format) { - switch (directory_entry_format[i].type) + switch (typeForm.type) { case DW_LNCT_path: - switch (directory_entry_format[i].form) + { + switch (typeForm.form) { case DW_FORM_line_strp: { size_t offset = cu.isDWARF64() ? RD8(p) : RD4(p); - state.include_dirs.push_back((const char*)img.debug_line_str.byteAt(offset)); + state.include_dirs.emplace_back((const char*)img.debug_line_str.byteAt(offset)); break; } case DW_FORM_string: - state.include_dirs.push_back((const char*)p); + state.include_dirs.emplace_back((const char*)p); p += strlen((const char*)p) + 1; break; default: + fprintf(stderr, "%s:%d: ERROR: invalid form=%d for path lineHdrOffs=%x\n", __FUNCTION__, __LINE__, + typeForm.form, off); + return false; } + + auto& dir = state.include_dirs.back(); + + // Relative dirs are relative to the first directory + // in the table. + if (state.include_dirs.size() > 1 && isRelativePath(dir)) + dir = state.include_dirs.front() + dir; + + addTrailingSlash(dir); break; + } case DW_LNCT_directory_index: case DW_LNCT_timestamp: case DW_LNCT_size: default: + fprintf(stderr, "%s:%d: ERROR: unexpected type=%d form=%d for directory path lineHdrOffs=%x\n", __FUNCTION__, __LINE__, + typeForm.type, typeForm.form, off); + return false; } } @@ -320,12 +345,14 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) unsigned int file_names_count = LEB128(p); for (int o = 0; o < file_names_count; o++) { - for (int i = 0; i < file_name_entry_format_count; i++) + DWARF_FileName fname; + + for (const auto &typeForm : file_name_entry_format) { - switch (file_name_entry_format[i].type) + switch (typeForm.type) { case DW_LNCT_path: - switch (directory_entry_format[i].form) + switch (typeForm.form) { case DW_FORM_line_strp: { @@ -338,22 +365,41 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) p += strlen((const char*)p) + 
1; break; default: + fprintf(stderr, "%s:%d: ERROR: invalid form=%d for path lineHdrOffs=%x\n", __FUNCTION__, __LINE__, + typeForm.form, off); + + + assert(false, "invalid path form"); return false; } break; case DW_LNCT_directory_index: - if (file_name_entry_format[i].form == DW_FORM_udata) - fname.dir_index = LEB128(p); + // bias the directory index by 1 since _flushDWARFLines + // will check for 0 and subtract one (which is + // useful for DWARF4). + if (typeForm.form == DW_FORM_udata) + { + fname.dir_index = LEB128(p) + 1; + } else + { + fprintf(stderr, "%s:%d: ERROR: invalid form=%d for directory index lineHdrOffs=%x\n", __FUNCTION__, __LINE__, + typeForm.form, off); + return false; + } break; case DW_LNCT_timestamp: case DW_LNCT_size: default: + fprintf(stderr, "%s:%d: ERROR: unexpected type=%d form=%d for file path lineHdrOffs=%x\n", __FUNCTION__, __LINE__, + typeForm.type, typeForm.form, off); + return false; } } - state.files.push_back(fname); + + state.files.emplace_back(std::move(fname)); } } @@ -406,8 +452,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) break; } case DW_LNE_define_file: - fname.read(p); - state.file_ptr = &fname; + state.cur_file.read(p); state.file = 0; break; case DW_LNE_set_discriminator: @@ -433,6 +478,11 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) break; case DW_LNS_set_file: state.file = LEB128(p); + // DWARF5 numbers all files starting at zero. We will + // subtract one in _flushDWARFLines when indexing the files + // array. 
+ if (hdr->version >= 5) + state.file += 1; break; case DW_LNS_set_column: state.column = LEB128(p); diff --git a/src/readDwarf.h b/src/readDwarf.h index f9593f9..5e1db99 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -165,7 +165,7 @@ struct DWARF_CompilationUnitInfo struct DWARF_FileName { - const char* file_name; + std::string file_name; unsigned int dir_index; unsigned long lastModification; unsigned long fileLength; @@ -173,7 +173,7 @@ struct DWARF_FileName void read(byte* &p) { file_name = (const char*)p; - p += strlen((const char*)p) + 1; + p += file_name.size() + 1; dir_index = LEB128(p); lastModification = LEB128(p); fileLength = LEB128(p); @@ -345,7 +345,7 @@ struct DWARF2_LineNumberProgramHeader struct DWARF_LineState { // hdr info - std::vector<const char*> include_dirs; + std::vector<std::string> include_dirs; std::vector<DWARF_FileName> files; unsigned long address; @@ -362,7 +362,7 @@ struct DWARF_LineState unsigned int discriminator; // not part of the "documented" state - DWARF_FileName* file_ptr; + DWARF_FileName cur_file; unsigned long seg_offset; unsigned long section; unsigned long last_addr; @@ -371,7 +371,6 @@ struct DWARF_LineState DWARF_LineState() { - file_ptr = nullptr; seg_offset = 0x400000; last_addr = 0; lineInfo_file = 0; -- cgit v0.12 From 13a21e913a1bdeaba5eeba4c66f7ae08e0596a1a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 3 Dec 2021 11:58:38 +0100 Subject: ci: verify DWARF5 symbols Technically, this does not validate the DWARF5 parsing completely, but it looks for the `main` symbol to show up as expected when compiling with whatever GCC version MSYS2/Git for Windows uses (which is typically very close to the latest GCC version available). 
Signed-off-by: Johannes Schindelin --- .github/workflows/build-and-test.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 9cfcbcc..2372a71 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -52,4 +52,9 @@ jobs: gcc -g -o hello.exe hello.c && bin/${{env.BUILD_CONFIGURATION}}*/cv2pdb.exe hello.exe world.exe && - ls -l hello* world* + + ls -l hello* world* && + + curl -Lo cvdump.exe https://raw.githubusercontent.com/microsoft/microsoft-pdb/HEAD/cvdump/cvdump.exe && + ./cvdump.exe world.pdb >world.cvdump && + grep '^S_PUB32: .*, Flags: 00000000, main$' world.cvdump -- cgit v0.12