From 28d3fad41817310b8fc2fd6d9e46137249d65fad Mon Sep 17 00:00:00 2001 From: Neeraj Singh Date: Mon, 29 Nov 2021 12:44:16 -0800 Subject: Refactor PE image section handling. Add debug logging. Encapsulate dwarf-related PE sections into the PESection class. Remove some unused sections. Add helpers for common section operations. Move the context set via DIECursor::setContext to be static members of the class and add a debug context there. Add a standard method of enabling debug logging across the dwarf and PDB code. --- src/PEImage.cpp | 67 +++++++++++---------------------- src/PEImage.h | 108 +++++++++++++++++++++++++++++++++++++++++++++-------- src/cv2pdb.cpp | 20 ++++++++-- src/cv2pdb.h | 4 +- src/dwarf2pdb.cpp | 78 ++++++++++++++++++++++---------------- src/dwarflines.cpp | 50 +++++++++++-------------- src/main.cpp | 20 +++++++--- src/readDwarf.cpp | 85 +++++++++++++++++++++++------------------ src/readDwarf.h | 30 +++++++++++++-- 9 files changed, 291 insertions(+), 171 deletions(-) diff --git a/src/PEImage.cpp b/src/PEImage.cpp index 99c6776..247d514 100644 --- a/src/PEImage.cpp +++ b/src/PEImage.cpp @@ -35,19 +35,6 @@ PEImage::PEImage(const TCHAR* iname) , hdr32(0) , hdr64(0) , fd(-1) -, debug_aranges(0) -, debug_pubnames(0) -, debug_pubtypes(0) -, debug_info(0), debug_info_length(0) -, debug_abbrev(0), debug_abbrev_length(0) -, debug_line(0), debug_line_length(0) -, debug_frame(0), debug_frame_length(0) -, debug_str(0) -, debug_loc(0), debug_loc_length(0) -, debug_ranges(0), debug_ranges_length(0) -, codeSegment(0) -, linesSegment(-1) -, reloc(0), reloc_length(0) , nsec(0) , nsym(0) , symtable(0) @@ -470,6 +457,15 @@ static DWORD sizeInImage(const IMAGE_SECTION_HEADER& sec) return sec.SizeOfRawData < sec.Misc.VirtualSize ? sec.SizeOfRawData : sec.Misc.VirtualSize; } +void PEImage::initSec(PESection& peSec, int secNo) const +{ + auto &imgSec = sec[secNo]; + + peSec.length = sizeInImage(imgSec); + peSec.base = DPV(imgSec.PointerToRawData, peSec.length); + peSec.secNo = secNo; +} + void PEImage::initDWARFSegments() { for(int s = 0; s < nsec; s++) @@ -480,49 +476,30 @@ void PEImage::initDWARFSegments() int off = strtol(name + 1, 0, 10); name = strtable + off; } - if(strcmp(name, ".debug_aranges") == 0) - debug_aranges = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_pubnames") == 0) - debug_pubnames = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_pubtypes") == 0) - debug_pubtypes = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_info") == 0) - debug_info = DPV(sec[s].PointerToRawData, debug_info_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_abbrev") == 0) - debug_abbrev = DPV(sec[s].PointerToRawData, debug_abbrev_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_line") == 0) - debug_line = DPV(sec[linesSegment = s].PointerToRawData, debug_line_length = sizeInImage(sec[s])); - if (strcmp(name, ".debug_line_str") == 0) - debug_line_str = DPV(sec[s].PointerToRawData, debug_line_str_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_frame") == 0) - debug_frame = DPV(sec[s].PointerToRawData, debug_frame_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_str") == 0) - debug_str = DPV(sec[s].PointerToRawData, sizeInImage(sec[s])); - if(strcmp(name, ".debug_loc") == 0) - debug_loc = DPV(sec[s].PointerToRawData, debug_loc_length = sizeInImage(sec[s])); - if(strcmp(name, ".debug_ranges") == 0) - debug_ranges = DPV(sec[s].PointerToRawData, debug_ranges_length = sizeInImage(sec[s])); - if(strcmp(name, ".reloc") == 0) - reloc = DPV(sec[s].PointerToRawData, reloc_length = sizeInImage(sec[s])); - if(strcmp(name, ".text") == 0) - codeSegment = s; + + for (const SectionDescriptor *sec_desc : sec_descriptors) { + if (!strcmp(name, sec_desc->name)) { + PESection& peSec = this->*(sec_desc->pSec); + initSec(peSec, s); + } + } } } bool PEImage::relocateDebugLineInfo(unsigned int img_base) { - if(!reloc || !reloc_length) + if(!reloc.isPresent()) return true; - char* relocbase = reloc; - char* relocend = reloc + reloc_length; + byte* relocbase = reloc.startByte(); + byte* relocend = reloc.endByte(); while(relocbase < relocend) { unsigned int virtadr = *(unsigned int *) relocbase; unsigned int chksize = *(unsigned int *) (relocbase + 4); char* p = RVA (virtadr, 1); - if(p >= debug_line && p < debug_line + debug_line_length) + if(debug_line.isPtrInside(p)) { for (unsigned int w = 8; w < chksize; w += 2) { @@ -536,7 +513,7 @@ bool PEImage::relocateDebugLineInfo(unsigned int img_base) } } } - if(chksize == 0 || chksize >= reloc_length) + if(chksize == 0 || chksize >= reloc.length) break; relocbase += chksize; } @@ -545,7 +522,7 @@ bool PEImage::relocateDebugLineInfo(unsigned int img_base) int PEImage::getRelocationInLineSegment(unsigned int offset) const { - return getRelocationInSegment(linesSegment, offset); + return getRelocationInSegment(debug_line.secNo, offset); } int PEImage::getRelocationInSegment(int segment, unsigned int offset) const diff --git a/src/PEImage.h b/src/PEImage.h index dd170bf..a3e3cd7 100644 --- a/src/PEImage.h +++ b/src/PEImage.h @@ -15,6 +15,8 @@ struct OMFDirHeader; struct OMFDirEntry; +typedef unsigned char byte; + struct SymbolInfo { int seg; @@ -22,6 +24,72 @@ struct SymbolInfo bool dllimport; }; +struct PESection +{ + byte* base; + unsigned long length; + unsigned int secNo; + + PESection() + : base(0) + , length(0) + , secNo(0) + { + } + + byte* byteAt(unsigned int off) const + { + return base + off; + } + + byte* startByte() const + { + return byteAt(0); + } + + byte* endByte() const + { + return byteAt(0) + length; + } + + bool isPresent() const + { + return base && length; + } + + bool isPtrInside(const void *p) const + { + auto pInt = (uintptr_t)p; + return (pInt >= (uintptr_t)base && pInt < (uintptr_t)base + length); + } + + unsigned int sectOff(void *p) const + { + return (unsigned int)((uintptr_t)p - (uintptr_t)base); + } +}; + +// Define the list of interesting PE sections in one place so that we can +// generate definitions needed to populate our pointers and reference each +// section. + +#define SECTION_LIST() \ + EXPANDSEC(debug_addr) \ + EXPANDSEC(debug_info) \ + EXPANDSEC(debug_abbrev) \ + EXPANDSEC(debug_line) \ + EXPANDSEC(debug_line_str) \ + EXPANDSEC(debug_frame) \ + EXPANDSEC(debug_str) \ + EXPANDSEC(debug_str_offsets) \ + EXPANDSEC(debug_loc) \ + EXPANDSEC(debug_loclists) \ + EXPANDSEC(debug_ranges) \ + EXPANDSEC(debug_rnglists) \ + EXPANDSEC(reloc) \ + EXPANDSEC(text) + + #define IMGHDR(x) (hdr32 ? hdr32->x : hdr64->x) class PEImage : public LastError @@ -70,6 +138,7 @@ public: bool save(const TCHAR* oname); bool replaceDebugSection (const void* data, int datalen, bool initCV); + void initSec(PESection& peSec, int secNo) const; bool initCVPtr(bool initDbgDir); bool initDbgPtr(bool initDbgDir); bool initDWARFPtr(bool initDbgDir); @@ -77,7 +146,7 @@ public: void initDWARFSegments(); bool relocateDebugLineInfo(unsigned int img_base); - bool hasDWARF() const { return debug_line != 0; } + bool hasDWARF() const { return debug_line.isPresent(); } bool isX64() const { return x64; } bool isDBG() const { return dbgfile; } @@ -131,23 +200,30 @@ private: public: //dwarf - char* debug_aranges; - char* debug_pubnames; - char* debug_pubtypes; - char* debug_info; unsigned long debug_info_length; - char* debug_abbrev; unsigned long debug_abbrev_length; - char* debug_line; unsigned long debug_line_length; - char* debug_line_str; unsigned long debug_line_str_length; - char* debug_frame; unsigned long debug_frame_length; - char* debug_str; - char* debug_loc; unsigned long debug_loc_length; - char* debug_ranges; unsigned long debug_ranges_length; - char* reloc; unsigned long reloc_length; - - int linesSegment; - int codeSegment; +#define EXPANDSEC(name) PESection name; + SECTION_LIST() +#undef EXPANDSEC + int cv_base; }; +struct SectionDescriptor { + const char *name; + PESection PEImage::* pSec; +}; + +#define EXPANDSEC(name) constexpr SectionDescriptor sec_desc_##name { "." #name, &PEImage::name }; +SECTION_LIST() +#undef EXPANDSEC + +constexpr const SectionDescriptor *sec_descriptors[] = +{ +#define EXPANDSEC(name) &sec_desc_##name, + SECTION_LIST() +#undef EXPANDSEC +}; + + +#undef SECTION_LIST #endif //__PEIMAGE_H__ diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp index bb4727b..696cb74 100644 --- a/src/cv2pdb.cpp +++ b/src/cv2pdb.cpp @@ -17,7 +17,7 @@ static const int typePrefix = 4; -CV2PDB::CV2PDB(PEImage& image) +CV2PDB::CV2PDB(PEImage& image, DebugLevel debug_) : img(image), pdb(0), dbi(0), tpi(0), ipi(0), libraries(0), rsds(0), rsdsLen(0), modules(0), globmod(0) , segMap(0), segMapDesc(0), segFrame2Index(0), globalTypeHeader(0) , globalTypes(0), cbGlobalTypes(0), allocGlobalTypes(0) @@ -28,7 +28,7 @@ CV2PDB::CV2PDB(PEImage& image) , srcLineStart(0), srcLineSections(0) , pointerTypes(0) , Dversion(2) -, debug(false) +, debug(debug_) , classEnumType(0), ifaceEnumType(0), cppIfaceEnumType(0), structEnumType(0) , classBaseType(0), ifaceBaseType(0), cppIfaceBaseType(0), structBaseType(0) , emptyFieldListType(0) @@ -149,7 +149,7 @@ bool CV2PDB::openPDB(const TCHAR* pdbname, const TCHAR* pdbref) if (!initMsPdb ()) return setError("cannot load PDB helper DLL"); - if (debug) + if (debug & DbgBasic) { extern HMODULE modMsPdb; char modpath[260]; @@ -737,6 +737,9 @@ int CV2PDB::countNestedTypes(const codeview_reftype* fieldlist, int type) int CV2PDB::addAggregate(codeview_type* dtype, bool clss, int n_element, int fieldlist, int property, int derived, int vshape, int structlen, const char* name, const char* uniquename) { + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding aggregate %s -> fieldlist:%d\n", __FUNCTION__, __LINE__, name, fieldlist); + dtype->struct_v2.id = clss ? (v3 ? LF_CLASS_V3 : LF_CLASS_V2) : (v3 ? LF_STRUCTURE_V3 : LF_STRUCTURE_V2); dtype->struct_v2.n_element = n_element; dtype->struct_v2.fieldlist = fieldlist; @@ -771,6 +774,9 @@ int CV2PDB::addStruct(codeview_type* dtype, int n_element, int fieldlist, int pr int CV2PDB::addEnum(codeview_type* dtype, int count, int fieldlist, int property, int type, const char*name) { + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding enum %s -> fieldlist:%d\n", __FUNCTION__, __LINE__, name, fieldlist); + dtype->enumeration_v2.id = (v3 ? LF_ENUM_V3 : LF_ENUM_V2); dtype->enumeration_v2.count = count; dtype->enumeration_v2.fieldlist = fieldlist; @@ -2074,6 +2080,9 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation) if(type == 0x78) basetype = 0x75; // dchar type not understood by debugger, use uint instead + if (debug & DbgPdbTypes) + fprintf(stderr, "%s:%d: adding typedef %s -> %d\n", __FUNCTION__, __LINE__, name, type); + int typedefType; if(useTypedefEnum) { @@ -2981,6 +2990,9 @@ bool CV2PDB::addPublics() char symname[kMaxNameLen]; dsym2c((BYTE*)sym->data_v1.p_name.name, sym->data_v1.p_name.namelen, symname, sizeof(symname)); int type = translateType(sym->data_v1.symtype); + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: AddPublic2 %s\n", __FUNCTION__, __LINE__, (const char *)symname); + if (mod) rc = mod->AddPublic2(symname, sym->data_v1.segment, sym->data_v1.offset, type); else @@ -2997,6 +3009,8 @@ bool CV2PDB::addPublics() bool CV2PDB::initGlobalSymbols() { + if (debug & DbgBasic) + fprintf(stderr, "%s:%d, countEntries: %d\n", __FUNCTION__, __LINE__, (int)countEntries); for (int m = 0; m < countEntries; m++) { OMFDirEntry* entry = img.getCVEntry(m); diff --git a/src/cv2pdb.h b/src/cv2pdb.h index ae72ca1..26b2143 100644 --- a/src/cv2pdb.h +++ b/src/cv2pdb.h @@ -30,7 +30,7 @@ class CFIIndex; class CV2PDB : public LastError { public: - CV2PDB(PEImage& image); + CV2PDB(PEImage& image, DebugLevel debug); ~CV2PDB(); bool cleanup(bool commit); @@ -265,7 +265,7 @@ public: bool useGlobalMod; bool thisIsNotRef; bool v3; - bool debug; + DebugLevel debug; const char* lastError; int srcLineSections; diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp index bc030e6..f58067d 100644 --- a/src/dwarf2pdb.cpp +++ b/src/dwarf2pdb.cpp @@ -230,8 +230,8 @@ class CFICursor { public: CFICursor(const PEImage& img) - : beg((byte*)img.debug_frame) - , end((byte*)img.debug_frame + img.debug_frame_length) + : beg(img.debug_frame.startByte()) + , end(img.debug_frame.endByte()) , ptr(beg) { default_address_size = img.isX64() ? 8 : 4; @@ -489,7 +489,7 @@ Location findBestCFA(const PEImage& img, const CFIIndex* index, unsigned int pcl { bool x64 = img.isX64(); Location ebp = { Location::RegRel, x64 ? 6 : 5, x64 ? 16 : 8 }; - if (!img.debug_frame) + if (!img.debug_frame.isPresent()) return ebp; byte *fde_ptr = index->lookup(pclo, pchi); @@ -515,7 +515,6 @@ Location findBestCFA(const PEImage& img, const CFIIndex* index, unsigned int pcl class LOCEntry { public: - byte* ptr; unsigned long beg_offset; unsigned long end_offset; Location loc; @@ -529,8 +528,8 @@ class LOCCursor public: LOCCursor(const PEImage& image, unsigned long off) : img (image) - , end((byte*)img.debug_loc + img.debug_loc_length) - , ptr((byte*)img.debug_loc + off) + , end(img.debug_loc.endByte()) + , ptr(img.debug_loc.byteAt(off)) { default_address_size = img.isX64() ? 8 : 4; } @@ -682,7 +681,7 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo) dsym->block_v3.end = 0; // destSize + sizeof(dsym->block_v3) + 12; dsym->block_v3.length = id.pchi - id.pclo; dsym->block_v3.offset = id.pclo - codeSegOff; - dsym->block_v3.segment = img.codeSegment + 1; + dsym->block_v3.segment = img.text.secNo + 1; dsym->block_v3.name[0] = 0; int len = sizeof(dsym->block_v3); for (; len & 3; len++) @@ -701,6 +700,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE checkUdtSymbolAlloc(100 + kMaxNameLen); + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: Adding a proc: %s at %x\n", __FUNCTION__, __LINE__, procid.name, pclo); + // GLOBALPROC codeview_symbol*cvs = (codeview_symbol*) (udtSymbols + cbUdtSymbols); cvs->proc_v2.id = v3 ? S_GPROC_V3 : S_GPROC_V2; @@ -711,7 +713,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE cvs->proc_v2.debug_start = pclo - pclo; cvs->proc_v2.debug_end = pchi - pclo; cvs->proc_v2.offset = pclo; - cvs->proc_v2.segment = img.codeSegment + 1; + cvs->proc_v2.segment = img.text.secNo + 1; cvs->proc_v2.proctype = 0; // translateType(sym->proc_v1.proctype); cvs->proc_v2.flags = 0; @@ -793,8 +795,8 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, DWARF_CompilationUnit* cu, DIE id.pchi = 0; // TODO: handle base address selection - byte *r = (byte *)img.debug_ranges + id.ranges; - byte *rend = (byte *)img.debug_ranges + img.debug_ranges_length; + byte *r = img.debug_ranges.byteAt(id.ranges); + byte *rend = img.debug_ranges.endByte(); while (r < rend) { uint64_t pclo, pchi; @@ -1122,7 +1124,7 @@ bool CV2PDB::addDWARFTypes() checkUdtSymbolAlloc(100); int prefix = 4; - DWORD* ddata = new DWORD [img.debug_info_length/4]; // large enough + DWORD* ddata = new DWORD [img.debug_info.length/4]; // large enough unsigned char *data = (unsigned char*) (ddata + prefix); unsigned int off = 0; unsigned int len; @@ -1131,7 +1133,7 @@ bool CV2PDB::addDWARFTypes() // SSEARCH codeview_symbol* cvs = (codeview_symbol*) (data + off); cvs->ssearch_v1.id = S_SSEARCH_V1; - cvs->ssearch_v1.segment = img.codeSegment + 1; + cvs->ssearch_v1.segment = img.text.secNo + 1; cvs->ssearch_v1.offset = 0; len = sizeof(cvs->ssearch_v1); for (; len & (align-1); len++) @@ -1350,9 +1352,9 @@ bool CV2PDB::mapTypes() { int typeID = nextUserType; unsigned long off = 0; - while (off < img.debug_info_length) + while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)(img.debug_info + off); + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); DWARF_InfoData id; @@ -1396,6 +1398,9 @@ bool CV2PDB::mapTypes() off += sizeof(cu->unit_length) + cu->unit_length; } + if (debug & DbgBasic) + fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size()); + nextDwarfType = typeID; return true; } @@ -1407,17 +1412,21 @@ bool CV2PDB::createTypes() int typeID = nextUserType; int pointerAttr = img.isX64() ? 0x1000C : 0x800A; + if (debug & DbgBasic) + fprintf(stderr, "%s:%d: createTypes()\n", __FUNCTION__, __LINE__); + unsigned long off = 0; - while (off < img.debug_info_length) + while (off < img.debug_info.length) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)(img.debug_info + off); + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.byteAt(off); DIECursor cursor(cu, (byte*)cu + sizeof(DWARF_CompilationUnit)); DWARF_InfoData id; while (cursor.readNext(id)) { - //printf("0x%08x, level = %d, id.code = %d, id.tag = %d\n", - // (unsigned char*)cu + id.entryOff - (unsigned char*)img.debug_info, cursor.level, id.code, id.tag); + if (debug & DbgDwarfTagRead) + fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__, + cursor.entryOff, cursor.level, id.code, id.tag); if (id.abstract_origin) mergeAbstractOrigin(id, cu); @@ -1497,8 +1506,8 @@ bool CV2PDB::createTypes() else if (id.ranges != ~0) { entry_point = ~0; - byte* r = (byte*)img.debug_ranges + id.ranges; - byte* rend = (byte*)img.debug_ranges + img.debug_ranges_length; + byte* r = (byte*)img.debug_ranges.byteAt(id.ranges); + byte* rend = (byte*)img.debug_ranges.endByte(); while (r < rend) { uint64_t pclo, pchi; @@ -1524,7 +1533,12 @@ bool CV2PDB::createTypes() } if (entry_point) - mod->AddPublic2(id.name, img.codeSegment + 1, entry_point - codeSegOff, 0); + { + if (debug & DbgPdbSyms) + fprintf(stderr, "%s:%d: Adding a public: %s at %x\n", __FUNCTION__, __LINE__, id.name, entry_point); + + mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0); + } } if (id.pclo && id.pchi) @@ -1555,10 +1569,10 @@ bool CV2PDB::createTypes() #if !FULL_CONTRIB if (id.dir && id.name) { - if (id.ranges > 0 && id.ranges < img.debug_ranges_length) + if (id.ranges > 0 && id.ranges < img.debug_ranges.length) { - unsigned char* r = (unsigned char*)img.debug_ranges + id.ranges; - unsigned char* rend = (unsigned char*)img.debug_ranges + img.debug_ranges_length; + unsigned char* r = img.debug_ranges.byteAt(id.ranges); + unsigned char* rend = img.debug_ranges.endByte(); while (r < rend) { unsigned long pclo = RD4(r); @@ -1644,10 +1658,10 @@ bool CV2PDB::createTypes() bool CV2PDB::createDWARFModules() { - if(!img.debug_info) + if(!img.debug_info.isPresent()) return setError("no .debug_info section found"); - codeSegOff = img.getImageBase() + img.getSection(img.codeSegment).VirtualAddress; + codeSegOff = img.getImageBase() + img.getSection(img.text.secNo).VirtualAddress; mspdb::Mod* mod = globalMod(); for (int s = 0; s < img.countSections(); s++) @@ -1662,7 +1676,7 @@ bool CV2PDB::createDWARFModules() #if FULL_CONTRIB // we use a single global module, so we can simply add the whole text segment int segFlags = 0x60101020; // 0x40401040, 0x60500020; // TODO - int s = img.codeSegment; + int s = img.text.secNo; int pclo = 0; // img.getImageBase() + img.getSection(s).VirtualAddress; int pchi = pclo + img.getSection(s).Misc.VirtualSize; int rc = mod->AddSecContrib(s + 1, pclo, pchi - pclo, segFlags); @@ -1682,7 +1696,7 @@ bool CV2PDB::createDWARFModules() appendComplex(0x52, 0x42, 12, "creal"); } - DIECursor::setContext(&img); + DIECursor::setContext(&img, debug); countEntries = 0; if (!mapTypes()) @@ -1725,10 +1739,10 @@ bool CV2PDB::createDWARFModules() bool CV2PDB::addDWARFLines() { - if(!img.debug_line) + if(!img.debug_line.isPresent()) return setError("no .debug_line section found"); - if (!interpretDWARFLines(img, globalMod())) + if (!interpretDWARFLines(img, globalMod(), debug)) return setError("cannot add line number info to module"); return true; @@ -1739,7 +1753,7 @@ bool CV2PDB::addDWARFPublics() mspdb::Mod* mod = globalMod(); int type = 0; - int rc = mod->AddPublic2("public_all", img.codeSegment + 1, 0, 0x1000); + int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, 0x1000); if (rc <= 0) return setError("cannot add public"); return true; @@ -1759,7 +1773,7 @@ bool CV2PDB::writeDWARFImage(const TCHAR* opath) void CV2PDB::build_cfi_index() { - if (img.debug_frame == NULL) + if (!img.debug_frame.isPresent()) return; cfi_index = new CFIIndex(img); } diff --git a/src/dwarflines.cpp b/src/dwarflines.cpp index b061f79..2e0570d 100644 --- a/src/dwarflines.cpp +++ b/src/dwarflines.cpp @@ -10,6 +10,8 @@ #include "dwarf.h" #include "readDwarf.h" +static DebugLevel debug; + bool isRelativePath(const std::string& s) { if(s.length() < 1) @@ -57,12 +59,8 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat // throw away invalid lines (mostly due to "set address to 0") state.lineInfo.resize(0); return true; - //return false; } -// if(saddr >= 0x4000) -// return true; - const DWARF_FileName* dfn; if(state.lineInfo_file == 0) dfn = state.file_ptr; @@ -92,14 +90,8 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat return true; } #if 1 - bool dump = false; // (fname == "cvtest.d"); //qsort(&state.lineInfo[0], state.lineInfo.size(), sizeof(state.lineInfo[0]), cmpAdr); -#if 0 - printf("%s:\n", fname.c_str()); - for(size_t ln = 0; ln < state.lineInfo.size(); ln++) - printf(" %08x: %4d\n", state.lineInfo[ln].offset + 0x401000, state.lineInfo[ln].line); -#endif - + int rc = 1; unsigned int low_offset = state.lineInfo[0].offset; unsigned short low_line = state.lineInfo[0].line; @@ -120,9 +112,11 @@ bool _flushDWARFLines(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& stat // This subtraction can underflow to (unsigned)-1 if this info is only for a single instruction, but AddLines will immediately increment it to 0, so this is fine. Not underflowing this can cause the debugger to ignore other line info for address ranges that include this address. unsigned int address_range_length = high_offset - low_offset; - if (dump) - printf("AddLines(%08x+%04x, Line=%4d+%3d, %s)\n", low_offset, address_range_length, low_line, - state.lineInfo.size(), fname.c_str()); + if (debug & DbgPdbLines) + fprintf(stderr, "%s:%d: AddLines(%08x+%04x, Line=%4d+%3d, %s)\n", __FUNCTION__, __LINE__, + low_offset, address_range_length, low_line, + (unsigned int)state.lineInfo.size(), fname.c_str()); + rc = mod->AddLines(fname.c_str(), segIndex + 1, low_offset, address_range_length, low_offset, low_line, (unsigned char*)&state.lineInfo[0], state.lineInfo.size() * sizeof(state.lineInfo[0])); @@ -147,10 +141,6 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) if (state.end_sequence) return _flushDWARFLines(img, mod, state); -#if 0 - const char* fname = (state.file == 0 ? state.file_ptr->file_name : state.files[state.file - 1].file_name); - printf("Adr:%08x Line: %5d File: %s\n", state.address, state.line, fname); -#endif if (state.address < state.seg_offset) return true; mspdb::LineInfoEntry entry; @@ -177,15 +167,17 @@ bool addLineInfo(const PEImage& img, mspdb::Mod* mod, DWARF_LineState& state) return true; } -bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) +bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug_) { - DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info; + DWARF_CompilationUnit* cu = (DWARF_CompilationUnit*)img.debug_info.startByte(); int ptrsize = cu ? cu->address_size : 4; + debug = debug_; + DWARF_LineNumberProgramHeader hdr5; - for(unsigned long off = 0; off < img.debug_line_length; ) + for(unsigned long off = 0; off < img.debug_line.length; ) { - DWARF_LineNumberProgramHeader* hdrver = (DWARF_LineNumberProgramHeader*) (img.debug_line + off); + DWARF_LineNumberProgramHeader* hdrver = (DWARF_LineNumberProgramHeader*)img.debug_line.byteAt(off); int length = hdrver->unit_length; if(length < 0) break; @@ -226,6 +218,10 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) unsigned char* p = (unsigned char*) hdrver + hdrlength; unsigned char* end = (unsigned char*) hdrver + length; + if (debug & DbgDwarfLines) + fprintf(stderr, "%s:%d: LineNumberProgramHeader offs=%x ver=%d\n", __FUNCTION__, __LINE__, + off, hdr->version); + std::vector opcode_lengths; opcode_lengths.resize(hdr->opcode_base); if (hdr->opcode_base > 0) @@ -236,7 +232,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) } DWARF_LineState state; - state.seg_offset = img.getImageBase() + img.getSection(img.codeSegment).VirtualAddress; + state.seg_offset = img.getImageBase() + img.getSection(img.text.secNo).VirtualAddress; DWARF_FileName fname; if (hdr->version <= 4) @@ -285,7 +281,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_FORM_line_strp: { size_t offset = cu->isDWARF64() ? RD8(p) : RD4(p); - state.include_dirs.push_back(img.debug_line_str + offset); + state.include_dirs.push_back((const char*)img.debug_line_str.byteAt(offset)); break; } case DW_FORM_string: @@ -327,7 +323,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_FORM_line_strp: { size_t offset = cu->isDWARF64() ? RD8(p) : RD4(p); - fname.file_name = img.debug_line_str + offset; + fname.file_name = (const char*)img.debug_line_str.byteAt(offset); break; } case DW_FORM_string: @@ -387,8 +383,6 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) switch(excode) { case DW_LNE_end_sequence: - if((char*)p - img.debug_line >= 0xe4e0) - p = p; state.end_sequence = true; state.last_addr = state.address; if(!addLineInfo(img, mod, state)) @@ -398,7 +392,7 @@ bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod) case DW_LNE_set_address: { if (!mod && state.section == -1) - state.section = img.getRelocationInLineSegment((char*)p - img.debug_line); + state.section = img.getRelocationInLineSegment(img.debug_line.sectOff(p)); unsigned long adr = ptrsize == 8 ? RD8(p) : RD4(p); state.address = adr; state.op_index = 0; diff --git a/src/main.cpp b/src/main.cpp index ced7aac..91b300a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,6 +21,8 @@ double #define T_strcpy wcscpy #define T_strcat wcscat #define T_strstr wcsstr +#define T_strncmp wcsncmp +#define T_strtoul wcstoul #define T_strtod wcstod #define T_strrchr wcsrchr #define T_unlink _wremove @@ -34,6 +36,8 @@ double #define T_strcpy strcpy #define T_strcat strcat #define T_strstr strstr +#define T_strncmp strncmp +#define T_strtoul strtoul #define T_strtod strtod #define T_strrchr strrchr #define T_unlink unlink @@ -120,7 +124,7 @@ int T_main(int argc, TCHAR* argv[]) { double Dversion = 2.072; const TCHAR* pdbref = 0; - bool debug = false; + DebugLevel debug = DebugLevel{}; CoInitialize(nullptr); @@ -138,8 +142,15 @@ int T_main(int argc, TCHAR* argv[]) demangleSymbols = false; else if (argv[0][1] == 'e') useTypedefEnum = true; - else if (argv[0][1] == 'd' && argv[0][2] == 'e' && argv[0][3] == 'b') // deb[ug] - debug = true; + else if (!T_strncmp(&argv[0][1], TEXT("debug"), 5)) // debug[level] + { + debug = (DebugLevel)T_strtoul(&argv[0][6], 0, 0); + if (!debug) { + debug = DbgBasic; + } + + fprintf(stderr, "Debug set to %x\n", debug); + } else if (argv[0][1] == 's' && argv[0][2]) dotReplacementChar = (char)argv[0][2]; else if (argv[0][1] == 'p' && argv[0][2]) @@ -182,9 +193,8 @@ int T_main(int argc, TCHAR* argv[]) img = &dbg; } - CV2PDB cv2pdb(*img); + CV2PDB cv2pdb(*img, debug); cv2pdb.Dversion = Dversion; - cv2pdb.debug = debug; cv2pdb.initLibraries(); TCHAR* outname = argv[1]; diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp index 652e063..4d35cf2 100644 --- a/src/readDwarf.cpp +++ b/src/readDwarf.cpp @@ -1,8 +1,6 @@ #include "readDwarf.h" #include -#include #include -#include #include "PEImage.h" #include "dwarf.h" @@ -11,6 +9,31 @@ extern "C" { #include "mscvpdb.h" } + +// declare hasher for pair +namespace std +{ +template +struct hash> +{ + size_t operator()(const std::pair& t) const + { + return std::hash()(t.first) ^ std::hash()(t.second); + } +}; +} + +PEImage* DIECursor::img; +abbrevMap_t DIECursor::abbrevMap; +DebugLevel DIECursor::debug; + +void DIECursor::setContext(PEImage* img_, DebugLevel debug_) +{ + img = img_; + abbrevMap.clear(); + debug = debug_; +} + static Location mkInReg(unsigned reg) { Location l; @@ -320,31 +343,6 @@ void mergeSpecification(DWARF_InfoData& id, DWARF_CompilationUnit* cu) id.merge(idspec); } -// declare hasher for pair -namespace std -{ - template - struct hash> - { - size_t operator()(const std::pair& t) const - { - return std::hash()(t.first) ^ std::hash()(t.second); - } - }; -} - -typedef std::unordered_map, byte*> abbrevMap_t; - -static PEImage* img; -static abbrevMap_t abbrevMap; - -void DIECursor::setContext(PEImage* img_) -{ - img = img_; - abbrevMap.clear(); -} - - DIECursor::DIECursor(DWARF_CompilationUnit* cu_, byte* ptr_) { cu = cu_; @@ -415,7 +413,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) return false; // root of the tree does not have a null terminator, but we know the length id.entryPtr = ptr; - id.entryOff = ptr - (byte*)cu; + entryOff = img->debug_info.sectOff(ptr); id.code = LEB128(ptr); if (id.code == 0) { @@ -432,13 +430,20 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) } byte* abbrev = getDWARFAbbrev(cu->debug_abbrev_offset, id.code); - assert(abbrev); - if (!abbrev) + if (!abbrev) { + fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__, + id.code, entryOff); + assert(abbrev); return false; + } id.abbrev = abbrev; id.tag = LEB128(abbrev); id.hasChild = *abbrev++; + + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: offs=%d level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__, + entryOff, level, id.tag, id.code); int attr, form; for (;;) @@ -449,8 +454,16 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) if (attr == 0 && form == 0) break; - while (form == DW_FORM_indirect) + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: offs=%x, attr=%d, form=%d\n", __FUNCTION__, __LINE__, + img->debug_info.sectOff(ptr), attr, form); + + while (form == DW_FORM_indirect) { form = LEB128(ptr); + if (debug & DbgDwarfAttrRead) + fprintf(stderr, "%s:%d: attr=%d, form=%d\n", __FUNCTION__, __LINE__, + attr, form); + } DWARF_Attribute a; switch (form) @@ -467,7 +480,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_sdata: a.type = Const; a.cons = SLEB128(ptr); break; case DW_FORM_udata: a.type = Const; a.cons = LEB128(ptr); break; case DW_FORM_string: a.type = String; a.string = (const char*)ptr; ptr += strlen(a.string) + 1; break; - case DW_FORM_strp: a.type = String; a.string = (const char*)(img->debug_str + RDsize(ptr, cu->isDWARF64() ? 8 : 4)); break; + case DW_FORM_strp: a.type = String; a.string = (const char*)img->debug_str.byteAt(RDsize(ptr, cu->isDWARF64() ? 8 : 4)); break; case DW_FORM_flag: a.type = Flag; a.flag = (*ptr++ != 0); break; case DW_FORM_flag_present: a.type = Flag; a.flag = true; break; case DW_FORM_ref1: a.type = Ref; a.ref = (byte*)cu + *ptr++; break; @@ -475,7 +488,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) case DW_FORM_ref4: a.type = Ref; a.ref = (byte*)cu + RD4(ptr); break; case DW_FORM_ref8: a.type = Ref; a.ref = (byte*)cu + RD8(ptr); break; case DW_FORM_ref_udata: a.type = Ref; a.ref = (byte*)cu + LEB128(ptr); break; - case DW_FORM_ref_addr: a.type = Ref; a.ref = (byte*)img->debug_info + (cu->isDWARF64() ? RD8(ptr) : RD4(ptr)); break; + case DW_FORM_ref_addr: a.type = Ref; a.ref = img->debug_info.byteAt(cu->isDWARF64() ? RD8(ptr) : RD4(ptr)); break; case DW_FORM_ref_sig8: a.type = Invalid; ptr += 8; break; case DW_FORM_exprloc: a.type = ExprLoc; a.expr.len = LEB128(ptr); a.expr.ptr = ptr; ptr += a.expr.len; break; case DW_FORM_sec_offset: a.type = SecOffset; a.sec_offset = cu->isDWARF64() ? RD8(ptr) : RD4(ptr); break; @@ -578,7 +591,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull) byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode) { - if (!img->debug_abbrev) + if (!img->debug_abbrev.isPresent()) return 0; std::pair key = std::make_pair(off, findcode); @@ -588,8 +601,8 @@ byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode) return it->second; } - byte* p = (byte*)img->debug_abbrev + off; - byte* end = (byte*)img->debug_abbrev + img->debug_abbrev_length; + byte* p = img->debug_abbrev.byteAt(off); + byte* end = img->debug_abbrev.endByte(); while (p < end) { int code = LEB128(p); diff --git a/src/readDwarf.h b/src/readDwarf.h index 7ad56e8..41724f6 100644 --- a/src/readDwarf.h +++ b/src/readDwarf.h @@ -1,13 +1,30 @@ #ifndef __READDWARF_H__ #define __READDWARF_H__ +#include #include #include #include +#include #include "mspdb.h" +class PEImage; + typedef unsigned char byte; +enum DebugLevel : unsigned { + DbgBasic = 0x1, + DbgPdbTypes = 0x2, + DbgPdbSyms = 0x4, + DbgPdbLines = 0x8, + DbgDwarfTagRead = 0x10, + DbgDwarfAttrRead = 0x20, + DbgDwarfLocLists = 0x40, + DbgDwarfLines = 0x80 +}; + +DEFINE_ENUM_FLAG_OPERATORS(DebugLevel); + inline unsigned int LEB128(byte* &p) { unsigned int x = 0; @@ -138,7 +155,6 @@ struct DWARF_FileName struct DWARF_InfoData { byte* entryPtr; - unsigned entryOff; // offset in the cu int code; byte* abbrev; int tag; @@ -382,7 +398,8 @@ struct Location bool is_regrel() const { return type == RegRel; } }; -class PEImage; +typedef std::unordered_map, byte*> abbrevMap_t; + // Attempts to partially evaluate DWARF location expressions. // The only supported expressions are those, whose result may be represented @@ -398,15 +415,20 @@ class DIECursor public: DWARF_CompilationUnit* cu; byte* ptr; + unsigned int entryOff; int level; bool hasChild; // indicates whether the last read DIE has children byte* sibling; + static PEImage *img; + static abbrevMap_t abbrevMap; + static DebugLevel debug; + byte* getDWARFAbbrev(unsigned off, unsigned findcode); public: - static void setContext(PEImage* img_); + static void setContext(PEImage* img_, DebugLevel debug_); // Create a new DIECursor DIECursor(DWARF_CompilationUnit* cu_, byte* ptr); @@ -429,6 +451,6 @@ public: // iterate over DWARF debug_line information // if mod is null, print them out, otherwise add to module -bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod); +bool interpretDWARFLines(const PEImage& img, mspdb::Mod* mod, DebugLevel debug = DebugLevel{}); #endif -- cgit v0.12