From 3723eb072efb1a2f74c0330bd2dabc0de22844a5 Mon Sep 17 00:00:00 2001 From: Dimitri van Heesch Date: Sat, 23 Jan 2021 19:07:41 +0100 Subject: Refactoring: Move xml parser to a separate directory --- CMakeLists.txt | 1 + addon/doxyapp/CMakeLists.txt | 1 + addon/doxyparse/CMakeLists.txt | 1 + libxml/CMakeLists.txt | 19 ++ libxml/xml.h | 90 ++++++++ libxml/xml.l | 496 +++++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 5 +- src/layout.cpp | 5 +- src/tagreader.cpp | 3 +- src/xml.h | 74 ------ src/xml.l | 484 ---------------------------------------- 11 files changed, 615 insertions(+), 564 deletions(-) create mode 100644 libxml/CMakeLists.txt create mode 100644 libxml/xml.h create mode 100644 libxml/xml.l delete mode 100644 src/xml.h delete mode 100644 src/xml.l diff --git a/CMakeLists.txt b/CMakeLists.txt index fafb56e..7f4a904 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,7 @@ add_subdirectory(libmd5) add_subdirectory(liblodepng) add_subdirectory(libmscgen) add_subdirectory(libversion) +add_subdirectory(libxml) add_subdirectory(qtools) add_subdirectory(vhdlparser) add_subdirectory(src) diff --git a/addon/doxyapp/CMakeLists.txt b/addon/doxyapp/CMakeLists.txt index 707fded..9213e22 100644 --- a/addon/doxyapp/CMakeLists.txt +++ b/addon/doxyapp/CMakeLists.txt @@ -38,6 +38,7 @@ target_link_libraries(doxyapp doxymain qtools md5 +xml lodepng mscgen doxygen_version diff --git a/addon/doxyparse/CMakeLists.txt b/addon/doxyparse/CMakeLists.txt index fe2f2c2..7a422a1 100644 --- a/addon/doxyparse/CMakeLists.txt +++ b/addon/doxyparse/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries(doxyparse doxymain qtools md5 +xml lodepng mscgen doxygen_version diff --git a/libxml/CMakeLists.txt b/libxml/CMakeLists.txt new file mode 100644 index 0000000..96c5653 --- /dev/null +++ b/libxml/CMakeLists.txt @@ -0,0 +1,19 @@ +include_directories( + ${PROJECT_SOURCE_DIR}/libxml +) + +add_custom_command( + COMMAND ${PYTHON_EXECUTABLE} 
${PROJECT_SOURCE_DIR}/src/scan_states.py ${PROJECT_SOURCE_DIR}/libxml/xml.l > ${GENERATED_SRC}/xml.l.h + DEPENDS ${PROJECT_SOURCE_DIR}/src/scan_states.py ${PROJECT_SOURCE_DIR}/libxml/xml.l + OUTPUT ${GENERATED_SRC}/xml.l.h +) +set_source_files_properties(${GENERATED_SRC}/xml.l.h PROPERTIES GENERATED 1) + +FLEX_TARGET(xml xml.l ${GENERATED_SRC}/xml.cpp COMPILE_FLAGS "${LEX_FLAGS}") + +add_library(xml +${GENERATED_SRC}/xml.cpp +${GENERATED_SRC}/xml.l.h +) + + diff --git a/libxml/xml.h b/libxml/xml.h new file mode 100644 index 0000000..0708d34 --- /dev/null +++ b/libxml/xml.h @@ -0,0 +1,90 @@ +#ifndef XML_H +#define XML_H + +/****************************************************************************** + * + * Copyright (C) 1997-2021 by Dimitri van Heesch. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation under the terms of the GNU General Public License is hereby + * granted. No representations are made about the suitability of this software + * for any purpose. It is provided "as is" without express or implied warranty. + * See the GNU General Public License for more details. + * + * Documents produced by Doxygen are derivative works derived from the + * input used in their production; they are not affected by this license. + * + */ + +#include +#include +#include +#include + +/*! @brief Event handlers that can be installed by the client and called while parsing an XML document. 
+ */ +class XMLHandlers +{ + public: + using Attributes = std::unordered_map; + using StartDocType = void(); + using EndDocType = void(); + using StartElementType = void(const std::string &,const Attributes &); + using EndElementType = void(const std::string &); + using ErrorType = void(const std::string,int,const std::string &); + using CharsType = void(const std::string &); + + std::function startDocument; /**< handler invoked at the start of the document */ + std::function endDocument; /**< handler invoked at the end of the document */ + std::function startElement; /**< handler invoked when an opening tag has been found */ + std::function endElement; /**< handler invoked when a closing tag has been found */ + std::function characters; /**< handler invoked when content between tags has been found */ + std::function error; /**< handler invoked when the parser encounters an error */ + + static std::string value(const Attributes &attrib,const std::string &key) + { + auto it = attrib.find(key); + if (it!=attrib.end()) + { + return it->second; + } + return ""; + } +}; + +class XMLLocator +{ + public: + virtual ~XMLLocator() {} + virtual int lineNr() const = 0; + virtual std::string fileName() const = 0; +}; + +/*! Very basic SAX style parser to parse XML documents. */ +class XMLParser : public XMLLocator +{ + public: + /*! Creates an instance of the parser object. Different instances can run on different + * threads without interference. + * + * @param handlers The event handlers passed by the client. + */ + XMLParser(const XMLHandlers &handlers); + /*! Destructor */ + ~XMLParser(); + + /*! Parses a file given the contents of the file as a string. + * @param fileName the name of the file, used for error reporting. + * @param inputString the contents of the file as a zero terminated UTF-8 string. + * @param debugEnabled indicates if debugging via -d lex is enabled or not. 
+ */ + void parse(const char *fileName,const char *inputString,bool debugEnabled); + + private: + virtual int lineNr() const override; + virtual std::string fileName() const override; + struct Private; + std::unique_ptr p; +}; + +#endif diff --git a/libxml/xml.l b/libxml/xml.l new file mode 100644 index 0000000..ac58882 --- /dev/null +++ b/libxml/xml.l @@ -0,0 +1,496 @@ +/****************************************************************************** + * + * Copyright (C) 1997-2020 by Dimitri van Heesch. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation under the terms of the GNU General Public License is hereby + * granted. No representations are made about the suitability of this software + * for any purpose. It is provided "as is" without express or implied warranty. + * See the GNU General Public License for more details. + * + * Documents produced by Doxygen are derivative works derived from the + * input used in their production; they are not affected by this license. 
+ * + */ +/****************************************************************************** + * Minimal flex based parser for XML + ******************************************************************************/ + +%option never-interactive +%option prefix="xmlYY" +%option reentrant +%option extra-type="struct xmlYY_state *" +%option 8bit noyywrap +%top{ +#include +} + +%{ + +#include +#include +#include +#include "xml.h" +//#include "message.h" + +#define YY_NEVER_INTERACTIVE 1 +#define YY_NO_INPUT 1 +#define YY_NO_UNISTD_H 1 + +struct xmlYY_state +{ + std::string fileName; + int lineNr = 1; + const char * inputString = 0; //!< the code fragment as text + yy_size_t inputPosition = 0; //!< read offset during parsing + std::string name; + bool isEnd = false; + bool selfClose = false; + std::string data; + std::string attrValue; + std::string attrName; + XMLHandlers::Attributes attrs; + XMLHandlers handlers; + int cdataContext; + int commentContext; + char stringChar; + std::vector xpath; +}; + +#if USE_STATE2STRING +static const char *stateToString(int state); +#endif + +static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size); +static void initElement(yyscan_t yyscanner); +static void addCharacters(yyscan_t yyscanner); +static void addElement(yyscan_t yyscanner); +static void addAttribute(yyscan_t yyscanner); +static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len); +static void reportError(yyscan_t yyscanner, const std::string &msg); +static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len); + +#undef YY_INPUT +#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size); + +%} + +NL (\r\n|\r|\n) +SP [ \t\r\n]+ +OPEN {SP}?"<" +OPENSPECIAL {SP}?""{NL}? +CLOSESPECIAL "?>"{NL}? 
+NAMESTART [:A-Za-z\200-\377_] +NAMECHAR [:A-Za-z\200-\377_0-9.-] +NAME {NAMESTART}{NAMECHAR}* +ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";" +COLON ":" +PCDATA [^<]+ +COMMENT {OPEN}"!--" +COMMENTEND "--"{CLOSE} +STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\' +DOCTYPE {SP}?"" + +%option noyywrap + +%s Initial +%s Content +%s CDataSection +%s Element +%s Attributes +%s AttributeValue +%s AttrValueStr +%s Prolog +%s Comment + +%% + +{ + {SP} { countLines(yyscanner,yytext,yyleng); } + {DOCTYPE} { countLines(yyscanner,yytext,yyleng); } + {OPENSPECIAL} { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); } + {OPEN} { countLines(yyscanner,yytext,yyleng); + initElement(yyscanner); + BEGIN(Element); } + {COMMENT} { yyextra->commentContext = YY_START; + BEGIN(Comment); + } +} +{ + {CDATA} { countLines(yyscanner,yytext,yyleng); + yyextra->cdataContext = YY_START; + BEGIN(CDataSection); + } + {PCDATA} { yyextra->data += processData(yyscanner,yytext,yyleng); } + {OPEN} { countLines(yyscanner,yytext,yyleng); + addCharacters(yyscanner); + initElement(yyscanner); + BEGIN(Element); + } + {COMMENT} { yyextra->commentContext = YY_START; + countLines(yyscanner,yytext,yyleng); + BEGIN(Comment); + } +} +{ + "/" { yyextra->isEnd = true; } + {NAME} { yyextra->name = yytext; + BEGIN(Attributes); } + {CLOSE} { addElement(yyscanner); + countLines(yyscanner,yytext,yyleng); + yyextra->data = ""; + BEGIN(Content); + } + {SP} { countLines(yyscanner,yytext,yyleng); } +} +{ + "/" { yyextra->selfClose = true; } + {NAME} { yyextra->attrName = yytext; } + "=" { BEGIN(AttributeValue); } + {CLOSE} { addElement(yyscanner); + countLines(yyscanner,yytext,yyleng); + yyextra->data = ""; + BEGIN(Content); + } + {SP} { countLines(yyscanner,yytext,yyleng); } +} +{ + {SP} { countLines(yyscanner,yytext,yyleng); } + ['"] { yyextra->stringChar = *yytext; + yyextra->attrValue = ""; + BEGIN(AttrValueStr); + } + . { std::string msg = std::string("Missing attribute value. 
Unexpected character `")+yytext+"` found"; + reportError(yyscanner,msg); + unput(*yytext); + BEGIN(Attributes); + } +} +{ + [^'"\n]+ { yyextra->attrValue += processData(yyscanner,yytext,yyleng); } + ['"] { if (*yytext==yyextra->stringChar) + { + addAttribute(yyscanner); + BEGIN(Attributes); + } + else + { + yyextra->attrValue += processData(yyscanner,yytext,yyleng); + } + } + \n { yyextra->lineNr++; yyextra->attrValue+=' '; } +} +{ + {ENDCDATA} { BEGIN(yyextra->cdataContext); } + [^]\n]+ { yyextra->data += yytext; } + \n { yyextra->data += yytext; + yyextra->lineNr++; + } + . { yyextra->data += yytext; } +} +{ + {CLOSESPECIAL} { countLines(yyscanner,yytext,yyleng); + BEGIN(Initial); + } + [^?\n]+ { } + \n { yyextra->lineNr++; } + . { } +} +{ + {COMMENTEND} { countLines(yyscanner,yytext,yyleng); + BEGIN(yyextra->commentContext); + } + [^\n-]+ { } + \n { yyextra->lineNr++; } + . { } +} +\n { yyextra->lineNr++; } +. { std::string msg = "Unexpected character `"; + msg+=yytext; + msg+="` found"; + reportError(yyscanner,msg); + } + +%% + +//---------------------------------------------------------------------------------------- + +static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + yy_size_t inputPosition = yyextra->inputPosition; + const char *s = yyextra->inputString + inputPosition; + yy_size_t c=0; + while( c < max_size && *s) + { + *buf++ = *s++; + c++; + } + yyextra->inputPosition += c; + return c; +} + +static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + for (yy_size_t i=0;ilineNr++; + } +} + +static void initElement(yyscan_t yyscanner) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + yyextra->isEnd = false; // true => + yyextra->selfClose = false; // true => + yyextra->name = ""; + yyextra->attrs.clear(); +} + +static void checkAndUpdatePath(yyscan_t yyscanner) +{ + struct yyguts_t *yyg = (struct 
yyguts_t*)yyscanner; + if (yyextra->xpath.empty()) + { + std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag"; + reportError(yyscanner,msg); + } + else + { + std::string expectedTagName = yyextra->xpath.back(); + if (expectedTagName!=yyextra->name) + { + std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+expectedTagName+"' at the same level"; + reportError(yyscanner,msg); + } + else // matching end tag + { + yyextra->xpath.pop_back(); + } + } +} + +static void addElement(yyscan_t yyscanner) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + if (!yyextra->isEnd) + { + yyextra->xpath.push_back(yyextra->name); + if (yyextra->handlers.startElement) + { + yyextra->handlers.startElement(yyextra->name,yyextra->attrs); + } + if (yy_flex_debug) + { + fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data()); + for (auto attr : yyextra->attrs) + { + fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str()); + } + fprintf(stderr,"])\n"); + } + } + if (yyextra->isEnd || yyextra->selfClose) + { + if (yy_flex_debug) + { + fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data()); + } + checkAndUpdatePath(yyscanner); + if (yyextra->handlers.endElement) + { + yyextra->handlers.endElement(yyextra->name); + } + } +} + +static std::string trimSpaces(const std::string &str) +{ + const int l = static_cast(str.length()); + int s=0, e=l-1; + while (ss && isspace(str.at(e))) e--; + return str.substr(s,1+e-s); +} + +static void addCharacters(yyscan_t yyscanner) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + std::string data = trimSpaces(yyextra->data); + if (yyextra->handlers.characters) + { + yyextra->handlers.characters(data); + } + if (!data.empty()) + { + if (yy_flex_debug) + { + fprintf(stderr,"characters(%s)\n",data.c_str()); + } + } +} + +static void addAttribute(yyscan_t yyscanner) +{ + struct yyguts_t *yyg = (struct 
yyguts_t*)yyscanner; + yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue)); +} + +static void reportError(yyscan_t yyscanner,const std::string &msg) +{ + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + if (yy_flex_debug) + { + fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str()); + } + if (yyextra->handlers.error) + { + yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg); + } +} + +static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" }; +static const char entities_dec[] = { '&', '"', '>', '<', '\'' }; +static const int num_entities = 5; + +// replace character entities such as & in txt and return the string where entities +// are replaced +static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len) +{ + std::string result; + result.reserve(len); + for (yy_size_t i=0; ixmlYY_extra,&p->yyscanner); + p->xmlYY_extra.handlers = handlers; +} + +XMLParser::~XMLParser() +{ + xmlYYlex_destroy(p->yyscanner); +} + +void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabled) +{ + yyscan_t yyscanner = p->yyscanner; + struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; + +#ifdef FLEX_DEBUG + xmlYYset_debug(1,p->yyscanner); +#endif + + if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input + + FILE *output = 0; + const char *enter_txt = 0; + const char *finished_txt = 0; + if (yy_flex_debug) { output=stderr; enter_txt="entering"; finished_txt="finished"; } + else if (debugEnabled) { output=stdout; enter_txt="Entering"; finished_txt="Finished"; } + + if (output) + { + fprintf(output,"--%s lexical analyzer: %s (for: %s)\n",enter_txt, __FILE__, fileName); + } + + BEGIN(Initial); + yyextra->fileName = fileName; + yyextra->lineNr = 1; + yyextra->inputString = inputStr; + yyextra->inputPosition = 0; + + xmlYYrestart( 0, yyscanner ); + + if (yyextra->handlers.startDocument) + { + yyextra->handlers.startDocument(); + } + 
xmlYYlex(yyscanner); + if (yyextra->handlers.endDocument) + { + yyextra->handlers.endDocument(); + } + + if (!yyextra->xpath.empty()) + { + std::string tagName = yyextra->xpath.back(); + std::string msg = "End of file reached while expecting closing tag '"+tagName+"'"; + reportError(yyscanner,msg); + } + + if (output) + { + fprintf(output,"--%s lexical analyzer: %s (for: %s)\n",finished_txt, __FILE__, fileName); + } +} + +int XMLParser::lineNr() const +{ + struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner; + return yyextra->lineNr; +} + +std::string XMLParser::fileName() const +{ + struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner; + return yyextra->fileName; +} + +#if USE_STATE2STRING +#include "xml.l.h" +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5004a95..c6af813 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,7 @@ include_directories( ${PROJECT_SOURCE_DIR}/liblodepng ${PROJECT_SOURCE_DIR}/libmscgen ${PROJECT_SOURCE_DIR}/libversion + ${PROJECT_SOURCE_DIR}/libxml ${PROJECT_SOURCE_DIR}/vhdlparser ${PROJECT_SOURCE_DIR}/src ${CLANG_INCLUDEDIR} @@ -114,7 +115,6 @@ set(LEX_FILES scanner commentcnv commentscan constexp - xml xmlcode sqlcode configimpl) @@ -190,7 +190,6 @@ add_library(doxymain STATIC ${GENERATED_SRC}/scanner.l.h ${GENERATED_SRC}/sqlcode.l.h ${GENERATED_SRC}/vhdlcode.l.h - ${GENERATED_SRC}/xml.l.h ${GENERATED_SRC}/xmlcode.l.h ${GENERATED_SRC}/code.cpp ${GENERATED_SRC}/commentcnv.cpp @@ -206,7 +205,6 @@ add_library(doxymain STATIC ${GENERATED_SRC}/scanner.cpp ${GENERATED_SRC}/sqlcode.cpp ${GENERATED_SRC}/vhdlcode.cpp - ${GENERATED_SRC}/xml.cpp ${GENERATED_SRC}/xmlcode.cpp # ${GENERATED_SRC}/ce_parse.cpp @@ -348,6 +346,7 @@ target_link_libraries(doxygen PRIVATE md5 lodepng mscgen + xml doxygen_version vhdlparser ${SQLITE3_LIBRARIES} diff --git a/src/layout.cpp b/src/layout.cpp index 427747e..b8984e0 100644 --- a/src/layout.cpp +++ b/src/layout.cpp @@ -32,6 +32,7 @@ #include "config.h" #include "xml.h" 
#include "resourcemgr.h" +#include "debug.h" inline QCString compileOptions(const QCString &def) { @@ -1503,7 +1504,7 @@ void LayoutDocManager::init() XMLParser parser(handlers); layoutParser.setDocumentLocator(&parser); QCString layout_default = ResourceMgr::instance().getAsString("layout_default.xml"); - parser.parse("layout_default.xml",layout_default); + parser.parse("layout_default.xml",layout_default,Debug::isFlagSet(Debug::Lex)); } LayoutDocManager::~LayoutDocManager() @@ -1546,7 +1547,7 @@ void LayoutDocManager::parse(const char *fileName) handlers.error = [&layoutParser](const std::string &fn,int lineNr,const std::string &msg) { layoutParser.error(fn,lineNr,msg); }; XMLParser parser(handlers); layoutParser.setDocumentLocator(&parser); - parser.parse(fileName,fileToString(fileName)); + parser.parse(fileName,fileToString(fileName),Debug::isFlagSet(Debug::Lex)); } //--------------------------------------------------------------------------------- diff --git a/src/tagreader.cpp b/src/tagreader.cpp index d5f8d5f..0a7c8f1 100644 --- a/src/tagreader.cpp +++ b/src/tagreader.cpp @@ -38,6 +38,7 @@ #include "filename.h" #include "section.h" #include "containers.h" +#include "debug.h" /** Information about an linkable anchor */ class TagAnchorInfo @@ -1499,7 +1500,7 @@ void parseTagFile(const std::shared_ptr &root,const char *fullName) handlers.error = [&tagFileParser](const std::string &fileName,int lineNr,const std::string &msg) { tagFileParser.error(fileName,lineNr,msg); }; XMLParser parser(handlers); tagFileParser.setDocumentLocator(&parser); - parser.parse(fullName,inputStr); + parser.parse(fullName,inputStr,Debug::isFlagSet(Debug::Lex)); tagFileParser.buildLists(root); tagFileParser.addIncludes(); //tagFileParser.dump(); diff --git a/src/xml.h b/src/xml.h deleted file mode 100644 index add95f1..0000000 --- a/src/xml.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef XML_H -#define XML_H - -#include -#include -#include -#include - -/*! 
@brief Event handlers that can installed by the client and called while parsing a XML document. - */ -class XMLHandlers -{ - public: - using Attributes = std::unordered_map; - using StartDocType = void(); - using EndDocType = void(); - using StartElementType = void(const std::string &,const Attributes &); - using EndElementType = void(const std::string &); - using ErrorType = void(const std::string,int,const std::string &); - using CharsType = void(const std::string &); - - std::function startDocument; /**< handler invoked at the start of the document */ - std::function endDocument; /**< handler invoked at the end of the document */ - std::function startElement; /**< handler invoked when an opening tag has been found */ - std::function endElement; /**< handler invoked when a closing tag has been found */ - std::function characters; /**< handler invoked when content between tags has been found */ - std::function error; /**< handler invoked when the parser encounters an error */ - - static std::string value(const Attributes &attrib,const std::string &key) - { - auto it = attrib.find(key); - if (it!=attrib.end()) - { - return it->second; - } - return ""; - } -}; - -class XMLLocator -{ - public: - virtual ~XMLLocator() {} - virtual int lineNr() const = 0; - virtual std::string fileName() const = 0; -}; - -/*! Very basic SAX style parser to parse XML documents. */ -class XMLParser : public XMLLocator -{ - public: - /*! Creates an instance of the parser object. Different instances can run on different - * threads without interference. - * - * @param handlers The event handlers passed by the client. - */ - XMLParser(const XMLHandlers &handlers); - /*! Destructor */ - ~XMLParser(); - - /*! Parses a file gives the contents of the file as a string. - * @param fileName the name of the file, used for error reporting. - * @param inputString the contents of the file as a zero terminated UTF-8 string. 
- */ - void parse(const char *fileName,const char *inputString); - - private: - virtual int lineNr() const override; - virtual std::string fileName() const override; - struct Private; - std::unique_ptr p; -}; - -#endif diff --git a/src/xml.l b/src/xml.l deleted file mode 100644 index ace35d5..0000000 --- a/src/xml.l +++ /dev/null @@ -1,484 +0,0 @@ -/****************************************************************************** - * - * Copyright (C) 1997-2020 by Dimitri van Heesch. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation under the terms of the GNU General Public License is hereby - * granted. No representations are made about the suitability of this software - * for any purpose. It is provided "as is" without express or implied warranty. - * See the GNU General Public License for more details. - * - * Documents produced by Doxygen are derivative works derived from the - * input used in their production; they are not affected by this license. 
- * - */ -/****************************************************************************** - * Minimal flex based parser for XML - ******************************************************************************/ - -%option never-interactive -%option prefix="xmlYY" -%option reentrant -%option extra-type="struct xmlYY_state *" -%option 8bit noyywrap -%top{ -#include -} - -%{ - -#include -#include -#include -#include "xml.h" -#include "message.h" - -#define YY_NEVER_INTERACTIVE 1 -#define YY_NO_INPUT 1 -#define YY_NO_UNISTD_H 1 - -struct xmlYY_state -{ - std::string fileName; - int lineNr = 1; - const char * inputString = 0; //!< the code fragment as text - yy_size_t inputPosition = 0; //!< read offset during parsing - std::string name; - bool isEnd = false; - bool selfClose = false; - std::string data; - std::string attrValue; - std::string attrName; - XMLHandlers::Attributes attrs; - XMLHandlers handlers; - int cdataContext; - int commentContext; - char stringChar; - std::vector xpath; -}; - -#if USE_STATE2STRING -static const char *stateToString(int state); -#endif - -static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size); -static void initElement(yyscan_t yyscanner); -static void addCharacters(yyscan_t yyscanner); -static void addElement(yyscan_t yyscanner); -static void addAttribute(yyscan_t yyscanner); -static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len); -static void reportError(yyscan_t yyscanner, const std::string &msg); -static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len); - -#undef YY_INPUT -#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size); - -%} - -NL (\r\n|\r|\n) -SP [ \t\r\n]+ -OPEN {SP}?"<" -OPENSPECIAL {SP}?""{NL}? -CLOSESPECIAL "?>"{NL}? 
-NAMESTART [:A-Za-z\200-\377_] -NAMECHAR [:A-Za-z\200-\377_0-9.-] -NAME {NAMESTART}{NAMECHAR}* -ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";" -COLON ":" -PCDATA [^<]+ -COMMENT {OPEN}"!--" -COMMENTEND "--"{CLOSE} -STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\' -DOCTYPE {SP}?"" - -%option noyywrap - -%s Initial -%s Content -%s CDataSection -%s Element -%s Attributes -%s AttributeValue -%s AttrValueStr -%s Prolog -%s Comment - -%% - -{ - {SP} { countLines(yyscanner,yytext,yyleng); } - {DOCTYPE} { countLines(yyscanner,yytext,yyleng); } - {OPENSPECIAL} { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); } - {OPEN} { countLines(yyscanner,yytext,yyleng); - initElement(yyscanner); - BEGIN(Element); } - {COMMENT} { yyextra->commentContext = YY_START; - BEGIN(Comment); - } -} -{ - {CDATA} { countLines(yyscanner,yytext,yyleng); - yyextra->cdataContext = YY_START; - BEGIN(CDataSection); - } - {PCDATA} { yyextra->data += processData(yyscanner,yytext,yyleng); } - {OPEN} { countLines(yyscanner,yytext,yyleng); - addCharacters(yyscanner); - initElement(yyscanner); - BEGIN(Element); - } - {COMMENT} { yyextra->commentContext = YY_START; - countLines(yyscanner,yytext,yyleng); - BEGIN(Comment); - } -} -{ - "/" { yyextra->isEnd = true; } - {NAME} { yyextra->name = yytext; - BEGIN(Attributes); } - {CLOSE} { addElement(yyscanner); - countLines(yyscanner,yytext,yyleng); - yyextra->data = ""; - BEGIN(Content); - } - {SP} { countLines(yyscanner,yytext,yyleng); } -} -{ - "/" { yyextra->selfClose = true; } - {NAME} { yyextra->attrName = yytext; } - "=" { BEGIN(AttributeValue); } - {CLOSE} { addElement(yyscanner); - countLines(yyscanner,yytext,yyleng); - yyextra->data = ""; - BEGIN(Content); - } - {SP} { countLines(yyscanner,yytext,yyleng); } -} -{ - {SP} { countLines(yyscanner,yytext,yyleng); } - ['"] { yyextra->stringChar = *yytext; - yyextra->attrValue = ""; - BEGIN(AttrValueStr); - } - . { std::string msg = std::string("Missing attribute value. 
Unexpected character `")+yytext+"` found"; - reportError(yyscanner,msg); - unput(*yytext); - BEGIN(Attributes); - } -} -{ - [^'"\n]+ { yyextra->attrValue += processData(yyscanner,yytext,yyleng); } - ['"] { if (*yytext==yyextra->stringChar) - { - addAttribute(yyscanner); - BEGIN(Attributes); - } - else - { - yyextra->attrValue += processData(yyscanner,yytext,yyleng); - } - } - \n { yyextra->lineNr++; yyextra->attrValue+=' '; } -} -{ - {ENDCDATA} { BEGIN(yyextra->cdataContext); } - [^]\n]+ { yyextra->data += yytext; } - \n { yyextra->data += yytext; - yyextra->lineNr++; - } - . { yyextra->data += yytext; } -} -{ - {CLOSESPECIAL} { countLines(yyscanner,yytext,yyleng); - BEGIN(Initial); - } - [^?\n]+ { } - \n { yyextra->lineNr++; } - . { } -} -{ - {COMMENTEND} { countLines(yyscanner,yytext,yyleng); - BEGIN(yyextra->commentContext); - } - [^\n-]+ { } - \n { yyextra->lineNr++; } - . { } -} -\n { yyextra->lineNr++; } -. { std::string msg = "Unexpected character `"; - msg+=yytext; - msg+="` found"; - reportError(yyscanner,msg); - } - -%% - -//---------------------------------------------------------------------------------------- - -static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - yy_size_t inputPosition = yyextra->inputPosition; - const char *s = yyextra->inputString + inputPosition; - yy_size_t c=0; - while( c < max_size && *s) - { - *buf++ = *s++; - c++; - } - yyextra->inputPosition += c; - return c; -} - -static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - for (yy_size_t i=0;ilineNr++; - } -} - -static void initElement(yyscan_t yyscanner) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - yyextra->isEnd = false; // true => - yyextra->selfClose = false; // true => - yyextra->name = ""; - yyextra->attrs.clear(); -} - -static void checkAndUpdatePath(yyscan_t yyscanner) -{ - struct yyguts_t *yyg = (struct 
yyguts_t*)yyscanner; - if (yyextra->xpath.empty()) - { - std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag"; - reportError(yyscanner,msg); - } - else - { - std::string expectedTagName = yyextra->xpath.back(); - if (expectedTagName!=yyextra->name) - { - std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+expectedTagName+"' at the same level"; - reportError(yyscanner,msg); - } - else // matching end tag - { - yyextra->xpath.pop_back(); - } - } -} - -static void addElement(yyscan_t yyscanner) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - if (!yyextra->isEnd) - { - yyextra->xpath.push_back(yyextra->name); - if (yyextra->handlers.startElement) - { - yyextra->handlers.startElement(yyextra->name,yyextra->attrs); - } - if (yy_flex_debug) - { - fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data()); - for (auto attr : yyextra->attrs) - { - fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str()); - } - fprintf(stderr,"])\n"); - } - } - if (yyextra->isEnd || yyextra->selfClose) - { - if (yy_flex_debug) - { - fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data()); - } - checkAndUpdatePath(yyscanner); - if (yyextra->handlers.endElement) - { - yyextra->handlers.endElement(yyextra->name); - } - } -} - -static std::string trimSpaces(const std::string &str) -{ - const int l = static_cast(str.length()); - int s=0, e=l-1; - while (ss && isspace(str.at(e))) e--; - return str.substr(s,1+e-s); -} - -static void addCharacters(yyscan_t yyscanner) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - std::string data = trimSpaces(yyextra->data); - if (yyextra->handlers.characters) - { - yyextra->handlers.characters(data); - } - if (!data.empty()) - { - if (yy_flex_debug) - { - fprintf(stderr,"characters(%s)\n",data.c_str()); - } - } -} - -static void addAttribute(yyscan_t yyscanner) -{ - struct yyguts_t *yyg = (struct 
yyguts_t*)yyscanner; - yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue)); -} - -static void reportError(yyscan_t yyscanner,const std::string &msg) -{ - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - if (yy_flex_debug) - { - fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str()); - } - if (yyextra->handlers.error) - { - yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg); - } -} - -static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" }; -static const char entities_dec[] = { '&', '"', '>', '<', '\'' }; -static const int num_entities = 5; - -// replace character entities such as & in txt and return the string where entities -// are replaced -static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len) -{ - std::string result; - result.reserve(len); - for (yy_size_t i=0; ixmlYY_extra,&p->yyscanner); - p->xmlYY_extra.handlers = handlers; -} - -XMLParser::~XMLParser() -{ - xmlYYlex_destroy(p->yyscanner); -} - -void XMLParser::parse(const char *fileName,const char *inputStr) -{ - yyscan_t yyscanner = p->yyscanner; - struct yyguts_t *yyg = (struct yyguts_t*)yyscanner; - -#ifdef FLEX_DEBUG - xmlYYset_debug(1,p->yyscanner); -#endif - - if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input - - printlex(yy_flex_debug, true, __FILE__, fileName); - - BEGIN(Initial); - yyextra->fileName = fileName; - yyextra->lineNr = 1; - yyextra->inputString = inputStr; - yyextra->inputPosition = 0; - - xmlYYrestart( 0, yyscanner ); - - if (yyextra->handlers.startDocument) - { - yyextra->handlers.startDocument(); - } - xmlYYlex(yyscanner); - if (yyextra->handlers.endDocument) - { - yyextra->handlers.endDocument(); - } - - if (!yyextra->xpath.empty()) - { - std::string tagName = yyextra->xpath.back(); - std::string msg = "End of file reached while expecting closing tag '"+tagName+"'"; - reportError(yyscanner,msg); - } - - printlex(yy_flex_debug, false, __FILE__, 
fileName); -} - -int XMLParser::lineNr() const -{ - struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner; - return yyextra->lineNr; -} - -std::string XMLParser::fileName() const -{ - struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner; - return yyextra->fileName; -} - -#if USE_STATE2STRING -#include "xml.l.h" -#endif -- cgit v0.12