summaryrefslogtreecommitdiffstats
path: root/src/xml.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml.l')
-rw-r--r--src/xml.l484
1 files changed, 0 insertions, 484 deletions
diff --git a/src/xml.l b/src/xml.l
deleted file mode 100644
index ace35d5..0000000
--- a/src/xml.l
+++ /dev/null
@@ -1,484 +0,0 @@
-/******************************************************************************
- *
- * Copyright (C) 1997-2020 by Dimitri van Heesch.
- *
- * Permission to use, copy, modify, and distribute this software and its
- * documentation under the terms of the GNU General Public License is hereby
- * granted. No representations are made about the suitability of this software
- * for any purpose. It is provided "as is" without express or implied warranty.
- * See the GNU General Public License for more details.
- *
- * Documents produced by Doxygen are derivative works derived from the
- * input used in their production; they are not affected by this license.
- *
- */
-/******************************************************************************
- * Minimal flex based parser for XML
- ******************************************************************************/
-
-%option never-interactive
-%option prefix="xmlYY"
-%option reentrant
-%option extra-type="struct xmlYY_state *"
-%option 8bit noyywrap
-%top{
-#include <stdint.h>
-}
-
-%{
-
-#include <ctype.h>
-#include <vector>
-#include <stdio.h>
-#include "xml.h"
-#include "message.h"
-
-#define YY_NEVER_INTERACTIVE 1
-#define YY_NO_INPUT 1
-#define YY_NO_UNISTD_H 1
-
-struct xmlYY_state
-{
- std::string fileName;
- int lineNr = 1;
- const char * inputString = 0; //!< the code fragment as text
- yy_size_t inputPosition = 0; //!< read offset during parsing
- std::string name;
- bool isEnd = false;
- bool selfClose = false;
- std::string data;
- std::string attrValue;
- std::string attrName;
- XMLHandlers::Attributes attrs;
- XMLHandlers handlers;
- int cdataContext;
- int commentContext;
- char stringChar;
- std::vector<std::string> xpath;
-};
-
-#if USE_STATE2STRING
-static const char *stateToString(int state);
-#endif
-
-static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size);
-static void initElement(yyscan_t yyscanner);
-static void addCharacters(yyscan_t yyscanner);
-static void addElement(yyscan_t yyscanner);
-static void addAttribute(yyscan_t yyscanner);
-static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len);
-static void reportError(yyscan_t yyscanner, const std::string &msg);
-static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len);
-
-#undef YY_INPUT
-#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
-
-%}
-
-NL (\r\n|\r|\n)
-SP [ \t\r\n]+
-OPEN {SP}?"<"
-OPENSPECIAL {SP}?"<?"
-CLOSE ">"{NL}?
-CLOSESPECIAL "?>"{NL}?
-NAMESTART [:A-Za-z\200-\377_]
-NAMECHAR [:A-Za-z\200-\377_0-9.-]
-NAME {NAMESTART}{NAMECHAR}*
-ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
-COLON ":"
-PCDATA [^<]+
-COMMENT {OPEN}"!--"
-COMMENTEND "--"{CLOSE}
-STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
-DOCTYPE {SP}?"<!DOCTYPE"{SP}
-CDATA {SP}?"<![CDATA["
-ENDCDATA "]]>"
-
-%option noyywrap
-
-%s Initial
-%s Content
-%s CDataSection
-%s Element
-%s Attributes
-%s AttributeValue
-%s AttrValueStr
-%s Prolog
-%s Comment
-
-%%
-
-<Initial>{
- {SP} { countLines(yyscanner,yytext,yyleng); }
- {DOCTYPE} { countLines(yyscanner,yytext,yyleng); }
- {OPENSPECIAL} { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
- {OPEN} { countLines(yyscanner,yytext,yyleng);
- initElement(yyscanner);
- BEGIN(Element); }
- {COMMENT} { yyextra->commentContext = YY_START;
- BEGIN(Comment);
- }
-}
-<Content>{
- {CDATA} { countLines(yyscanner,yytext,yyleng);
- yyextra->cdataContext = YY_START;
- BEGIN(CDataSection);
- }
- {PCDATA} { yyextra->data += processData(yyscanner,yytext,yyleng); }
- {OPEN} { countLines(yyscanner,yytext,yyleng);
- addCharacters(yyscanner);
- initElement(yyscanner);
- BEGIN(Element);
- }
- {COMMENT} { yyextra->commentContext = YY_START;
- countLines(yyscanner,yytext,yyleng);
- BEGIN(Comment);
- }
-}
-<Element>{
- "/" { yyextra->isEnd = true; }
- {NAME} { yyextra->name = yytext;
- BEGIN(Attributes); }
- {CLOSE} { addElement(yyscanner);
- countLines(yyscanner,yytext,yyleng);
- yyextra->data = "";
- BEGIN(Content);
- }
- {SP} { countLines(yyscanner,yytext,yyleng); }
-}
-<Attributes>{
- "/" { yyextra->selfClose = true; }
- {NAME} { yyextra->attrName = yytext; }
- "=" { BEGIN(AttributeValue); }
- {CLOSE} { addElement(yyscanner);
- countLines(yyscanner,yytext,yyleng);
- yyextra->data = "";
- BEGIN(Content);
- }
- {SP} { countLines(yyscanner,yytext,yyleng); }
-}
-<AttributeValue>{
- {SP} { countLines(yyscanner,yytext,yyleng); }
- ['"] { yyextra->stringChar = *yytext;
- yyextra->attrValue = "";
- BEGIN(AttrValueStr);
- }
- . { std::string msg = std::string("Missing attribute value. Unexpected character `")+yytext+"` found";
- reportError(yyscanner,msg);
- unput(*yytext);
- BEGIN(Attributes);
- }
-}
-<AttrValueStr>{
- [^'"\n]+ { yyextra->attrValue += processData(yyscanner,yytext,yyleng); }
- ['"] { if (*yytext==yyextra->stringChar)
- {
- addAttribute(yyscanner);
- BEGIN(Attributes);
- }
- else
- {
- yyextra->attrValue += processData(yyscanner,yytext,yyleng);
- }
- }
- \n { yyextra->lineNr++; yyextra->attrValue+=' '; }
-}
-<CDataSection>{
- {ENDCDATA} { BEGIN(yyextra->cdataContext); }
- [^]\n]+ { yyextra->data += yytext; }
- \n { yyextra->data += yytext;
- yyextra->lineNr++;
- }
- . { yyextra->data += yytext; }
-}
-<Prolog>{
- {CLOSESPECIAL} { countLines(yyscanner,yytext,yyleng);
- BEGIN(Initial);
- }
- [^?\n]+ { }
- \n { yyextra->lineNr++; }
- . { }
-}
-<Comment>{
- {COMMENTEND} { countLines(yyscanner,yytext,yyleng);
- BEGIN(yyextra->commentContext);
- }
- [^\n-]+ { }
- \n { yyextra->lineNr++; }
- . { }
-}
-\n { yyextra->lineNr++; }
-. { std::string msg = "Unexpected character `";
- msg+=yytext;
- msg+="` found";
- reportError(yyscanner,msg);
- }
-
-%%
-
-//----------------------------------------------------------------------------------------
-
-static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- yy_size_t inputPosition = yyextra->inputPosition;
- const char *s = yyextra->inputString + inputPosition;
- yy_size_t c=0;
- while( c < max_size && *s)
- {
- *buf++ = *s++;
- c++;
- }
- yyextra->inputPosition += c;
- return c;
-}
-
-static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- for (yy_size_t i=0;i<len;i++)
- {
- if (txt[i]=='\n') yyextra->lineNr++;
- }
-}
-
-static void initElement(yyscan_t yyscanner)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- yyextra->isEnd = false; // true => </tag>
- yyextra->selfClose = false; // true => <tag/>
- yyextra->name = "";
- yyextra->attrs.clear();
-}
-
-static void checkAndUpdatePath(yyscan_t yyscanner)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- if (yyextra->xpath.empty())
- {
- std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag";
- reportError(yyscanner,msg);
- }
- else
- {
- std::string expectedTagName = yyextra->xpath.back();
- if (expectedTagName!=yyextra->name)
- {
- std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+expectedTagName+"' at the same level";
- reportError(yyscanner,msg);
- }
- else // matching end tag
- {
- yyextra->xpath.pop_back();
- }
- }
-}
-
-static void addElement(yyscan_t yyscanner)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- if (!yyextra->isEnd)
- {
- yyextra->xpath.push_back(yyextra->name);
- if (yyextra->handlers.startElement)
- {
- yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
- }
- if (yy_flex_debug)
- {
- fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
- for (auto attr : yyextra->attrs)
- {
- fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str());
- }
- fprintf(stderr,"])\n");
- }
- }
- if (yyextra->isEnd || yyextra->selfClose)
- {
- if (yy_flex_debug)
- {
- fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
- }
- checkAndUpdatePath(yyscanner);
- if (yyextra->handlers.endElement)
- {
- yyextra->handlers.endElement(yyextra->name);
- }
- }
-}
-
-static std::string trimSpaces(const std::string &str)
-{
- const int l = static_cast<int>(str.length());
- int s=0, e=l-1;
- while (s<l && isspace(str.at(s))) s++;
- while (e>s && isspace(str.at(e))) e--;
- return str.substr(s,1+e-s);
-}
-
-static void addCharacters(yyscan_t yyscanner)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- std::string data = trimSpaces(yyextra->data);
- if (yyextra->handlers.characters)
- {
- yyextra->handlers.characters(data);
- }
- if (!data.empty())
- {
- if (yy_flex_debug)
- {
- fprintf(stderr,"characters(%s)\n",data.c_str());
- }
- }
-}
-
-static void addAttribute(yyscan_t yyscanner)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue));
-}
-
-static void reportError(yyscan_t yyscanner,const std::string &msg)
-{
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- if (yy_flex_debug)
- {
- fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str());
- }
- if (yyextra->handlers.error)
- {
- yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg);
- }
-}
-
-static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" };
-static const char entities_dec[] = { '&', '"', '>', '<', '\'' };
-static const int num_entities = 5;
-
-// replace character entities such as &amp; in txt and return the string where entities
-// are replaced
-static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len)
-{
- std::string result;
- result.reserve(len);
- for (yy_size_t i=0; i<len; i++)
- {
- char c = txt[i];
- if (c=='&')
- {
- const int maxEntityLen = 10;
- char entity[maxEntityLen+1];
- entity[maxEntityLen]='\0';
- for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
- {
- if (txt[i+j+1]!=';')
- {
- entity[j]=txt[i+j+1];
- }
- else
- {
- entity[j]=0;
- break;
- }
- }
- bool found=false;
- for (int e=0; !found && e<num_entities; e++)
- {
- if (strcmp(entity,entities_enc[e])==0)
- {
- result+=entities_dec[e];
- i+=strlen(entities_enc[e])+1;
- found=true;
- }
- }
- if (!found)
- {
- std::string msg = std::string("Invalid character entity '&") + entity + ";' found\n";
- reportError(yyscanner,msg);
- }
- }
- else
- {
- result+=c;
- }
- }
- return result;
-}
-
-//--------------------------------------------------------------
-
-struct XMLParser::Private
-{
- yyscan_t yyscanner;
- struct xmlYY_state xmlYY_extra;
-};
-
-XMLParser::XMLParser(const XMLHandlers &handlers) : p(new Private)
-{
- xmlYYlex_init_extra(&p->xmlYY_extra,&p->yyscanner);
- p->xmlYY_extra.handlers = handlers;
-}
-
-XMLParser::~XMLParser()
-{
- xmlYYlex_destroy(p->yyscanner);
-}
-
-void XMLParser::parse(const char *fileName,const char *inputStr)
-{
- yyscan_t yyscanner = p->yyscanner;
- struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
-
-#ifdef FLEX_DEBUG
- xmlYYset_debug(1,p->yyscanner);
-#endif
-
- if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input
-
- printlex(yy_flex_debug, true, __FILE__, fileName);
-
- BEGIN(Initial);
- yyextra->fileName = fileName;
- yyextra->lineNr = 1;
- yyextra->inputString = inputStr;
- yyextra->inputPosition = 0;
-
- xmlYYrestart( 0, yyscanner );
-
- if (yyextra->handlers.startDocument)
- {
- yyextra->handlers.startDocument();
- }
- xmlYYlex(yyscanner);
- if (yyextra->handlers.endDocument)
- {
- yyextra->handlers.endDocument();
- }
-
- if (!yyextra->xpath.empty())
- {
- std::string tagName = yyextra->xpath.back();
- std::string msg = "End of file reached while expecting closing tag '"+tagName+"'";
- reportError(yyscanner,msg);
- }
-
- printlex(yy_flex_debug, false, __FILE__, fileName);
-}
-
-int XMLParser::lineNr() const
-{
- struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
- return yyextra->lineNr;
-}
-
-std::string XMLParser::fileName() const
-{
- struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
- return yyextra->fileName;
-}
-
-#if USE_STATE2STRING
-#include "xml.l.h"
-#endif