summary refs log tree commit diff stats
path: root/src/doctokenizer.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/doctokenizer.l')
-rw-r--r-- src/doctokenizer.l 180
1 files changed, 97 insertions, 83 deletions
diff --git a/src/doctokenizer.l b/src/doctokenizer.l
index 640c562..b2be204 100644
--- a/src/doctokenizer.l
+++ b/src/doctokenizer.l
@@ -136,72 +136,6 @@ static int computeIndent(const char *str,int length)
return indent;
}
-/*! converts input string \a opt into a list of Html Attributes. Each
- * attribute is a name, value pair. The result is stored in g_token->attribs
- */
-static void parseHtmlAttribs(const char *att)
-{
- //printf("parseHtmlAttribs(%s)\n",att);
- QCString attribs=att;
- int len = attribs.length();
- char c;
- int i=0,startName,endName,startAttrib,endAttrib;
- while (i<len)
- {
- c=attribs.at(i);
- // skip spaces
- while (i<len && c==' ') { c=attribs.at(++i); }
- startName=i;
- // search for end of name
- while (i<len && c!=' ' && c!='=') { c=attribs.at(++i); }
- endName=i;
- HtmlAttrib opt;
- opt.name = attribs.mid(startName,endName-startName).lower();
- // skip spaces
- while (i<len && c==' ') { c=attribs.at(++i); }
- if (attribs.at(i)=='=') // option has value
- {
- c=attribs.at(++i);
- // skip spaces
- while (i<len && c==' ') { c=attribs.at(++i); }
- if (attribs.at(i)=='\'') // option '...'
- {
- c=attribs.at(++i);
- startAttrib=i;
-
- // search for matching quote
- while (i<len && c!='\'') { c=attribs.at(++i); }
- endAttrib=i;
- if (i<len) c=attribs.at(++i);
- }
- else if (attribs.at(i)=='"') // option "..."
- {
- c=attribs.at(++i);
- startAttrib=i;
- // search for matching quote
- while (i<len && c!='"') { c=attribs.at(++i); }
- endAttrib=i;
- if (i<len) c=attribs.at(++i);
- }
- else // value without any quotes
- {
- startAttrib=i;
- // search for separator
- while (i<len && c!=' ') { c=attribs.at(++i); }
- endAttrib=i;
- if (i<len) c=attribs.at(++i);
- }
- opt.value = attribs.mid(startAttrib,endAttrib-startAttrib);
- }
- else // start next option
- {
- }
- //printf("=====> Adding option name=<%s> value=<%s>\n",
- // opt.name.data(),opt.value.data());
- g_token->attribs.append(&opt);
- }
-}
-
//--------------------------------------------------------------------------
static void processSection()
@@ -231,27 +165,96 @@ static void processSection()
static void handleHtmlTag()
{
- g_token->name = yytext;
+ QCString tagText=yytext;
g_token->attribs.clear();
+ g_token->endTag = FALSE;
+ g_token->emptyTag = FALSE;
+ // Split the matched tag text into g_token->name, ->attribs, ->endTag and ->emptyTag.
+ // Check for end tag ("</name ...>")
int startNamePos=1;
- if (g_token->name.at(1)=='/') startNamePos++;
- int attSep=0;
- while (attSep<yyleng && !isspace(yytext[attSep]))
+ if (tagText.at(1)=='/')
{
- attSep++;
+ g_token->endTag = TRUE;
+ startNamePos++;
}
- if (attSep!=yyleng) // tag has one or more options
+
+ // Parse the name portion: it runs up to the first non-name character
+ int i = startNamePos;
+ for (i=startNamePos; i < yyleng; i++)
{
- parseHtmlAttribs(g_token->name.mid(attSep+1,g_token->name.length()-attSep-2));
- g_token->name=g_token->name.mid(startNamePos,attSep-1).lower();
+ // Check for valid HTML/XML name chars (including namespaces); cast keeps isalnum defined for chars >= 0x80
+ char c = tagText.at(i);
+ if (!(isalnum((unsigned char)c) || c=='-' || c=='_' || c==':')) break;
}
- else // tag without options, strip brackets
+ g_token->name = tagText.mid(startNamePos,i-startNamePos);
+
+ // Parse the attributes. Each attribute is a name, value pair
+ // The result is stored in g_token->attribs. tagText still contains the closing '>', so every scan must stop there.
+ int startName,endName,startAttrib,endAttrib;
+ while (i<yyleng)
{
- g_token->name=g_token->name.mid(startNamePos,g_token->name.length()-startNamePos-1).lower();
+ char c=tagText.at(i);
+ // skip whitespace (the HTMLTAG pattern allows tabs and newlines, not only blanks)
+ while (i<yyleng && isspace((unsigned char)c)) { c=tagText.at(++i); }
+ // check for end of the tag
+ if (c == '>') break;
+ // Check for XML style "empty" tag.
+ if (c == '/')
+ {
+ g_token->emptyTag = TRUE;
+ break;
+ }
+ startName=i;
+ // search for end of name: whitespace, '=' or the closing '>' of the tag
+ while (i<yyleng && !isspace((unsigned char)c) && c!='=' && c!='>') { c=tagText.at(++i); }
+ endName=i;
+ HtmlAttrib opt;
+ opt.name = tagText.mid(startName,endName-startName).lower();
+ // skip whitespace between the name and a possible '='
+ while (i<yyleng && isspace((unsigned char)c)) { c=tagText.at(++i); }
+ if (tagText.at(i)=='=') // option has value
+ {
+ c=tagText.at(++i);
+ // skip whitespace between '=' and the value
+ while (i<yyleng && isspace((unsigned char)c)) { c=tagText.at(++i); }
+ if (tagText.at(i)=='\'') // option '...'
+ {
+ c=tagText.at(++i);
+ startAttrib=i;
+
+ // search for matching quote
+ while (i<yyleng && c!='\'') { c=tagText.at(++i); }
+ endAttrib=i;
+ if (i<yyleng) c=tagText.at(++i);
+ }
+ else if (tagText.at(i)=='"') // option "..."
+ {
+ c=tagText.at(++i);
+ startAttrib=i;
+ // search for matching quote
+ while (i<yyleng && c!='"') { c=tagText.at(++i); }
+ endAttrib=i;
+ if (i<yyleng) c=tagText.at(++i);
+ }
+ else // value without any quotes
+ {
+ startAttrib=i;
+ // search for separator: whitespace or the closing '>' (which is not part of the value)
+ while (i<yyleng && !isspace((unsigned char)c) && c!='>') { c=tagText.at(++i); }
+ endAttrib=i;
+ if (i<yyleng) c=tagText.at(++i);
+ }
+ opt.value = tagText.mid(startAttrib,endAttrib-startAttrib);
+ }
+ else // start next option
+ {
+ }
+ //printf("=====> Adding option name=<%s> value=<%s>\n",
+ // opt.name.data(),opt.value.data());
+ g_token->attribs.append(&opt);
}
- g_token->endTag = startNamePos==2;
}
-
+
static QString stripEmptyLines(const char *s)
{
int result=0,p=0;
@@ -287,6 +290,7 @@ WS [ \t\r\n]
NONWS [^ \t\r\n]
BLANK [ \t\r]
ID [a-z_A-Z][a-z_A-Z0-9]*
+MAILADR [a-z_A-Z0-9.+-]+"@"[a-z_A-Z0-9-]+("."[a-z_A-Z0-9\-]+)+[a-z_A-Z0-9\-]+
OPTSTARS ("//"{BLANK}*)?"*"*{BLANK}*
LISTITEM {BLANK}*{OPTSTARS}"-"("#")?{WS}
ENDLIST {BLANK}*{OPTSTARS}"."{BLANK}*\n
@@ -326,7 +330,7 @@ WORD1 "%"?{CHARWORD}+|"{"|"}"|("\""[^"\n]*"\"")
WORD2 "."|","|"("|")"|"["|"]"|":"|";"|"\?"
WORD1NQ "%"?{CHARWORDQ}+
WORD2NQ "."|","|"("|")"|"["|"]"|":"|";"|"\?"
-HTMLTAG "<"(("/")?){ID}({WS}+{ATTRIB})*{WS}*">"
+HTMLTAG "<"(("/")?){ID}({WS}+{ATTRIB})*{WS}*(("/")?)">"
HTMLKEYL "strong"|"center"|"table"|"caption"|"small"|"code"|"dfn"|"var"|"img"|"pre"|"sub"|"sup"|"tr"|"td"|"th"|"ol"|"ul"|"li"|"tt"|"kbd"|"em"|"hr"|"dl"|"dt"|"dd"|"br"|"i"|"a"|"b"|"p"
HTMLKEYU "STRONG"|"CENTER"|"TABLE"|"CAPTION"|"SMALL"|"CODE"|"DFN"|"VAR"|"IMG"|"PRE"|"SUB"|"SUP"|"TR"|"TD"|"TH"|"OL"|"UL"|"LI"|"TT"|"KBD"|"EM"|"HR"|"DL"|"DT"|"DD"|"BR"|"I"|"A"|"B"|"P"
HTMLKEYW {HTMLKEYL}|{HTMLKEYU}
@@ -344,6 +348,7 @@ REFWORD ("#"|"::")?({ID}("."|"#"|"::"|"-"))*({ID}(":")?){FUNCARG}?
%x St_TitleA
%x St_TitleV
%x St_Code
+%x St_XmlCode
%x St_HtmlOnly
%x St_ManOnly
%x St_LatexOnly
@@ -445,7 +450,7 @@ REFWORD ("#"|"::")?({ID}("."|"#"|"::"|"-"))*({ID}(":")?){FUNCARG}?
g_token->isEMailAddr=FALSE;
return TK_URL;
}
-<St_Para>[a-z_A-Z0-9.+-]+"@"[a-z_A-Z0-9-]+("."[a-z_A-Z0-9\-]+)+[a-z_A-Z0-9\-]+ { // Mail address
+<St_Para>{MAILADR} { // Mail address
g_token->name=yytext;
g_token->isEMailAddr=TRUE;
return TK_URL;
@@ -539,9 +544,12 @@ REFWORD ("#"|"::")?({ID}("."|"#"|"::"|"-"))*({ID}(":")?){FUNCARG}?
<St_Code>{WS}*{CMD}"endcode" {
return RetVal_OK;
}
-<St_Code>[^\\@\n]+ |
-<St_Code>\n |
-<St_Code>. {
+<St_XmlCode>{WS}*"</code>" { // closing </code> tag: stop collecting and return RetVal_OK
+ return RetVal_OK;
+ }
+<St_Code,St_XmlCode>[^\\@\n]+ |
+<St_Code,St_XmlCode>\n |
+<St_Code,St_XmlCode>. { // any other input in either state is appended to g_token->verb
+ g_token->verb+=yytext;
+ }
<St_HtmlOnly>{CMD}"endhtmlonly" {
@@ -942,6 +950,12 @@ void doctokenizerYYsetStateCode()
BEGIN(St_Code);
}
+void doctokenizerYYsetStateXmlCode()
+{
+ g_token->verb=""; // start from an empty buffer; the St_XmlCode rules append matched text to it
+ BEGIN(St_XmlCode); // switch the scanner to the St_XmlCode start condition
+}
+
void doctokenizerYYsetStateHtmlOnly()
{
g_token->verb="";