From 36332ecdec1f40cec2f8d8b902da4f9a018adb43 Mon Sep 17 00:00:00 2001
From: albert-github <albert.tests@gmail.com>
Date: Wed, 10 Mar 2021 11:40:18 +0100
Subject: Better handling of rules in lex scanners

- handling of square brackets: an opening square bracket inside a pair of square brackets has no special meaning unless followed by a colon (resulting in `[:...:]`).
- handling of sharp brackets so that `<tst><<<WORD` is correctly handled
- better handling of escaped characters
- handling of `(?...)`

(Found during tests on some lex files available on Fossies).
---
 src/lexcode.l    | 64 +++++++++++++++++++++++++++++++++++++++-----------------
 src/lexscanner.l | 60 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 84 insertions(+), 40 deletions(-)
diff --git a/src/lexcode.l b/src/lexcode.l
index a118703..81ad389 100644
--- a/src/lexcode.l
+++ b/src/lexcode.l
@@ -66,7 +66,6 @@ struct lexcodeYY_state
      QCString      CCodeBuffer;
      int           startCCodeLine = -1;
      int           roundCount = 0;
-     int           squareCount = 0;
      bool          insideCode = FALSE;
      QCString      delimiter;
      QCString      docBlockName;
@@ -114,12 +113,13 @@ LiteralStart    "%{"{nl}
 LiteralEnd      "%}"{nl}
 RulesStart      "%%"{nl}
 RulesEnd        "%%"{nl}
-RulesSharp      "<"[^>]*">"
+RulesSharp      "<"[^>\n]*">"
 RulesCurly      "{"[^{}\n]*"}"
 StartSquare     "["
 StartDouble     "\""
 StartRound      "("
-EscapeRulesCharOpen  "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ "
+StartRoundQuest "(?"
+EscapeRulesCharOpen  "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\"
 EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)"
 EscapeRulesChar      {EscapeRulesCharOpen}|{EscapeRulesCharClose}
 
@@ -134,14 +134,15 @@ CODE      [cC][oO][dD][eE]
 RAWBEGIN  (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"("
 RAWEND    ")"[^ \t\(\)\\]{0,16}\"
 CHARLIT   (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'"))
+CHARCE    "[:"[^:]*":]"
 
   /* no comment start / end signs inside square brackets */
 NCOMM [^/\*]
-  // C start comment 
+  // C start comment
 CCS   "/\*"
   // C end comment
 CCE   "*\/"
-  // Cpp comment 
+  // Cpp comment
 CPPC  "/\/"
   // doxygen start comment
 DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/")
@@ -160,6 +161,7 @@ NONLopt [^\n]*
 %x RulesSquare
 %x RulesRoundSquare
 %x RulesRound
+%x RulesRoundQuest
 %x UserSection
 
 %x TopSection
@@ -326,28 +328,24 @@ NONLopt [^\n]*
                            yyextra->rulesPatternBuffer += yytext;
                         }
 <RulesPattern>{StartSquare} {
-                           yyextra->squareCount++;
                            yyextra->rulesPatternBuffer += yytext;
                            yyextra->lastContext = YY_START;
                            BEGIN(RulesSquare);
                         }
-<RulesSquare,RulesRoundSquare>"\\["      |
-<RulesSquare,RulesRoundSquare>"\\]"      {
+<RulesSquare,RulesRoundSquare>{CHARCE} {
                            yyextra->rulesPatternBuffer += yytext;
                         }
-<RulesSquare,RulesRoundSquare>"["        {
-                           yyextra->squareCount++;
+<RulesSquare,RulesRoundSquare>"\\["      |
+<RulesSquare,RulesRoundSquare>"\\]"      {
                            yyextra->rulesPatternBuffer += yytext;
                         }
 <RulesSquare>"]"        {
-                           yyextra->squareCount--;
                            yyextra->rulesPatternBuffer += yytext;
-                           if (!yyextra->squareCount) BEGIN(RulesPattern) ;
+                           BEGIN(RulesPattern) ;
                         }
 <RulesRoundSquare>"]"        {
-                           yyextra->squareCount--;
                            yyextra->rulesPatternBuffer += yytext;
-                           if (!yyextra->squareCount) BEGIN(RulesRound) ;
+                           BEGIN(RulesRound) ;
                         }
 <RulesSquare,RulesRoundSquare>"\\\\"          {
                            yyextra->rulesPatternBuffer += yytext;
@@ -355,6 +353,28 @@ NONLopt [^\n]*
 <RulesSquare,RulesRoundSquare>.          {
                            yyextra->rulesPatternBuffer += yytext;
                         }
+<RulesPattern>{StartRoundQuest} {
+                           yyextra->rulesPatternBuffer += yytext;
+                           yyextra->lastContext = YY_START;
+                           BEGIN(RulesRoundQuest);
+                         }
+<RulesRoundQuest>{nl}    {
+                           yyextra->rulesPatternBuffer += yytext;
+                           if (!yyextra->rulesPatternBuffer.isEmpty())
+                           {
+                             startFontClass(yyscanner,"stringliteral");
+                             codifyLines(yyscanner,yyextra->rulesPatternBuffer.data());
+                             yyextra->rulesPatternBuffer.resize(0);
+                             endFontClass(yyscanner);
+                           }
+                         }
+<RulesRoundQuest>[^)]    {
+                           yyextra->rulesPatternBuffer += yytext;
+                         }
+<RulesRoundQuest>")"     {
+                           yyextra->rulesPatternBuffer += yytext;
+                           BEGIN(yyextra->lastContext);
+                         }
 <RulesPattern>{StartRound} {
                            yyextra->roundCount++;
                            yyextra->rulesPatternBuffer += yytext;
@@ -365,7 +385,6 @@ NONLopt [^\n]*
                            yyextra->rulesPatternBuffer += yytext;
                         }
 <RulesRound>{StartSquare} {
-                           yyextra->squareCount++;
                            yyextra->rulesPatternBuffer += yytext;
                            BEGIN(RulesRoundSquare);
                         }
@@ -373,8 +392,7 @@ NONLopt [^\n]*
                            yyextra->rulesPatternBuffer += yytext;
                            BEGIN(RulesRoundDouble);
                         }
-<RulesRound>"\\("       |
-<RulesRound>"\\)"       {
+<RulesRound>{EscapeRulesChar} {
                            yyextra->rulesPatternBuffer += yytext;
                         }
 <RulesRound>"("         {
@@ -386,6 +404,13 @@ NONLopt [^\n]*
                            yyextra->rulesPatternBuffer += yytext;
                            if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ;
                         }
+<RulesRound>{nl}        {
+                           yyextra->rulesPatternBuffer += yytext;
+                           yyextra->yyLineNr++;
+                        }
+<RulesRound>{ws}        {
+                           yyextra->rulesPatternBuffer += yytext;
+                        }
 <RulesRound>.           {
                            yyextra->rulesPatternBuffer += yytext;
                         }
@@ -890,9 +915,10 @@ NONLopt [^\n]*
                             yyextra->CCodeBuffer += yytext;
                             yyextra->yyLineNr++;
                           }
-  /*
+ /*
 <*>.  { fprintf(stderr,"Lex code scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);}
-  */
+<*>{nl}  { fprintf(stderr,"Lex code scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext); yyextra->yyLineNr++;}
+ */
 <*><<EOF>>                {
                             handleCCode(yyscanner);
                             yyterminate();
diff --git a/src/lexscanner.l b/src/lexscanner.l
index 47d3443..fcb2a82 100644
--- a/src/lexscanner.l
+++ b/src/lexscanner.l
@@ -76,7 +76,6 @@ struct lexscannerYY_state
   QCString         prefix = "yy";
   QCString         CCodeBuffer;
   int              roundCount = 0;
-  int              squareCount = 0;
 
   QCString         yyFileName;
   ClangTUParser   *clangParser = 0;
@@ -111,12 +110,13 @@ LiteralEnd      "%}"{nl}
 OptPrefix       "%option"{ws}+"prefix"{ws}*"="{ws}*
 RulesStart      "%%"{nl}
 RulesEnd        "%%"{nl}
-RulesSharp      "<"[^>]*">"
+RulesSharp      "<"[^>\n]*">"
 RulesCurly      "{"[^{}\n]*"}"
 StartSquare     "["
 StartDouble     "\""
 StartRound      "("
-EscapeRulesCharOpen  "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ "
+StartRoundQuest "(?"
+EscapeRulesCharOpen  "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\"
 EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)"
 EscapeRulesChar      {EscapeRulesCharOpen}|{EscapeRulesCharClose}
 
@@ -131,14 +131,14 @@ CODE      [cC][oO][dD][eE]
 RAWBEGIN  (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"("
 RAWEND    ")"[^ \t\(\)\\]{0,16}\"
 CHARLIT   (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'"))
-
+CHARCE    "[:"[^:]*":]"
   /* no comment start / end signs inside square brackets */
 NCOMM [^/\*]
-  // C start comment 
+  // C start comment
 CCS   "/\*"
   // C end comment
 CCE   "*\/"
-  // Cpp comment 
+  // Cpp comment
 CPPC  "/\/"
   // doxygen start comment
 DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/")
@@ -158,6 +158,7 @@ NONLopt [^\n]*
 %x RulesSquare
 %x RulesRoundSquare
 %x RulesRound
+%x RulesRoundQuest
 %x UserSection
 
 %x TopSection
@@ -309,28 +310,24 @@ NONLopt [^\n]*
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
 <RulesPattern>{StartSquare} {
-                           yyextra->squareCount++;
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                            yyextra->lastContext = YY_START;
                            BEGIN(RulesSquare);
                         }
-<RulesSquare,RulesRoundSquare>"\\[" |
-<RulesSquare,RulesRoundSquare>"\\]" {
+<RulesSquare,RulesRoundSquare>{CHARCE} {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
-<RulesSquare,RulesRoundSquare>"[" {
-                           yyextra->squareCount++;
+<RulesSquare,RulesRoundSquare>"\\[" |
+<RulesSquare,RulesRoundSquare>"\\]" {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
 <RulesSquare>"]"        {
-                           yyextra->squareCount--;
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
-                           if (!yyextra->squareCount) BEGIN(RulesPattern);
+                           BEGIN(RulesPattern);
                         }
 <RulesRoundSquare>"]"   {
-                           yyextra->squareCount--;
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
-                           if (!yyextra->squareCount) BEGIN(RulesRound) ;
+                           BEGIN(RulesRound) ;
                         }
 <RulesSquare,RulesRoundSquare>"\\\\" {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
@@ -338,6 +335,21 @@ NONLopt [^\n]*
 <RulesSquare,RulesRoundSquare>. {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
+<RulesPattern>{StartRoundQuest} {
+                           yyextra->CCodeBuffer += repeatChar(' ', yyleng);
+                           yyextra->lastContext = YY_START;
+                           BEGIN(RulesRoundQuest);
+                         }
+<RulesRoundQuest>{nl}    {
+                           yyextra->CCodeBuffer += "\n";
+                         }
+<RulesRoundQuest>[^)]    {
+                           yyextra->CCodeBuffer += repeatChar(' ', yyleng);
+                         }
+<RulesRoundQuest>")"     {
+                           yyextra->CCodeBuffer += repeatChar(' ', yyleng);
+                           BEGIN(yyextra->lastContext);
+                         }
 <RulesPattern>{StartRound} {
                            yyextra->roundCount++;
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
@@ -348,7 +360,6 @@ NONLopt [^\n]*
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
 <RulesRound>{StartSquare} {
-                           yyextra->squareCount++;
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                            BEGIN(RulesRoundSquare);
                         }
@@ -356,8 +367,7 @@ NONLopt [^\n]*
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                            BEGIN(RulesRoundDouble);
                         }
-<RulesRound>"\\("       |
-<RulesRound>"\\)"       {
+<RulesRound>{EscapeRulesChar} {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
 <RulesRound>"("         {
@@ -369,6 +379,12 @@ NONLopt [^\n]*
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                            if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ;
                         }
+<RulesRound>{nl}        {
+                           yyextra->CCodeBuffer += "\n";
+                        }
+<RulesRound>{ws}        {
+                           yyextra->CCodeBuffer += repeatChar(' ', yyleng);
+                        }
 <RulesRound>.           {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                         }
@@ -391,7 +407,7 @@ NONLopt [^\n]*
 <RulesPattern>"\\\\"     {
                            yyextra->CCodeBuffer += repeatChar(' ', yyleng);
                          }
-<RulesPattern>{CCS}       {
+<RulesPattern>{CCS}      {
                            yyextra->CCodeBuffer += yytext;
                            yyextra->lastContext = YY_START;
                            BEGIN(COMMENT);
@@ -835,9 +851,11 @@ NONLopt [^\n]*
                             yyextra->CCodeBuffer += yytext;
                           }
 
-  /*
+
+ /*
 <*>.  { fprintf(stderr,"Lex scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);}
-  */
+<*>{nl}  { fprintf(stderr,"Lex scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext);}
+ */
 <*><<EOF>>                {
                             handleCCode(yyscanner);
                             yyterminate();
-- 
cgit v0.12