From 36332ecdec1f40cec2f8d8b902da4f9a018adb43 Mon Sep 17 00:00:00 2001 From: albert-github Date: Wed, 10 Mar 2021 11:40:18 +0100 Subject: Better handling of rules in lex scanners - handling of square brackets, an opening square bracket inside a pair of square brackets has no special meaning unless followed by a colon (resulting in `[:...:]`. - handling of sharp brackets so that `<<]*">" +RulesSharp "<"[^>\n]*">" RulesCurly "{"[^{}\n]*"}" StartSquare "[" StartDouble "\"" StartRound "(" -EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ " +StartRoundQuest "(?" +EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\" EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)" EscapeRulesChar {EscapeRulesCharOpen}|{EscapeRulesCharClose} @@ -134,14 +134,15 @@ CODE [cC][oO][dD][eE] RAWBEGIN (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"(" RAWEND ")"[^ \t\(\)\\]{0,16}\" CHARLIT (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'")) +CHARCE "[:"[^:]*":]" /* no comment start / end signs inside square brackets */ NCOMM [^/\*] - // C start comment + // C start comment CCS "/\*" // C end comment CCE "*\/" - // Cpp comment + // Cpp comment CPPC "/\/" // doxygen start comment DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/") @@ -160,6 +161,7 @@ NONLopt [^\n]* %x RulesSquare %x RulesRoundSquare %x RulesRound +%x RulesRoundQuest %x UserSection %x TopSection @@ -326,28 +328,24 @@ NONLopt [^\n]* yyextra->rulesPatternBuffer += yytext; } {StartSquare} { - yyextra->squareCount++; yyextra->rulesPatternBuffer += yytext; yyextra->lastContext = YY_START; BEGIN(RulesSquare); } -"\\[" | -"\\]" { +{CHARCE} { yyextra->rulesPatternBuffer += yytext; } -"[" { - yyextra->squareCount++; +"\\[" | +"\\]" { yyextra->rulesPatternBuffer += yytext; } "]" { - yyextra->squareCount--; yyextra->rulesPatternBuffer += yytext; - if (!yyextra->squareCount) BEGIN(RulesPattern) ; + BEGIN(RulesPattern) ; } "]" { - yyextra->squareCount--; yyextra->rulesPatternBuffer += yytext; - if (!yyextra->squareCount) BEGIN(RulesRound) ; + BEGIN(RulesRound) ; } "\\\\" { yyextra->rulesPatternBuffer += yytext; @@ -355,6 +353,28 @@ NONLopt [^\n]* . { yyextra->rulesPatternBuffer += yytext; } +{StartRoundQuest} { + yyextra->rulesPatternBuffer += yytext; + yyextra->lastContext = YY_START; + BEGIN(RulesRoundQuest); + } +{nl} { + yyextra->rulesPatternBuffer += yytext; + if (!yyextra->rulesPatternBuffer.isEmpty()) + { + startFontClass(yyscanner,"stringliteral"); + codifyLines(yyscanner,yyextra->rulesPatternBuffer.data()); + yyextra->rulesPatternBuffer.resize(0); + endFontClass(yyscanner); + } + } +[^)] { + yyextra->rulesPatternBuffer += yytext; + } +")" { + yyextra->rulesPatternBuffer += yytext; + BEGIN(yyextra->lastContext); + } {StartRound} { yyextra->roundCount++; yyextra->rulesPatternBuffer += yytext; @@ -365,7 +385,6 @@ NONLopt [^\n]* yyextra->rulesPatternBuffer += yytext; } {StartSquare} { - yyextra->squareCount++; yyextra->rulesPatternBuffer += yytext; BEGIN(RulesRoundSquare); } @@ -373,8 +392,7 @@ NONLopt [^\n]* yyextra->rulesPatternBuffer += yytext; BEGIN(RulesRoundDouble); } -"\\(" | -"\\)" { +{EscapeRulesChar} { yyextra->rulesPatternBuffer += yytext; } "(" { @@ -386,6 +404,13 @@ NONLopt [^\n]* yyextra->rulesPatternBuffer += yytext; if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ; } +{nl} { + yyextra->rulesPatternBuffer += yytext; + yyextra->yyLineNr++; + } +{ws} { + yyextra->rulesPatternBuffer += yytext; + } . { yyextra->rulesPatternBuffer += yytext; } @@ -890,9 +915,10 @@ NONLopt [^\n]* yyextra->CCodeBuffer += yytext; yyextra->yyLineNr++; } - /* + /* <*>. { fprintf(stderr,"Lex code scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);} - */ +<*>{nl} { fprintf(stderr,"Lex code scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext); yyextra->yyLineNr++;} + */ <*><> { handleCCode(yyscanner); yyterminate(); diff --git a/src/lexscanner.l b/src/lexscanner.l index 47d3443..fcb2a82 100644 --- a/src/lexscanner.l +++ b/src/lexscanner.l @@ -76,7 +76,6 @@ struct lexscannerYY_state QCString prefix = "yy"; QCString CCodeBuffer; int roundCount = 0; - int squareCount = 0; QCString yyFileName; ClangTUParser *clangParser = 0; @@ -111,12 +110,13 @@ LiteralEnd "%}"{nl} OptPrefix "%option"{ws}+"prefix"{ws}*"="{ws}* RulesStart "%%"{nl} RulesEnd "%%"{nl} -RulesSharp "<"[^>]*">" +RulesSharp "<"[^>\n]*">" RulesCurly "{"[^{}\n]*"}" StartSquare "[" StartDouble "\"" StartRound "(" -EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ " +StartRoundQuest "(?" +EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\" EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)" EscapeRulesChar {EscapeRulesCharOpen}|{EscapeRulesCharClose} @@ -131,14 +131,14 @@ CODE [cC][oO][dD][eE] RAWBEGIN (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"(" RAWEND ")"[^ \t\(\)\\]{0,16}\" CHARLIT (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'")) - +CHARCE "[:"[^:]*":]" /* no comment start / end signs inside square brackets */ NCOMM [^/\*] - // C start comment + // C start comment CCS "/\*" // C end comment CCE "*\/" - // Cpp comment + // Cpp comment CPPC "/\/" // doxygen start comment DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/") @@ -158,6 +158,7 @@ NONLopt [^\n]* %x RulesSquare %x RulesRoundSquare %x RulesRound +%x RulesRoundQuest %x UserSection %x TopSection @@ -309,28 +310,24 @@ NONLopt [^\n]* yyextra->CCodeBuffer += repeatChar(' ', yyleng); } {StartSquare} { - yyextra->squareCount++; yyextra->CCodeBuffer += repeatChar(' ', yyleng); yyextra->lastContext = YY_START; BEGIN(RulesSquare); } -"\\[" | -"\\]" { +{CHARCE} { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } -"[" { - yyextra->squareCount++; +"\\[" | +"\\]" { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } "]" { - yyextra->squareCount--; yyextra->CCodeBuffer += repeatChar(' ', yyleng); - if (!yyextra->squareCount) BEGIN(RulesPattern); + BEGIN(RulesPattern); } "]" { - yyextra->squareCount--; yyextra->CCodeBuffer += repeatChar(' ', yyleng); - if (!yyextra->squareCount) BEGIN(RulesRound) ; + BEGIN(RulesRound) ; } "\\\\" { yyextra->CCodeBuffer += repeatChar(' ', yyleng); @@ -338,6 +335,21 @@ NONLopt [^\n]* . { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } +{StartRoundQuest} { + yyextra->CCodeBuffer += repeatChar(' ', yyleng); + yyextra->lastContext = YY_START; + BEGIN(RulesRoundQuest); + } +{nl} { + yyextra->CCodeBuffer += "\n"; + } +[^)] { + yyextra->CCodeBuffer += repeatChar(' ', yyleng); + } +")" { + yyextra->CCodeBuffer += repeatChar(' ', yyleng); + BEGIN(yyextra->lastContext); + } {StartRound} { yyextra->roundCount++; yyextra->CCodeBuffer += repeatChar(' ', yyleng); @@ -348,7 +360,6 @@ NONLopt [^\n]* yyextra->CCodeBuffer += repeatChar(' ', yyleng); } {StartSquare} { - yyextra->squareCount++; yyextra->CCodeBuffer += repeatChar(' ', yyleng); BEGIN(RulesRoundSquare); } @@ -356,8 +367,7 @@ NONLopt [^\n]* yyextra->CCodeBuffer += repeatChar(' ', yyleng); BEGIN(RulesRoundDouble); } -"\\(" | -"\\)" { +{EscapeRulesChar} { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } "(" { @@ -369,6 +379,12 @@ NONLopt [^\n]* yyextra->CCodeBuffer += repeatChar(' ', yyleng); if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ; } +{nl} { + yyextra->CCodeBuffer += "\n"; + } +{ws} { + yyextra->CCodeBuffer += repeatChar(' ', yyleng); + } . { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } @@ -391,7 +407,7 @@ NONLopt [^\n]* "\\\\" { yyextra->CCodeBuffer += repeatChar(' ', yyleng); } -{CCS} { +{CCS} { yyextra->CCodeBuffer += yytext; yyextra->lastContext = YY_START; BEGIN(COMMENT); @@ -835,9 +851,11 @@ NONLopt [^\n]* yyextra->CCodeBuffer += yytext; } - /* + + /* <*>. { fprintf(stderr,"Lex scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);} - */ +<*>{nl} { fprintf(stderr,"Lex scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext);} + */ <*><> { handleCCode(yyscanner); yyterminate(); -- cgit v0.12