From 10c1495dc8984ec3b800c290f1c7448e75478da1 Mon Sep 17 00:00:00 2001 From: Dimitri van Heesch Date: Tue, 28 Jul 2020 09:27:45 +0200 Subject: Better handling of \\ilinebr - Routines to strip leading and trailing whitespace now also take \\ilinebr into account - Added a number of cases in doctokenizer.l where \\ilinebr wasn't handled yet. --- src/commentscan.l | 18 +++++++++++++++++- src/doctokenizer.l | 30 ++++++++++++++++++++++++++---- src/util.cpp | 15 +++++++++------ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/commentscan.l b/src/commentscan.l index 4de7562..3d0ca69 100644 --- a/src/commentscan.l +++ b/src/commentscan.l @@ -2972,7 +2972,23 @@ static void stripTrailingWhiteSpace(QCString &s) uint len = s.length(); int i = (int)len-1; char c; - while (i>=0 && ((c = s.at(i))==' ' || c=='\t' || c=='\r')) i--; + while (i>=0) + { + c = s.at(i); + if (c==' ' || c=='\t' || c=='\r') // normal whitespace + { + i--; + } + else if (c=='r' && i>=7 && qstrncmp("\\ilinebr",s.data()+i-7,8)==0) // special line break marker + { + i-=8; + } + else // non-whitespace + { + break; + } + } + //printf("stripTrailingWhitespace(%s) i=%d len=%d\n",s.data(),i,len); if (i!=(int)len-1) { s.resize(i+2); // string up to and including char at pos i and \0 terminator diff --git a/src/doctokenizer.l b/src/doctokenizer.l index 6ea39d9..51d234b 100644 --- a/src/doctokenizer.l +++ b/src/doctokenizer.l @@ -954,6 +954,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } +"\\ilinebr" { + return 0; + } "&"{ID}";" { /* symbol */ g_token->name = yytext; return TK_SYMBOL; @@ -990,6 +993,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } +"\\ilinebr" { + return 0; + } "&"{ID}";" { /* symbol */ g_token->name = yytext; return TK_SYMBOL; @@ -1017,6 +1023,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } 
+"\\ilinebr" { + return 0; + } {BLANK}*{ID}{BLANK}*"="{BLANK}* { // title attribute g_token->name = yytext; g_token->name = g_token->name.left(g_token->name.find('=')).stripWhiteSpace(); @@ -1035,6 +1044,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } +"\\ilinebr" { + return 0; + } {LABELID}{WS}? { // anchor g_token->name = QCString(yytext).stripWhiteSpace(); @@ -1064,6 +1076,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } +"\\ilinebr" { + return 0; + } . { // any other character unput(*yytext); return 0; @@ -1091,6 +1106,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} unput(*yytext); return 0; } +"\\ilinebr" { + return 0; + } . { // any other character unput(*yytext); return 0; @@ -1150,7 +1168,7 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} g_token->chars=yytext; return TK_WHITESPACE; } -"\""|\n { /* " or \n => end of title */ +"\""|\n|"\\ilinebr" { /* " or \n => end of title */ return 0; } {LABELID} { @@ -1246,7 +1264,7 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} /* State for skipping title (all chars until the end of the line) */ . -\n { return 0; } +(\n|"\\ilinebr") { return 0; } /* State for the pass used to find the anchors and sections */ @@ -1352,9 +1370,9 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} } [^a-z_A-Z0-9\-\\\@]+ . -\n +(\n|"\\ilinebr") . 
-\n +(\n|"\\ilinebr") {LABELID} { g_secLabel = yytext; processSection(); @@ -1370,6 +1388,10 @@ REFWORD_NOCV {FILEMASK}|{LABELID}|{REFWORD2_NOCV}|{REFWORD3}|{REFWORD4_NOCV} [^\n]*\n { g_secTitle = yytext; g_secTitle = g_secTitle.stripWhiteSpace(); + if (g_secTitle.right(8)=="\\ilinebr") + { + g_secTitle = g_secTitle.left(g_secTitle.length()-8); /* left() returns a new string; assign it back so the trailing \ilinebr marker is really removed */ + } processSection(); BEGIN(St_Sections); } diff --git a/src/util.cpp b/src/util.cpp index 8b3f618..13ae200 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -6612,10 +6612,11 @@ QCString stripLeadingAndTrailingEmptyLines(const QCString &s,int &docLine) // search for leading empty lines int i=0,li=-1,l=s.length(); char c; - while ((c=*p++)) + while ((c=*p)) { - if (c==' ' || c=='\t' || c=='\r') i++; - else if (c=='\n') i++,li=i,docLine++; + if (c==' ' || c=='\t' || c=='\r') i++,p++; + else if (c=='\\' && qstrncmp(p,"\\ilinebr",8)==0) i+=8,li=i,docLine++,p+=8; + else if (c=='\n') i++,li=i,docLine++,p++; else break; } @@ -6624,9 +6625,10 @@ QCString stripLeadingAndTrailingEmptyLines(const QCString &s,int &docLine) p=s.data()+b; while (b>=0) { - c=*p; p--; - if (c==' ' || c=='\t' || c=='\r') b--; - else if (c=='\n') bi=b,b--; + c=*p; + if (c==' ' || c=='\t' || c=='\r') b--,p--; + else if (c=='r' && b>=7 && qstrncmp(p-7,"\\ilinebr",8)==0) bi=b-7,b-=8,p-=8; + else if (c=='\n') bi=b,b--,p--; else break; } @@ -6637,6 +6639,7 @@ QCString stripLeadingAndTrailingEmptyLines(const QCString &s,int &docLine) if (bi==-1) bi=l; if (li==-1) li=0; if (bi<=li) return 0; // only empty lines + //printf("docLine='%s' len=%d li=%d bi=%d\n",s.data(),s.length(),li,bi); return s.mid(li,bi-li); } -- cgit v0.12