summaryrefslogtreecommitdiffstats
path: root/Source/cmListFileLexer.in.l
diff options
context:
space:
mode:
Diffstat (limited to 'Source/cmListFileLexer.in.l')
-rw-r--r--Source/cmListFileLexer.in.l202
1 files changed, 188 insertions, 14 deletions
diff --git a/Source/cmListFileLexer.in.l b/Source/cmListFileLexer.in.l
index 12b53ee..ed4bf6b 100644
--- a/Source/cmListFileLexer.in.l
+++ b/Source/cmListFileLexer.in.l
@@ -31,6 +31,9 @@ Modify cmListFileLexer.c:
*/
#include "cmStandardLexer.h"
+#ifdef WIN32
+#include <cmsys/Encoding.h>
+#endif
/* Setup the proper cmListFileLexer_yylex declaration. */
#define YY_EXTRA_TYPE cmListFileLexer*
@@ -42,10 +45,13 @@ Modify cmListFileLexer.c:
struct cmListFileLexer_s
{
cmListFileLexer_Token token;
+ int bracket;
+ int comment;
int line;
int column;
int size;
FILE* file;
+ size_t cr;
char* string_buffer;
char* string_position;
int string_left;
@@ -74,22 +80,57 @@ static void cmListFileLexerDestroy(cmListFileLexer* lexer);
%option noyywrap
%pointer
%x STRING
+%x BRACKET
+%x BRACKETEND
+%x COMMENT
MAKEVAR \$\([A-Za-z0-9_]*\)
-UNQUOTED ([^ \t\r\n\(\)#\\\"]|\\.)
-LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\"
+UNQUOTED ([^ \t\r\n\(\)#\\\"[=]|\\.)
+LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\"
%%
-\n {
+<INITIAL,COMMENT>\n {
lexer->token.type = cmListFileLexer_Token_Newline;
cmListFileLexerSetToken(lexer, yytext, yyleng);
++lexer->line;
lexer->column = 1;
+ BEGIN(INITIAL);
return 1;
}
-#.* {
+#?\[=*\[\n? {
+ const char* bracket = yytext;
+ lexer->comment = yytext[0] == '#';
+ if(lexer->comment)
+ {
+ lexer->token.type = cmListFileLexer_Token_CommentBracket;
+ bracket += 1;
+ }
+ else
+ {
+ lexer->token.type = cmListFileLexer_Token_ArgumentBracket;
+ }
+ cmListFileLexerSetToken(lexer, "", 0);
+ lexer->bracket = (int)(strchr(bracket+1, '[') - bracket);
+ if(yytext[yyleng-1] == '\n')
+ {
+ ++lexer->line;
+ lexer->column = 1;
+ }
+ else
+ {
+ lexer->column += yyleng;
+ }
+ BEGIN(BRACKET);
+}
+
+# {
+ lexer->column += yyleng;
+ BEGIN(COMMENT);
+}
+
+<COMMENT>.* {
lexer->column += yyleng;
}
@@ -107,21 +148,64 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\"
return 1;
}
-[A-Za-z_][A-Za-z0-9_]+ {
+[A-Za-z_][A-Za-z0-9_]* {
lexer->token.type = cmListFileLexer_Token_Identifier;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
-({UNQUOTED})({UNQUOTED})* {
+<BRACKET>\]=* {
+ /* Handle ]]====]=======]*/
+ cmListFileLexerAppend(lexer, yytext, yyleng);
+ lexer->column += yyleng;
+ if(yyleng == lexer->bracket)
+ {
+ BEGIN(BRACKETEND);
+ }
+}
+
+<BRACKETEND>\] {
+ lexer->column += yyleng;
+ /* Erase the partial bracket from the token. */
+ lexer->token.length -= lexer->bracket;
+ lexer->token.text[lexer->token.length] = 0;
+ BEGIN(INITIAL);
+ return 1;
+}
+
+<BRACKET>([^]\n])+ {
+ cmListFileLexerAppend(lexer, yytext, yyleng);
+ lexer->column += yyleng;
+}
+
+<BRACKET,BRACKETEND>\n {
+ cmListFileLexerAppend(lexer, yytext, yyleng);
+ ++lexer->line;
+ lexer->column = 1;
+ BEGIN(BRACKET);
+}
+
+<BRACKET,BRACKETEND>. {
+ cmListFileLexerAppend(lexer, yytext, yyleng);
+ lexer->column += yyleng;
+ BEGIN(BRACKET);
+}
+
+<BRACKET,BRACKETEND><<EOF>> {
+ lexer->token.type = cmListFileLexer_Token_BadBracket;
+ BEGIN(INITIAL);
+ return 1;
+}
+
+({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* {
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
-({MAKEVAR}|{UNQUOTED})({LEGACY})* {
+({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* {
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
@@ -141,7 +225,7 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\"
}
<STRING>\\\n {
- cmListFileLexerAppend(lexer, yytext, yyleng);
+ /* Continuation: text is not part of string */
++lexer->line;
lexer->column = 1;
}
@@ -264,7 +348,38 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
{
if(lexer->file)
{
- return (int)fread(buffer, 1, bufferSize, lexer->file);
+ /* Convert CRLF -> LF explicitly. The C FILE "t"ext mode
+ does not convert newlines on all platforms. Move any
+ trailing CR to the start of the buffer for the next read. */
+ size_t cr = lexer->cr;
+ size_t n;
+ buffer[0] = '\r';
+ n = fread(buffer+cr, 1, bufferSize-cr, lexer->file);
+ if(n)
+ {
+ char* o = buffer;
+ const char* i = buffer;
+ const char* e;
+ n += cr;
+ cr = (buffer[n-1] == '\r')? 1:0;
+ e = buffer + n - cr;
+ while(i != e)
+ {
+ if(i[0] == '\r' && i[1] == '\n')
+ {
+ ++i;
+ }
+ *o++ = *i++;
+ }
+ n = o - buffer;
+ }
+ else
+ {
+ n = cr;
+ cr = 0;
+ }
+ lexer->cr = cr;
+ return n;
}
else if(lexer->string_left)
{
@@ -292,6 +407,7 @@ static void cmListFileLexerInit(cmListFileLexer* lexer)
/*--------------------------------------------------------------------------*/
static void cmListFileLexerDestroy(cmListFileLexer* lexer)
{
+ cmListFileLexerSetToken(lexer, 0, 0);
if(lexer->file || lexer->string_buffer)
{
cmListFileLexer_yylex_destroy(lexer->scanner);
@@ -327,19 +443,74 @@ cmListFileLexer* cmListFileLexer_New()
/*--------------------------------------------------------------------------*/
void cmListFileLexer_Delete(cmListFileLexer* lexer)
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
free(lexer);
}
/*--------------------------------------------------------------------------*/
-int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name)
+static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
+{
+ unsigned char b[2];
+ if(fread(b, 1, 2, f) == 2)
+ {
+ if(b[0] == 0xEF && b[1] == 0xBB)
+ {
+ if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF)
+ {
+ return cmListFileLexer_BOM_UTF8;
+ }
+ }
+ else if(b[0] == 0xFE && b[1] == 0xFF)
+ {
+ /* UTF-16 BE */
+ return cmListFileLexer_BOM_UTF16BE;
+ }
+ else if(b[0] == 0 && b[1] == 0)
+ {
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF)
+ {
+ return cmListFileLexer_BOM_UTF32BE;
+ }
+ }
+ else if(b[0] == 0xFF && b[1] == 0xFE)
+ {
+ fpos_t p;
+ fgetpos(f, &p);
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0)
+ {
+ return cmListFileLexer_BOM_UTF32LE;
+ }
+ fsetpos(f, &p);
+ return cmListFileLexer_BOM_UTF16LE;
+ }
+ }
+ rewind(f);
+ return cmListFileLexer_BOM_None;
+}
+
+/*--------------------------------------------------------------------------*/
+int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
+ cmListFileLexer_BOM* bom)
{
int result = 1;
cmListFileLexerDestroy(lexer);
if(name)
{
- lexer->file = fopen(name, "r");
- if(!lexer->file)
+#ifdef _WIN32
+ wchar_t* wname = cmsysEncoding_DupToWide(name);
+ lexer->file = _wfopen(wname, L"rb");
+ free(wname);
+#else
+ lexer->file = fopen(name, "rb");
+#endif
+ if(lexer->file)
+ {
+ if(bom)
+ {
+ *bom = cmListFileLexer_ReadBOM(lexer->file);
+ }
+ }
+ else
{
result = 0;
}
@@ -385,7 +556,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
}
else
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
return 0;
}
}
@@ -431,7 +602,10 @@ const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer,
case cmListFileLexer_Token_ParenRight: return "right paren";
case cmListFileLexer_Token_ArgumentUnquoted: return "unquoted argument";
case cmListFileLexer_Token_ArgumentQuoted: return "quoted argument";
+ case cmListFileLexer_Token_ArgumentBracket: return "bracket argument";
+ case cmListFileLexer_Token_CommentBracket: return "bracket comment";
case cmListFileLexer_Token_BadCharacter: return "bad character";
+ case cmListFileLexer_Token_BadBracket: return "unterminated bracket";
case cmListFileLexer_Token_BadString: return "unterminated string";
}
return "unknown token";