diff options
Diffstat (limited to 'Source/cmListFileLexer.in.l')
-rw-r--r-- | Source/cmListFileLexer.in.l | 202 |
1 files changed, 188 insertions, 14 deletions
diff --git a/Source/cmListFileLexer.in.l b/Source/cmListFileLexer.in.l index 12b53ee..ed4bf6b 100644 --- a/Source/cmListFileLexer.in.l +++ b/Source/cmListFileLexer.in.l @@ -31,6 +31,9 @@ Modify cmListFileLexer.c: */ #include "cmStandardLexer.h" +#ifdef WIN32 +#include <cmsys/Encoding.h> +#endif /* Setup the proper cmListFileLexer_yylex declaration. */ #define YY_EXTRA_TYPE cmListFileLexer* @@ -42,10 +45,13 @@ Modify cmListFileLexer.c: struct cmListFileLexer_s { cmListFileLexer_Token token; + int bracket; + int comment; int line; int column; int size; FILE* file; + size_t cr; char* string_buffer; char* string_position; int string_left; @@ -74,22 +80,57 @@ static void cmListFileLexerDestroy(cmListFileLexer* lexer); %option noyywrap %pointer %x STRING +%x BRACKET +%x BRACKETEND +%x COMMENT MAKEVAR \$\([A-Za-z0-9_]*\) -UNQUOTED ([^ \t\r\n\(\)#\\\"]|\\.) -LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\" +UNQUOTED ([^ \t\r\n\(\)#\\\"[=]|\\.) +LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t[=])*\" %% -\n { +<INITIAL,COMMENT>\n { lexer->token.type = cmListFileLexer_Token_Newline; cmListFileLexerSetToken(lexer, yytext, yyleng); ++lexer->line; lexer->column = 1; + BEGIN(INITIAL); return 1; } -#.* { +#?\[=*\[\n? { + const char* bracket = yytext; + lexer->comment = yytext[0] == '#'; + if(lexer->comment) + { + lexer->token.type = cmListFileLexer_Token_CommentBracket; + bracket += 1; + } + else + { + lexer->token.type = cmListFileLexer_Token_ArgumentBracket; + } + cmListFileLexerSetToken(lexer, "", 0); + lexer->bracket = (int)(strchr(bracket+1, '[') - bracket); + if(yytext[yyleng-1] == '\n') + { + ++lexer->line; + lexer->column = 1; + } + else + { + lexer->column += yyleng; + } + BEGIN(BRACKET); +} + +# { + lexer->column += yyleng; + BEGIN(COMMENT); +} + +<COMMENT>.* { lexer->column += yyleng; } @@ -107,21 +148,64 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\" return 1; } -[A-Za-z_][A-Za-z0-9_]+ { +[A-Za-z_][A-Za-z0-9_]* { lexer->token.type = cmListFileLexer_Token_Identifier; cmListFileLexerSetToken(lexer, yytext, yyleng); lexer->column += yyleng; return 1; } -({UNQUOTED})({UNQUOTED})* { +<BRACKET>\]=* { + /* Handle ]]====]=======]*/ + cmListFileLexerAppend(lexer, yytext, yyleng); + lexer->column += yyleng; + if(yyleng == lexer->bracket) + { + BEGIN(BRACKETEND); + } +} + +<BRACKETEND>\] { + lexer->column += yyleng; + /* Erase the partial bracket from the token. */ + lexer->token.length -= lexer->bracket; + lexer->token.text[lexer->token.length] = 0; + BEGIN(INITIAL); + return 1; +} + +<BRACKET>([^]\n])+ { + cmListFileLexerAppend(lexer, yytext, yyleng); + lexer->column += yyleng; +} + +<BRACKET,BRACKETEND>\n { + cmListFileLexerAppend(lexer, yytext, yyleng); + ++lexer->line; + lexer->column = 1; + BEGIN(BRACKET); +} + +<BRACKET,BRACKETEND>. { + cmListFileLexerAppend(lexer, yytext, yyleng); + lexer->column += yyleng; + BEGIN(BRACKET); +} + +<BRACKET,BRACKETEND><<EOF>> { + lexer->token.type = cmListFileLexer_Token_BadBracket; + BEGIN(INITIAL); + return 1; +} + +({UNQUOTED}|=|\[=*{UNQUOTED})({UNQUOTED}|[[=])* { lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted; cmListFileLexerSetToken(lexer, yytext, yyleng); lexer->column += yyleng; return 1; } -({MAKEVAR}|{UNQUOTED})({LEGACY})* { +({MAKEVAR}|{UNQUOTED}|=|\[=*{LEGACY})({LEGACY}|[[=])* { lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted; cmListFileLexerSetToken(lexer, yytext, yyleng); lexer->column += yyleng; @@ -141,7 +225,7 @@ LEGACY {MAKEVAR}|{UNQUOTED}|\"({MAKEVAR}|{UNQUOTED}|[ \t])*\" } <STRING>\\\n { - cmListFileLexerAppend(lexer, yytext, yyleng); + /* Continuation: text is not part of string */ ++lexer->line; lexer->column = 1; } @@ -264,7 +348,38 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer, { if(lexer->file) { - return (int)fread(buffer, 1, bufferSize, lexer->file); + /* Convert CRLF -> LF explicitly. The C FILE "t"ext mode + does not convert newlines on all platforms. Move any + trailing CR to the start of the buffer for the next read. */ + size_t cr = lexer->cr; + size_t n; + buffer[0] = '\r'; + n = fread(buffer+cr, 1, bufferSize-cr, lexer->file); + if(n) + { + char* o = buffer; + const char* i = buffer; + const char* e; + n += cr; + cr = (buffer[n-1] == '\r')? 1:0; + e = buffer + n - cr; + while(i != e) + { + if(i[0] == '\r' && i[1] == '\n') + { + ++i; + } + *o++ = *i++; + } + n = o - buffer; + } + else + { + n = cr; + cr = 0; + } + lexer->cr = cr; + return n; } else if(lexer->string_left) { @@ -292,6 +407,7 @@ static void cmListFileLexerInit(cmListFileLexer* lexer) /*--------------------------------------------------------------------------*/ static void cmListFileLexerDestroy(cmListFileLexer* lexer) { + cmListFileLexerSetToken(lexer, 0, 0); if(lexer->file || lexer->string_buffer) { cmListFileLexer_yylex_destroy(lexer->scanner); @@ -327,19 +443,74 @@ cmListFileLexer* cmListFileLexer_New() /*--------------------------------------------------------------------------*/ void cmListFileLexer_Delete(cmListFileLexer* lexer) { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); free(lexer); } /*--------------------------------------------------------------------------*/ -int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name) +static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f) +{ + unsigned char b[2]; + if(fread(b, 1, 2, f) == 2) + { + if(b[0] == 0xEF && b[1] == 0xBB) + { + if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF) + { + return cmListFileLexer_BOM_UTF8; + } + } + else if(b[0] == 0xFE && b[1] == 0xFF) + { + /* UTF-16 BE */ + return cmListFileLexer_BOM_UTF16BE; + } + else if(b[0] == 0 && b[1] == 0) + { + if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) + { + return cmListFileLexer_BOM_UTF32BE; + } + } + else if(b[0] == 0xFF && b[1] == 0xFE) + { + fpos_t p; + fgetpos(f, &p); + if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) + { + return cmListFileLexer_BOM_UTF32LE; + } + fsetpos(f, &p); + return cmListFileLexer_BOM_UTF16LE; + } + } + rewind(f); + return cmListFileLexer_BOM_None; +} + +/*--------------------------------------------------------------------------*/ +int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name, + cmListFileLexer_BOM* bom) { int result = 1; cmListFileLexerDestroy(lexer); if(name) { - lexer->file = fopen(name, "r"); - if(!lexer->file) +#ifdef _WIN32 + wchar_t* wname = cmsysEncoding_DupToWide(name); + lexer->file = _wfopen(wname, L"rb"); + free(wname); +#else + lexer->file = fopen(name, "rb"); +#endif + if(lexer->file) + { + if(bom) + { + *bom = cmListFileLexer_ReadBOM(lexer->file); + } + } + else { result = 0; } @@ -385,7 +556,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer) } else { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); return 0; } } @@ -431,7 +602,10 @@ const char* cmListFileLexer_GetTypeAsString(cmListFileLexer* lexer, case cmListFileLexer_Token_ParenRight: return "right paren"; case cmListFileLexer_Token_ArgumentUnquoted: return "unquoted argument"; case cmListFileLexer_Token_ArgumentQuoted: return "quoted argument"; + case cmListFileLexer_Token_ArgumentBracket: return "bracket argument"; + case cmListFileLexer_Token_CommentBracket: return "bracket comment"; case cmListFileLexer_Token_BadCharacter: return "bad character"; + case cmListFileLexer_Token_BadBracket: return "unterminated bracket"; case cmListFileLexer_Token_BadString: return "unterminated string"; } return "unknown token"; |