diff options
author | Brad King <brad.king@kitware.com> | 2013-10-14 19:13:11 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2013-10-17 13:06:59 (GMT) |
commit | dbd933365ec780d27ab7c0dfba30dc1af1094607 (patch) | |
tree | 2fd61c1a48fbf8a0cea360400529e1abb588ac11 /Source | |
parent | 56457837e28de29d4f94b0cc9c47ef314d8f05e1 (diff) | |
download | CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.zip CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.gz CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.bz2 |
cmListFileLexer: Allow a leading UTF-8 Byte-Order-Mark (#11137)
Teach the lexer to read a UTF-8, UTF-16 BE/LE, or UTF-32 BE/LE
Byte-Order-Mark from the start of a file if any is present. Report an
error on files using UTF-16 or UTF-32 and accept a UTF-8 or missing BOM.
Diffstat (limited to 'Source')
-rw-r--r-- | Source/cmListFileCache.cxx | 15 | ||||
-rw-r--r-- | Source/cmListFileLexer.c | 57 | ||||
-rw-r--r-- | Source/cmListFileLexer.h | 14 | ||||
-rw-r--r-- | Source/cmListFileLexer.in.l | 57 |
4 files changed, 133 insertions, 10 deletions
diff --git a/Source/cmListFileCache.cxx b/Source/cmListFileCache.cxx index 898f379..f6ea4b1 100644 --- a/Source/cmListFileCache.cxx +++ b/Source/cmListFileCache.cxx @@ -57,13 +57,26 @@ cmListFileParser::~cmListFileParser() bool cmListFileParser::ParseFile() { // Open the file. - if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName)) + cmListFileLexer_BOM bom; + if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName, &bom)) { cmSystemTools::Error("cmListFileCache: error can not open file ", this->FileName); return false; } + // Verify the Byte-Order-Mark, if any. + if(bom != cmListFileLexer_BOM_None && + bom != cmListFileLexer_BOM_UTF8) + { + cmListFileLexer_SetFileName(this->Lexer, 0, 0); + cmOStringStream m; + m << "File\n " << this->FileName << "\n" + << "starts with a Byte-Order-Mark that is not UTF-8."; + this->Makefile->IssueMessage(cmake::FATAL_ERROR, m.str()); + return false; + } + // Use a simple recursive-descent parser to process the token // stream. bool haveNewline = true; diff --git a/Source/cmListFileLexer.c b/Source/cmListFileLexer.c index ad5a83d..394bd17 100644 --- a/Source/cmListFileLexer.c +++ b/Source/cmListFileLexer.c @@ -2307,19 +2307,68 @@ cmListFileLexer* cmListFileLexer_New() /*--------------------------------------------------------------------------*/ void cmListFileLexer_Delete(cmListFileLexer* lexer) { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); free(lexer); } /*--------------------------------------------------------------------------*/ -int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name) +static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f) +{ + unsigned char b[2]; + if(fread(b, 1, 2, f) == 2) + { + if(b[0] == 0xEF && b[1] == 0xBB) + { + if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF) + { + return cmListFileLexer_BOM_UTF8; + } + } + else if(b[0] == 0xFE && b[1] == 0xFF) + { + /* UTF-16 BE */ + return cmListFileLexer_BOM_UTF16BE; + } + else if(b[0] == 0 && b[1] == 0) + { + if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) + { + return cmListFileLexer_BOM_UTF32BE; + } + } + else if(b[0] == 0xFF && b[1] == 0xFE) + { + fpos_t p; + fgetpos(f, &p); + if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) + { + return cmListFileLexer_BOM_UTF32LE; + } + fsetpos(f, &p); + return cmListFileLexer_BOM_UTF16LE; + } + } + rewind(f); + return cmListFileLexer_BOM_None; +} + +/*--------------------------------------------------------------------------*/ +int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name, + cmListFileLexer_BOM* bom) { int result = 1; cmListFileLexerDestroy(lexer); if(name) { lexer->file = fopen(name, "r"); - if(!lexer->file) + if(lexer->file) + { + if(bom) + { + *bom = cmListFileLexer_ReadBOM(lexer->file); + } + } + else { result = 0; } @@ -2365,7 +2414,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer) } else { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); return 0; } } diff --git a/Source/cmListFileLexer.h b/Source/cmListFileLexer.h index cc78b5c..719347c 100644 --- a/Source/cmListFileLexer.h +++ b/Source/cmListFileLexer.h @@ -36,6 +36,17 @@ struct cmListFileLexer_Token_s int column; }; +enum cmListFileLexer_BOM_e +{ + cmListFileLexer_BOM_None, + cmListFileLexer_BOM_UTF8, + cmListFileLexer_BOM_UTF16BE, + cmListFileLexer_BOM_UTF16LE, + cmListFileLexer_BOM_UTF32BE, + cmListFileLexer_BOM_UTF32LE +}; +typedef enum cmListFileLexer_BOM_e cmListFileLexer_BOM; + typedef struct cmListFileLexer_s cmListFileLexer; #ifdef __cplusplus @@ -44,7 +55,8 @@ extern "C" #endif cmListFileLexer* cmListFileLexer_New(); -int cmListFileLexer_SetFileName(cmListFileLexer*, const char*); +int cmListFileLexer_SetFileName(cmListFileLexer*, const char*, + cmListFileLexer_BOM* bom); int cmListFileLexer_SetString(cmListFileLexer*, const char*); cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer*); long cmListFileLexer_GetCurrentLine(cmListFileLexer*); diff --git a/Source/cmListFileLexer.in.l b/Source/cmListFileLexer.in.l index 89f2917..a660d37 100644 --- a/Source/cmListFileLexer.in.l +++ b/Source/cmListFileLexer.in.l @@ -328,19 +328,68 @@ cmListFileLexer* cmListFileLexer_New() /*--------------------------------------------------------------------------*/ void cmListFileLexer_Delete(cmListFileLexer* lexer) { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); free(lexer); } /*--------------------------------------------------------------------------*/ -int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name) +static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f) +{ + unsigned char b[2]; + if(fread(b, 1, 2, f) == 2) + { + if(b[0] == 0xEF && b[1] == 0xBB) + { + if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF) + { + return cmListFileLexer_BOM_UTF8; + } + } + else if(b[0] == 0xFE && b[1] == 0xFF) + { + /* UTF-16 BE */ + return cmListFileLexer_BOM_UTF16BE; + } + else if(b[0] == 0 && b[1] == 0) + { + if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) + { + return cmListFileLexer_BOM_UTF32BE; + } + } + else if(b[0] == 0xFF && b[1] == 0xFE) + { + fpos_t p; + fgetpos(f, &p); + if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0) + { + return cmListFileLexer_BOM_UTF32LE; + } + fsetpos(f, &p); + return cmListFileLexer_BOM_UTF16LE; + } + } + rewind(f); + return cmListFileLexer_BOM_None; +} + +/*--------------------------------------------------------------------------*/ +int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name, + cmListFileLexer_BOM* bom) { int result = 1; cmListFileLexerDestroy(lexer); if(name) { lexer->file = fopen(name, "r"); - if(!lexer->file) + if(lexer->file) + { + if(bom) + { + *bom = cmListFileLexer_ReadBOM(lexer->file); + } + } + else { result = 0; } @@ -386,7 +435,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer) } else { - cmListFileLexer_SetFileName(lexer, 0); + cmListFileLexer_SetFileName(lexer, 0, 0); return 0; } } |