diff options
author | Brad King <brad.king@kitware.com> | 2013-10-14 19:13:11 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2013-10-17 13:06:59 (GMT) |
commit | dbd933365ec780d27ab7c0dfba30dc1af1094607 (patch) | |
tree | 2fd61c1a48fbf8a0cea360400529e1abb588ac11 /Source/cmListFileLexer.in.l | |
parent | 56457837e28de29d4f94b0cc9c47ef314d8f05e1 (diff) | |
download | CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.zip CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.gz CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.bz2 |
cmListFileLexer: Allow a leading UTF-8 Byte-Order-Mark (#11137)

Teach the lexer to read a UTF-8, UTF-16 BE/LE, or UTF-32 BE/LE
Byte-Order-Mark from the start of a file if any is present. Report an
error on files using UTF-16 or UTF-32 and accept a UTF-8 or missing BOM.
Diffstat (limited to 'Source/cmListFileLexer.in.l')
-rw-r--r-- | Source/cmListFileLexer.in.l | 57 |
1 file changed, 53 insertions, 4 deletions
```diff
diff --git a/Source/cmListFileLexer.in.l b/Source/cmListFileLexer.in.l
index 89f2917..a660d37 100644
--- a/Source/cmListFileLexer.in.l
+++ b/Source/cmListFileLexer.in.l
@@ -328,19 +328,68 @@ cmListFileLexer* cmListFileLexer_New()
 /*--------------------------------------------------------------------------*/
 void cmListFileLexer_Delete(cmListFileLexer* lexer)
 {
-  cmListFileLexer_SetFileName(lexer, 0);
+  cmListFileLexer_SetFileName(lexer, 0, 0);
   free(lexer);
 }
 
 /*--------------------------------------------------------------------------*/
-int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name)
+static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
+{
+  unsigned char b[2];
+  if(fread(b, 1, 2, f) == 2)
+    {
+    if(b[0] == 0xEF && b[1] == 0xBB)
+      {
+      if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF)
+        {
+        return cmListFileLexer_BOM_UTF8;
+        }
+      }
+    else if(b[0] == 0xFE && b[1] == 0xFF)
+      {
+      /* UTF-16 BE */
+      return cmListFileLexer_BOM_UTF16BE;
+      }
+    else if(b[0] == 0 && b[1] == 0)
+      {
+      if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF)
+        {
+        return cmListFileLexer_BOM_UTF32BE;
+        }
+      }
+    else if(b[0] == 0xFF && b[1] == 0xFE)
+      {
+      fpos_t p;
+      fgetpos(f, &p);
+      if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0)
+        {
+        return cmListFileLexer_BOM_UTF32LE;
+        }
+      fsetpos(f, &p);
+      return cmListFileLexer_BOM_UTF16LE;
+      }
+    }
+  rewind(f);
+  return cmListFileLexer_BOM_None;
+}
+
+/*--------------------------------------------------------------------------*/
+int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
+                                cmListFileLexer_BOM* bom)
 {
   int result = 1;
   cmListFileLexerDestroy(lexer);
   if(name)
     {
     lexer->file = fopen(name, "r");
-    if(!lexer->file)
+    if(lexer->file)
+      {
+      if(bom)
+        {
+        *bom = cmListFileLexer_ReadBOM(lexer->file);
+        }
+      }
+    else
       {
       result = 0;
       }
@@ -386,7 +435,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
       }
     else
       {
-      cmListFileLexer_SetFileName(lexer, 0);
+      cmListFileLexer_SetFileName(lexer, 0, 0);
       return 0;
       }
     }
```