summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrad King <brad.king@kitware.com>2013-10-14 19:13:11 (GMT)
committerBrad King <brad.king@kitware.com>2013-10-17 13:06:59 (GMT)
commitdbd933365ec780d27ab7c0dfba30dc1af1094607 (patch)
tree2fd61c1a48fbf8a0cea360400529e1abb588ac11
parent56457837e28de29d4f94b0cc9c47ef314d8f05e1 (diff)
downloadCMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.zip
CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.gz
CMake-dbd933365ec780d27ab7c0dfba30dc1af1094607.tar.bz2
cmListFileLexer: Allow a leading UTF-8 Byte-Order-Mark (#11137)
Teach the lexer to read a UTF-8, UTF-16 BE/LE, or UTF-32 BE/LE Byte-Order-Mark from the start of a file if any is present. Report an error on files using UTF-16 or UTF-32 and accept a UTF-8 or missing BOM.
-rw-r--r--Source/cmListFileCache.cxx15
-rw-r--r--Source/cmListFileLexer.c57
-rw-r--r--Source/cmListFileLexer.h14
-rw-r--r--Source/cmListFileLexer.in.l57
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-BE-result.txt1
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-BE-stderr.txt6
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-BE.cmakebin0 -> 54 bytes
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-LE-result.txt1
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-LE-stderr.txt6
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-16-LE.cmakebin0 -> 54 bytes
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-BE-result.txt1
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-BE-stderr.txt6
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-BE.cmakebin0 -> 108 bytes
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-LE-result.txt1
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-LE-stderr.txt6
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-32-LE.cmakebin0 -> 108 bytes
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-8-stdout.txt1
-rw-r--r--Tests/RunCMake/Syntax/BOM-UTF-8.cmake1
-rw-r--r--Tests/RunCMake/Syntax/RunCMakeTest.cmake5
19 files changed, 168 insertions, 10 deletions
diff --git a/Source/cmListFileCache.cxx b/Source/cmListFileCache.cxx
index 898f379..f6ea4b1 100644
--- a/Source/cmListFileCache.cxx
+++ b/Source/cmListFileCache.cxx
@@ -57,13 +57,26 @@ cmListFileParser::~cmListFileParser()
bool cmListFileParser::ParseFile()
{
// Open the file.
- if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName))
+ cmListFileLexer_BOM bom;
+ if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName, &bom))
{
cmSystemTools::Error("cmListFileCache: error can not open file ",
this->FileName);
return false;
}
+ // Verify the Byte-Order-Mark, if any.
+ if(bom != cmListFileLexer_BOM_None &&
+ bom != cmListFileLexer_BOM_UTF8)
+ {
+ cmListFileLexer_SetFileName(this->Lexer, 0, 0);
+ cmOStringStream m;
+ m << "File\n " << this->FileName << "\n"
+ << "starts with a Byte-Order-Mark that is not UTF-8.";
+ this->Makefile->IssueMessage(cmake::FATAL_ERROR, m.str());
+ return false;
+ }
+
// Use a simple recursive-descent parser to process the token
// stream.
bool haveNewline = true;
diff --git a/Source/cmListFileLexer.c b/Source/cmListFileLexer.c
index ad5a83d..394bd17 100644
--- a/Source/cmListFileLexer.c
+++ b/Source/cmListFileLexer.c
@@ -2307,19 +2307,68 @@ cmListFileLexer* cmListFileLexer_New()
/*--------------------------------------------------------------------------*/
void cmListFileLexer_Delete(cmListFileLexer* lexer)
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
free(lexer);
}
/*--------------------------------------------------------------------------*/
-int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name)
+static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
+{
+ unsigned char b[2];
+ if(fread(b, 1, 2, f) == 2)
+ {
+ if(b[0] == 0xEF && b[1] == 0xBB)
+ {
+ if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF)
+ {
+ return cmListFileLexer_BOM_UTF8;
+ }
+ }
+ else if(b[0] == 0xFE && b[1] == 0xFF)
+ {
+ /* UTF-16 BE */
+ return cmListFileLexer_BOM_UTF16BE;
+ }
+ else if(b[0] == 0 && b[1] == 0)
+ {
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF)
+ {
+ return cmListFileLexer_BOM_UTF32BE;
+ }
+ }
+ else if(b[0] == 0xFF && b[1] == 0xFE)
+ {
+ fpos_t p;
+ fgetpos(f, &p);
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0)
+ {
+ return cmListFileLexer_BOM_UTF32LE;
+ }
+ fsetpos(f, &p);
+ return cmListFileLexer_BOM_UTF16LE;
+ }
+ }
+ rewind(f);
+ return cmListFileLexer_BOM_None;
+}
+
+/*--------------------------------------------------------------------------*/
+int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
+ cmListFileLexer_BOM* bom)
{
int result = 1;
cmListFileLexerDestroy(lexer);
if(name)
{
lexer->file = fopen(name, "r");
- if(!lexer->file)
+ if(lexer->file)
+ {
+ if(bom)
+ {
+ *bom = cmListFileLexer_ReadBOM(lexer->file);
+ }
+ }
+ else
{
result = 0;
}
@@ -2365,7 +2414,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
}
else
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
return 0;
}
}
diff --git a/Source/cmListFileLexer.h b/Source/cmListFileLexer.h
index cc78b5c..719347c 100644
--- a/Source/cmListFileLexer.h
+++ b/Source/cmListFileLexer.h
@@ -36,6 +36,17 @@ struct cmListFileLexer_Token_s
int column;
};
+enum cmListFileLexer_BOM_e
+{
+ cmListFileLexer_BOM_None,
+ cmListFileLexer_BOM_UTF8,
+ cmListFileLexer_BOM_UTF16BE,
+ cmListFileLexer_BOM_UTF16LE,
+ cmListFileLexer_BOM_UTF32BE,
+ cmListFileLexer_BOM_UTF32LE
+};
+typedef enum cmListFileLexer_BOM_e cmListFileLexer_BOM;
+
typedef struct cmListFileLexer_s cmListFileLexer;
#ifdef __cplusplus
@@ -44,7 +55,8 @@ extern "C"
#endif
cmListFileLexer* cmListFileLexer_New();
-int cmListFileLexer_SetFileName(cmListFileLexer*, const char*);
+int cmListFileLexer_SetFileName(cmListFileLexer*, const char*,
+ cmListFileLexer_BOM* bom);
int cmListFileLexer_SetString(cmListFileLexer*, const char*);
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer*);
long cmListFileLexer_GetCurrentLine(cmListFileLexer*);
diff --git a/Source/cmListFileLexer.in.l b/Source/cmListFileLexer.in.l
index 89f2917..a660d37 100644
--- a/Source/cmListFileLexer.in.l
+++ b/Source/cmListFileLexer.in.l
@@ -328,19 +328,68 @@ cmListFileLexer* cmListFileLexer_New()
/*--------------------------------------------------------------------------*/
void cmListFileLexer_Delete(cmListFileLexer* lexer)
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
free(lexer);
}
/*--------------------------------------------------------------------------*/
-int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name)
+static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
+{
+ unsigned char b[2];
+ if(fread(b, 1, 2, f) == 2)
+ {
+ if(b[0] == 0xEF && b[1] == 0xBB)
+ {
+ if(fread(b, 1, 1, f) == 1 && b[0] == 0xBF)
+ {
+ return cmListFileLexer_BOM_UTF8;
+ }
+ }
+ else if(b[0] == 0xFE && b[1] == 0xFF)
+ {
+ /* UTF-16 BE */
+ return cmListFileLexer_BOM_UTF16BE;
+ }
+ else if(b[0] == 0 && b[1] == 0)
+ {
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF)
+ {
+ return cmListFileLexer_BOM_UTF32BE;
+ }
+ }
+ else if(b[0] == 0xFF && b[1] == 0xFE)
+ {
+ fpos_t p;
+ fgetpos(f, &p);
+ if(fread(b, 1, 2, f) == 2 && b[0] == 0 && b[1] == 0)
+ {
+ return cmListFileLexer_BOM_UTF32LE;
+ }
+ fsetpos(f, &p);
+ return cmListFileLexer_BOM_UTF16LE;
+ }
+ }
+ rewind(f);
+ return cmListFileLexer_BOM_None;
+}
+
+/*--------------------------------------------------------------------------*/
+int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
+ cmListFileLexer_BOM* bom)
{
int result = 1;
cmListFileLexerDestroy(lexer);
if(name)
{
lexer->file = fopen(name, "r");
- if(!lexer->file)
+ if(lexer->file)
+ {
+ if(bom)
+ {
+ *bom = cmListFileLexer_ReadBOM(lexer->file);
+ }
+ }
+ else
{
result = 0;
}
@@ -386,7 +435,7 @@ cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
}
else
{
- cmListFileLexer_SetFileName(lexer, 0);
+ cmListFileLexer_SetFileName(lexer, 0, 0);
return 0;
}
}
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-BE-result.txt b/Tests/RunCMake/Syntax/BOM-UTF-16-BE-result.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-BE-result.txt
@@ -0,0 +1 @@
+1
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-BE-stderr.txt b/Tests/RunCMake/Syntax/BOM-UTF-16-BE-stderr.txt
new file mode 100644
index 0000000..b3f1e47
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-BE-stderr.txt
@@ -0,0 +1,6 @@
+CMake Error at CMakeLists.txt:3 \(include\):
+ File
+
+ .*/Tests/RunCMake/Syntax/BOM-UTF-16-BE.cmake
+
+ starts with a Byte-Order-Mark that is not UTF-8.
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-BE.cmake b/Tests/RunCMake/Syntax/BOM-UTF-16-BE.cmake
new file mode 100644
index 0000000..c51f6e6
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-BE.cmake
Binary files differ
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-LE-result.txt b/Tests/RunCMake/Syntax/BOM-UTF-16-LE-result.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-LE-result.txt
@@ -0,0 +1 @@
+1
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-LE-stderr.txt b/Tests/RunCMake/Syntax/BOM-UTF-16-LE-stderr.txt
new file mode 100644
index 0000000..c08c902
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-LE-stderr.txt
@@ -0,0 +1,6 @@
+CMake Error at CMakeLists.txt:3 \(include\):
+ File
+
+ .*/Tests/RunCMake/Syntax/BOM-UTF-16-LE.cmake
+
+ starts with a Byte-Order-Mark that is not UTF-8.
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-16-LE.cmake b/Tests/RunCMake/Syntax/BOM-UTF-16-LE.cmake
new file mode 100644
index 0000000..b57446f
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-16-LE.cmake
Binary files differ
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-BE-result.txt b/Tests/RunCMake/Syntax/BOM-UTF-32-BE-result.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-BE-result.txt
@@ -0,0 +1 @@
+1
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-BE-stderr.txt b/Tests/RunCMake/Syntax/BOM-UTF-32-BE-stderr.txt
new file mode 100644
index 0000000..5dde4e3
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-BE-stderr.txt
@@ -0,0 +1,6 @@
+CMake Error at CMakeLists.txt:3 \(include\):
+ File
+
+ .*/Tests/RunCMake/Syntax/BOM-UTF-32-BE.cmake
+
+ starts with a Byte-Order-Mark that is not UTF-8.
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-BE.cmake b/Tests/RunCMake/Syntax/BOM-UTF-32-BE.cmake
new file mode 100644
index 0000000..23c57f3
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-BE.cmake
Binary files differ
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-LE-result.txt b/Tests/RunCMake/Syntax/BOM-UTF-32-LE-result.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-LE-result.txt
@@ -0,0 +1 @@
+1
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-LE-stderr.txt b/Tests/RunCMake/Syntax/BOM-UTF-32-LE-stderr.txt
new file mode 100644
index 0000000..eb054ec
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-LE-stderr.txt
@@ -0,0 +1,6 @@
+CMake Error at CMakeLists.txt:3 \(include\):
+ File
+
+ .*/Tests/RunCMake/Syntax/BOM-UTF-32-LE.cmake
+
+ starts with a Byte-Order-Mark that is not UTF-8.
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-32-LE.cmake b/Tests/RunCMake/Syntax/BOM-UTF-32-LE.cmake
new file mode 100644
index 0000000..c330f5b
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-32-LE.cmake
Binary files differ
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-8-stdout.txt b/Tests/RunCMake/Syntax/BOM-UTF-8-stdout.txt
new file mode 100644
index 0000000..5776d6e
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-8-stdout.txt
@@ -0,0 +1 @@
+-- message
diff --git a/Tests/RunCMake/Syntax/BOM-UTF-8.cmake b/Tests/RunCMake/Syntax/BOM-UTF-8.cmake
new file mode 100644
index 0000000..bdff83b
--- /dev/null
+++ b/Tests/RunCMake/Syntax/BOM-UTF-8.cmake
@@ -0,0 +1 @@
+message(STATUS "message")
diff --git a/Tests/RunCMake/Syntax/RunCMakeTest.cmake b/Tests/RunCMake/Syntax/RunCMakeTest.cmake
index 2d87328..d1a15c8 100644
--- a/Tests/RunCMake/Syntax/RunCMakeTest.cmake
+++ b/Tests/RunCMake/Syntax/RunCMakeTest.cmake
@@ -1,5 +1,10 @@
include(RunCMake)
+run_cmake(BOM-UTF-8)
+run_cmake(BOM-UTF-16-LE)
+run_cmake(BOM-UTF-16-BE)
+run_cmake(BOM-UTF-32-LE)
+run_cmake(BOM-UTF-32-BE)
run_cmake(CommandSpaces)
run_cmake(CommandTabs)
run_cmake(CommandNewlines)