summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Justin Borodinsky <justin.borodinsky@gmail.com> 2015-01-11 19:33:36 (GMT)
committer Brad King <brad.king@kitware.com> 2015-01-27 16:30:26 (GMT)
commit 1f77a7001b2e3f8f9224cb603e5acfee45573064 (patch)
tree 6e641ed7b6967267135c544adea91815523dd753
parent 19e57a48cd1ad562b277c8fb9dc8285ef96acfa0 (diff)
download CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.zip
CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.tar.gz
CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.tar.bz2
file: Teach STRINGS to support UTF-16 and UTF-32 encodings
-rw-r--r-- Help/command/file.rst 5
-rw-r--r-- Help/release/dev/file-strings-utf-16.rst 5
-rw-r--r-- Source/cmFileCommand.cxx 64
-rw-r--r-- Tests/RunCMake/string/RunCMakeTest.cmake 5
-rw-r--r-- Tests/RunCMake/string/UTF-16BE-stderr.txt 2
-rw-r--r-- Tests/RunCMake/string/UTF-16BE.cmake 4
-rw-r--r-- Tests/RunCMake/string/UTF-16BE.txt bin 0 -> 83 bytes
-rw-r--r-- Tests/RunCMake/string/UTF-16LE-stderr.txt 2
-rw-r--r-- Tests/RunCMake/string/UTF-16LE.cmake 4
-rw-r--r-- Tests/RunCMake/string/UTF-16LE.txt bin 0 -> 83 bytes
-rw-r--r-- Tests/RunCMake/string/UTF-32BE-stderr.txt 2
-rw-r--r-- Tests/RunCMake/string/UTF-32BE.cmake 4
-rw-r--r-- Tests/RunCMake/string/UTF-32BE.txt bin 0 -> 165 bytes
-rw-r--r-- Tests/RunCMake/string/UTF-32LE-stderr.txt 2
-rw-r--r-- Tests/RunCMake/string/UTF-32LE.cmake 4
-rw-r--r-- Tests/RunCMake/string/UTF-32LE.txt bin 0 -> 165 bytes
16 files changed, 99 insertions, 4 deletions
diff --git a/Help/command/file.rst b/Help/command/file.rst
index b0d4792..73d4cfa 100644
--- a/Help/command/file.rst
+++ b/Help/command/file.rst
@@ -65,7 +65,10 @@ Parse a list of ASCII strings from ``<filename>`` and store it in
Consider only strings that match the given regular expression.
``ENCODING <encoding-type>``
- Consider strings of a given encoding. "UTF-8" is currently supported.
+ Consider strings of a given encoding. Currently supported encodings are:
+ UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE. If the ENCODING option
+ is not provided and the file has a Byte Order Mark, the encoding
+ defaults to the one indicated by the Byte Order Mark.
For example, the code
diff --git a/Help/release/dev/file-strings-utf-16.rst b/Help/release/dev/file-strings-utf-16.rst
new file mode 100644
index 0000000..f40b63e
--- /dev/null
+++ b/Help/release/dev/file-strings-utf-16.rst
@@ -0,0 +1,5 @@
+file-strings-utf-16
+-------------------
+
+* The :command:`file(STRINGS)` command now supports UTF-16LE, UTF-16BE,
+ UTF-32LE, and UTF-32BE as ``ENCODING`` options.
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx
index f125292..579e715 100644
--- a/Source/cmFileCommand.cxx
+++ b/Source/cmFileCommand.cxx
@@ -472,7 +472,13 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
bool have_regex = false;
bool newline_consume = false;
bool hex_conversion_enabled = true;
- bool utf8_encoding = false;
+ enum { encoding_none = cmsys::FStream::BOM_None,
+ encoding_utf8 = cmsys::FStream::BOM_UTF8,
+ encoding_utf16le = cmsys::FStream::BOM_UTF16LE,
+ encoding_utf16be = cmsys::FStream::BOM_UTF16BE,
+ encoding_utf32le = cmsys::FStream::BOM_UTF32LE,
+ encoding_utf32be = cmsys::FStream::BOM_UTF32BE};
+ int encoding = encoding_none;
int arg_mode = arg_none;
for(unsigned int i=3; i < args.size(); ++i)
{
@@ -599,7 +605,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
{
if(args[i] == "UTF-8")
{
- utf8_encoding = true;
+ encoding = encoding_utf8;
+ }
+ else if(args[i] == "UTF-16LE")
+ {
+ encoding = encoding_utf16le;
+ }
+ else if(args[i] == "UTF-16BE")
+ {
+ encoding = encoding_utf16be;
+ }
+ else if(args[i] == "UTF-32LE")
+ {
+ encoding = encoding_utf32le;
+ }
+ else if(args[i] == "UTF-32BE")
+ {
+ encoding = encoding_utf32be;
}
else
{
@@ -647,6 +669,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
return false;
}
+ // If a BOM is found and no encoding was specified, use the BOM's encoding.
+ int bom_found = cmsys::FStream::ReadBOM(fin);
+ if(encoding == encoding_none && bom_found != cmsys::FStream::BOM_None)
+ {
+ encoding = bom_found;
+ }
+
+ unsigned int bytes_rem = 0;
+ if(encoding == encoding_utf16le || encoding == encoding_utf16be)
+ {
+ bytes_rem = 1;
+ }
+ if(encoding == encoding_utf32le || encoding == encoding_utf32be)
+ {
+ bytes_rem = 3;
+ }
+
// Parse strings out of the file.
int output_size = 0;
std::vector<std::string> strings;
@@ -658,6 +697,25 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
std::string current_str;
int c = fin.get();
+ for(unsigned int i=0; i<bytes_rem; ++i)
+ {
+ int c1 = fin.get();
+ if(!fin)
+ {
+ fin.putback(static_cast<char>(c1));
+ break;
+ }
+ c = (c << 8) | c1;
+ }
+ if(encoding == encoding_utf16le)
+ {
+ c = ((c & 0xFF) << 8) | ((c & 0xFF00) >> 8);
+ }
+ else if(encoding == encoding_utf32le)
+ {
+ c = (((c & 0xFF) << 24) | ((c & 0xFF00) << 8) |
+ ((c & 0xFF0000) >> 8) | ((c & 0xFF000000) >> 24));
+ }
if(c == '\r')
{
@@ -673,7 +731,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
// c is guaranteed to fit in char by the above if...
current_str += static_cast<char>(c);
}
- else if(utf8_encoding)
+ else if(encoding == encoding_utf8)
{
// Check for UTF-8 encoded string (up to 4 octets)
static const unsigned char utf8_check_table[3][2] =
diff --git a/Tests/RunCMake/string/RunCMakeTest.cmake b/Tests/RunCMake/string/RunCMakeTest.cmake
index fc913c6..89f7ea5 100644
--- a/Tests/RunCMake/string/RunCMakeTest.cmake
+++ b/Tests/RunCMake/string/RunCMakeTest.cmake
@@ -12,3 +12,8 @@ run_cmake(UuidMissingTypeValue)
run_cmake(UuidBadType)
run_cmake(RegexClear)
+
+run_cmake(UTF-16BE)
+run_cmake(UTF-16LE)
+run_cmake(UTF-32BE)
+run_cmake(UTF-32LE)
diff --git a/Tests/RunCMake/string/UTF-16BE-stderr.txt b/Tests/RunCMake/string/UTF-16BE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16BE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-16BE.cmake b/Tests/RunCMake/string/UTF-16BE.cmake
new file mode 100644
index 0000000..da986c0
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16BE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-16BE.txt str ENCODING UTF-16BE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-16BE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-16BE.txt b/Tests/RunCMake/string/UTF-16BE.txt
new file mode 100644
index 0000000..9d976bc
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16BE.txt
Binary files differ
diff --git a/Tests/RunCMake/string/UTF-16LE-stderr.txt b/Tests/RunCMake/string/UTF-16LE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16LE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-16LE.cmake b/Tests/RunCMake/string/UTF-16LE.cmake
new file mode 100644
index 0000000..326d848
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16LE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-16LE.txt str ENCODING UTF-16LE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-16LE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-16LE.txt b/Tests/RunCMake/string/UTF-16LE.txt
new file mode 100644
index 0000000..ebba874
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16LE.txt
Binary files differ
diff --git a/Tests/RunCMake/string/UTF-32BE-stderr.txt b/Tests/RunCMake/string/UTF-32BE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32BE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-32BE.cmake b/Tests/RunCMake/string/UTF-32BE.cmake
new file mode 100644
index 0000000..debdeaa
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32BE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-32BE.txt str ENCODING UTF-32BE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-32BE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-32BE.txt b/Tests/RunCMake/string/UTF-32BE.txt
new file mode 100644
index 0000000..6725fbb
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32BE.txt
Binary files differ
diff --git a/Tests/RunCMake/string/UTF-32LE-stderr.txt b/Tests/RunCMake/string/UTF-32LE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32LE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-32LE.cmake b/Tests/RunCMake/string/UTF-32LE.cmake
new file mode 100644
index 0000000..22aab5f
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32LE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-32LE.txt str ENCODING UTF-32LE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-32LE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-32LE.txt b/Tests/RunCMake/string/UTF-32LE.txt
new file mode 100644
index 0000000..cf5102f
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32LE.txt
Binary files differ