diff options
author | Justin Borodinsky <justin.borodinsky@gmail.com> | 2015-01-11 19:33:36 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2015-01-27 16:30:26 (GMT) |
commit | 1f77a7001b2e3f8f9224cb603e5acfee45573064 (patch) | |
tree | 6e641ed7b6967267135c544adea91815523dd753 /Source/cmFileCommand.cxx | |
parent | 19e57a48cd1ad562b277c8fb9dc8285ef96acfa0 (diff) | |
download | CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.zip CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.tar.gz CMake-1f77a7001b2e3f8f9224cb603e5acfee45573064.tar.bz2 |
file: Teach STRINGS to support UTF-16 and UTF-32 encodings
Diffstat (limited to 'Source/cmFileCommand.cxx')
-rw-r--r-- | Source/cmFileCommand.cxx | 64 |
1 files changed, 61 insertions, 3 deletions
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx index f125292..579e715 100644 --- a/Source/cmFileCommand.cxx +++ b/Source/cmFileCommand.cxx @@ -472,7 +472,13 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) bool have_regex = false; bool newline_consume = false; bool hex_conversion_enabled = true; - bool utf8_encoding = false; + enum { encoding_none = cmsys::FStream::BOM_None, + encoding_utf8 = cmsys::FStream::BOM_UTF8, + encoding_utf16le = cmsys::FStream::BOM_UTF16LE, + encoding_utf16be = cmsys::FStream::BOM_UTF16BE, + encoding_utf32le = cmsys::FStream::BOM_UTF32LE, + encoding_utf32be = cmsys::FStream::BOM_UTF32BE}; + int encoding = encoding_none; int arg_mode = arg_none; for(unsigned int i=3; i < args.size(); ++i) { @@ -599,7 +605,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) { if(args[i] == "UTF-8") { - utf8_encoding = true; + encoding = encoding_utf8; + } + else if(args[i] == "UTF-16LE") + { + encoding = encoding_utf16le; + } + else if(args[i] == "UTF-16BE") + { + encoding = encoding_utf16be; + } + else if(args[i] == "UTF-32LE") + { + encoding = encoding_utf32le; + } + else if(args[i] == "UTF-32BE") + { + encoding = encoding_utf32be; } else { @@ -647,6 +669,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) return false; } + //If BOM is found and encoding was not specified, use the BOM + int bom_found = cmsys::FStream::ReadBOM(fin); + if(encoding == encoding_none && bom_found != cmsys::FStream::BOM_None) + { + encoding = bom_found; + } + + unsigned int bytes_rem = 0; + if(encoding == encoding_utf16le || encoding == encoding_utf16be) + { + bytes_rem = 1; + } + if(encoding == encoding_utf32le || encoding == encoding_utf32be) + { + bytes_rem = 3; + } + // Parse strings out of the file. int output_size = 0; std::vector<std::string> strings; @@ -658,6 +697,25 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) std::string current_str; int c = fin.get(); + for(unsigned int i=0; i<bytes_rem; ++i) + { + int c1 = fin.get(); + if(!fin) + { + fin.putback(static_cast<char>(c1)); + break; + } + c = (c << 8) | c1; + } + if(encoding == encoding_utf16le) + { + c = ((c & 0xFF) << 8) | ((c & 0xFF00) >> 8); + } + else if(encoding == encoding_utf32le) + { + c = (((c & 0xFF) << 24) | ((c & 0xFF00) << 8) | + ((c & 0xFF0000) >> 8) | ((c & 0xFF000000) >> 24)); + } if(c == '\r') { @@ -673,7 +731,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) // c is guaranteed to fit in char by the above if... current_str += static_cast<char>(c); } - else if(utf8_encoding) + else if(encoding == encoding_utf8) { // Check for UTF-8 encoded string (up to 4 octets) static const unsigned char utf8_check_table[3][2] = |