diff options
author | Dāvis Mosāns <davispuh@gmail.com> | 2016-11-01 18:04:20 (GMT) |
---|---|---|
committer | Dāvis Mosāns <davispuh@gmail.com> | 2016-11-14 19:21:20 (GMT) |
commit | 96103972ea1c478a2845fb68aee70a3395c148e0 (patch) | |
tree | f327e98f892d8e5cb8a45b1fbb250399f34149e3 /Source | |
parent | 92c865b8f5f0aef1af3ecd33692ce490467c0e70 (diff) | |
download | CMake-96103972ea1c478a2845fb68aee70a3395c148e0.zip CMake-96103972ea1c478a2845fb68aee70a3395c148e0.tar.gz CMake-96103972ea1c478a2845fb68aee70a3395c148e0.tar.bz2 |
Add cmProcessOutput class to be used for decoding text data
This allows us to decode text data received from an external process,
converting it from that process's external encoding to our internal encoding.
Diffstat (limited to 'Source')
-rw-r--r-- | Source/CMakeLists.txt | 5 | ||||
-rw-r--r-- | Source/cmProcessOutput.cxx | 155 | ||||
-rw-r--r-- | Source/cmProcessOutput.h | 80 |
3 files changed, 240 insertions, 0 deletions
diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index 718b022..fcda6f9 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -330,6 +330,8 @@ set(SRCS cmOrderDirectories.h cmPolicies.h cmPolicies.cxx + cmProcessOutput.cxx + cmProcessOutput.h cmProcessTools.cxx cmProcessTools.h cmProperty.cxx @@ -632,6 +634,9 @@ set(SRCS cm_codecvt.cxx ) +SET_PROPERTY(SOURCE cmProcessOutput.cxx APPEND PROPERTY COMPILE_DEFINITIONS + KWSYS_ENCODING_DEFAULT_CODEPAGE=${KWSYS_ENCODING_DEFAULT_CODEPAGE}) + # Kdevelop only works on UNIX and not windows if(UNIX) set(SRCS ${SRCS} cmGlobalKdevelopGenerator.cxx) diff --git a/Source/cmProcessOutput.cxx b/Source/cmProcessOutput.cxx new file mode 100644 index 0000000..1440223 --- /dev/null +++ b/Source/cmProcessOutput.cxx @@ -0,0 +1,155 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ + +#include "cmProcessOutput.h" + +#if defined(_WIN32) +#include <windows.h> +unsigned int cmProcessOutput::defaultCodepage = + KWSYS_ENCODING_DEFAULT_CODEPAGE; +#endif + +cmProcessOutput::cmProcessOutput(Encoding encoding, unsigned int maxSize) +{ +#if defined(_WIN32) + codepage = 0; + bufferSize = maxSize; + if (encoding == None) { + codepage = defaultCodepage; + } else if (encoding == Auto) { + codepage = GetConsoleCP(); + } else if (encoding == UTF8) { + codepage = CP_UTF8; + } else if (encoding == OEM) { + codepage = GetOEMCP(); + } + if (!codepage || encoding == ANSI) { + codepage = GetACP(); + } +#else + static_cast<void>(encoding); + static_cast<void>(maxSize); +#endif +} + +cmProcessOutput::~cmProcessOutput() +{ +} + +bool cmProcessOutput::DecodeText(std::string raw, std::string& decoded, + size_t id) +{ + bool success = true; + decoded = raw; +#if defined(_WIN32) + if (id > 0) { + if (rawparts.size() < id) { + rawparts.reserve(id); + while (rawparts.size() < id) + rawparts.push_back(std::string()); + } + raw = rawparts[id - 1] + raw; + 
rawparts[id - 1].clear(); + decoded = raw; + } + if (raw.size() > 0 && codepage != defaultCodepage) { + success = false; + CPINFOEXW cpinfo; + if (id > 0 && bufferSize > 0 && raw.size() == bufferSize && + GetCPInfoExW(codepage, 0, &cpinfo) == 1 && cpinfo.MaxCharSize > 1) { + if (cpinfo.MaxCharSize == 2 && cpinfo.LeadByte[0] != 0) { + LPSTR prevChar = + CharPrevExA(codepage, raw.c_str(), raw.c_str() + raw.size(), 0); + bool isLeadByte = + (*(prevChar + 1) == 0) && IsDBCSLeadByteEx(codepage, *prevChar); + if (isLeadByte) { + rawparts[id - 1] += *(raw.end() - 1); + raw.resize(raw.size() - 1); + } + success = DoDecodeText(raw, decoded, NULL); + } else { + bool restoreDecoded = false; + std::string firstDecoded = decoded; + wchar_t lastChar = 0; + for (UINT i = 0; i < cpinfo.MaxCharSize; i++) { + success = DoDecodeText(raw, decoded, &lastChar); + if (success && lastChar != 0) { + if (i == 0) { + firstDecoded = decoded; + } + if (lastChar == cpinfo.UnicodeDefaultChar) { + restoreDecoded = true; + rawparts[id - 1] = *(raw.end() - 1) + rawparts[id - 1]; + raw.resize(raw.size() - 1); + } else { + restoreDecoded = false; + break; + } + } else { + break; + } + } + if (restoreDecoded) { + decoded = firstDecoded; + rawparts[id - 1].clear(); + } + } + } else { + success = DoDecodeText(raw, decoded, NULL); + } + } +#else + static_cast<void>(id); +#endif + return success; +} + +bool cmProcessOutput::DecodeText(const char* data, size_t length, + std::string& decoded, size_t id) +{ + return DecodeText(std::string(data, length), decoded, id); +} + +bool cmProcessOutput::DecodeText(std::vector<char> raw, + std::vector<char>& decoded, size_t id) +{ + std::string str; + const bool success = + DecodeText(std::string(raw.begin(), raw.end()), str, id); + decoded.assign(str.begin(), str.end()); + return success; +} + +#if defined(_WIN32) +bool cmProcessOutput::DoDecodeText(std::string raw, std::string& decoded, + wchar_t* lastChar) +{ + bool success = false; + const int wlength = + 
MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), NULL, 0); + wchar_t* wdata = new wchar_t[wlength]; + int r = MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), wdata, + wlength); + if (r > 0) { + if (lastChar) { + *lastChar = 0; + if ((wlength >= 2 && wdata[wlength - 2] != wdata[wlength - 1]) || + wlength >= 1) { + *lastChar = wdata[wlength - 1]; + } + } + int length = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, NULL, + 0, NULL, NULL); + char* data = new char[length]; + r = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, data, length, + NULL, NULL); + if (r > 0) { + decoded = std::string(data, length); + success = true; + } + delete[] data; + } + delete[] wdata; + return success; +} +#endif diff --git a/Source/cmProcessOutput.h b/Source/cmProcessOutput.h new file mode 100644 index 0000000..d7a5e98 --- /dev/null +++ b/Source/cmProcessOutput.h @@ -0,0 +1,80 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ +#ifndef cmProcessOutput_h +#define cmProcessOutput_h + +#include <cmConfigure.h> // IWYU pragma: keep + +#include <string> +#include <vector> + +/** \class cmProcessOutput + * \brief Decode text data to internal encoding. + * + * cmProcessOutput is used to decode text output from external process + * using external encoding to our internal encoding. + */ +class cmProcessOutput +{ +public: + enum Encoding + { + None, + Auto, + UTF8, + ANSI, + OEM + }; + + /// The code page that is used as internal encoding to which we will encode. + static unsigned int defaultCodepage; + + /** + * A class constructor. + * \param encoding external process encoding from which we will decode. + * \param maxSize a maximal size for process output buffer. It should match + * to KWSYSPE_PIPE_BUFFER_SIZE. 
If text we decode is same size as \a maxSize + * then we will check for incomplete character at end of buffer and + * we will not return last incomplete character. This character will be + * returned with next DecodeText() call. To disable this behavior specify + * 0 as \a maxSize. + */ + cmProcessOutput(Encoding encoding = Auto, unsigned int maxSize = 1024); + ~cmProcessOutput(); + /** + * Decode \a raw string using external encoding to internal + * encoding in \a decoded. + * \a id specifies which internal buffer to use. This is important when we + * are decoding both stdout and stderr from process output and we need to + * keep incomplete characters in separate buffers for each stream. + * \return true if successfully decoded \a raw to \a decoded or false if not. + */ + bool DecodeText(std::string raw, std::string& decoded, size_t id = 0); + /** + * Decode \a data with \a length from external encoding to internal + * encoding in \a decoded. + * \param data a pointer to process output text data. + * \param length a size of data buffer. + * \param decoded a string which will contain decoded text. + * \param id an internal buffer id to use. + * \return true if successfully decoded \a data to \a decoded or false if + * not. + */ + bool DecodeText(const char* data, size_t length, std::string& decoded, + size_t id = 0); + /** + * \overload + */ + bool DecodeText(std::vector<char> raw, std::vector<char>& decoded, + size_t id = 0); + +private: +#if defined(_WIN32) + unsigned int codepage; + unsigned int bufferSize; + std::vector<std::string> rawparts; + bool DoDecodeText(std::string raw, std::string& decoded, wchar_t* lastChar); +#endif +}; + +#endif |