From 96103972ea1c478a2845fb68aee70a3395c148e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= Date: Tue, 1 Nov 2016 20:04:20 +0200 Subject: Add cmProcessOutput class to be used for decoding text data This allows to decode text data we receive from external process which uses external encoding to our internal encoding. --- Source/CMakeLists.txt | 5 ++ Source/cmProcessOutput.cxx | 155 +++++++++++++++++++++++++++++++++++++++++++++ Source/cmProcessOutput.h | 80 +++++++++++++++++++++++ bootstrap | 5 +- 4 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 Source/cmProcessOutput.cxx create mode 100644 Source/cmProcessOutput.h diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index 718b022..fcda6f9 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -330,6 +330,8 @@ set(SRCS cmOrderDirectories.h cmPolicies.h cmPolicies.cxx + cmProcessOutput.cxx + cmProcessOutput.h cmProcessTools.cxx cmProcessTools.h cmProperty.cxx @@ -632,6 +634,9 @@ set(SRCS cm_codecvt.cxx ) +SET_PROPERTY(SOURCE cmProcessOutput.cxx APPEND PROPERTY COMPILE_DEFINITIONS + KWSYS_ENCODING_DEFAULT_CODEPAGE=${KWSYS_ENCODING_DEFAULT_CODEPAGE}) + # Kdevelop only works on UNIX and not windows if(UNIX) set(SRCS ${SRCS} cmGlobalKdevelopGenerator.cxx) diff --git a/Source/cmProcessOutput.cxx b/Source/cmProcessOutput.cxx new file mode 100644 index 0000000..1440223 --- /dev/null +++ b/Source/cmProcessOutput.cxx @@ -0,0 +1,155 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ + +#include "cmProcessOutput.h" + +#if defined(_WIN32) +#include +unsigned int cmProcessOutput::defaultCodepage = + KWSYS_ENCODING_DEFAULT_CODEPAGE; +#endif + +cmProcessOutput::cmProcessOutput(Encoding encoding, unsigned int maxSize) +{ +#if defined(_WIN32) + codepage = 0; + bufferSize = maxSize; + if (encoding == None) { + codepage = defaultCodepage; + } else if (encoding == Auto) { + codepage = GetConsoleCP(); + } else if (encoding == UTF8) { + codepage = CP_UTF8; + } else if (encoding == OEM) { + codepage = GetOEMCP(); + } + if (!codepage || encoding == ANSI) { + codepage = GetACP(); + } +#else + static_cast(encoding); + static_cast(maxSize); +#endif +} + +cmProcessOutput::~cmProcessOutput() +{ +} + +bool cmProcessOutput::DecodeText(std::string raw, std::string& decoded, + size_t id) +{ + bool success = true; + decoded = raw; +#if defined(_WIN32) + if (id > 0) { + if (rawparts.size() < id) { + rawparts.reserve(id); + while (rawparts.size() < id) + rawparts.push_back(std::string()); + } + raw = rawparts[id - 1] + raw; + rawparts[id - 1].clear(); + decoded = raw; + } + if (raw.size() > 0 && codepage != defaultCodepage) { + success = false; + CPINFOEXW cpinfo; + if (id > 0 && bufferSize > 0 && raw.size() == bufferSize && + GetCPInfoExW(codepage, 0, &cpinfo) == 1 && cpinfo.MaxCharSize > 1) { + if (cpinfo.MaxCharSize == 2 && cpinfo.LeadByte[0] != 0) { + LPSTR prevChar = + CharPrevExA(codepage, raw.c_str(), raw.c_str() + raw.size(), 0); + bool isLeadByte = + (*(prevChar + 1) == 0) && IsDBCSLeadByteEx(codepage, *prevChar); + if (isLeadByte) { + rawparts[id - 1] += *(raw.end() - 1); + raw.resize(raw.size() - 1); + } + success = DoDecodeText(raw, decoded, NULL); + } else { + bool restoreDecoded = false; + std::string firstDecoded = decoded; + wchar_t lastChar = 0; + for (UINT i = 0; i < cpinfo.MaxCharSize; i++) { + success = DoDecodeText(raw, decoded, &lastChar); + if (success && lastChar != 0) { + if (i == 0) { + firstDecoded = decoded; + } + if (lastChar == cpinfo.UnicodeDefaultChar) { + restoreDecoded = true; + rawparts[id - 1] = *(raw.end() - 1) + rawparts[id - 1]; + raw.resize(raw.size() - 1); + } else { + restoreDecoded = false; + break; + } + } else { + break; + } + } + if (restoreDecoded) { + decoded = firstDecoded; + rawparts[id - 1].clear(); + } + } + } else { + success = DoDecodeText(raw, decoded, NULL); + } + } +#else + static_cast(id); +#endif + return success; +} + +bool cmProcessOutput::DecodeText(const char* data, size_t length, + std::string& decoded, size_t id) +{ + return DecodeText(std::string(data, length), decoded, id); +} + +bool cmProcessOutput::DecodeText(std::vector raw, + std::vector& decoded, size_t id) +{ + std::string str; + const bool success = + DecodeText(std::string(raw.begin(), raw.end()), str, id); + decoded.assign(str.begin(), str.end()); + return success; +} + +#if defined(_WIN32) +bool cmProcessOutput::DoDecodeText(std::string raw, std::string& decoded, + wchar_t* lastChar) +{ + bool success = false; + const int wlength = + MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), NULL, 0); + wchar_t* wdata = new wchar_t[wlength]; + int r = MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), wdata, + wlength); + if (r > 0) { + if (lastChar) { + *lastChar = 0; + if ((wlength >= 2 && wdata[wlength - 2] != wdata[wlength - 1]) || + wlength >= 1) { + *lastChar = wdata[wlength - 1]; + } + } + int length = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, NULL, + 0, NULL, NULL); + char* data = new char[length]; + r = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, data, length, + NULL, NULL); + if (r > 0) { + decoded = std::string(data, length); + success = true; + } + delete[] data; + } + delete[] wdata; + return success; +} +#endif diff --git a/Source/cmProcessOutput.h b/Source/cmProcessOutput.h new file mode 100644 index 0000000..d7a5e98 --- /dev/null +++ b/Source/cmProcessOutput.h @@ -0,0 +1,80 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ +#ifndef cmProcessOutput_h +#define cmProcessOutput_h + +#include // IWYU pragma: keep + +#include +#include + +/** \class cmProcessOutput + * \brief Decode text data to internal encoding. + * + * cmProcessOutput is used to decode text output from external process + * using external encoding to our internal encoding. + */ +class cmProcessOutput +{ +public: + enum Encoding + { + None, + Auto, + UTF8, + ANSI, + OEM + }; + + /// The code page that is used as internal encoding to which we will encode. + static unsigned int defaultCodepage; + + /** + * A class constructor. + * \param encoding external process encoding from which we will decode. + * \param maxSize a maximal size for process output buffer. It should match + * to KWSYSPE_PIPE_BUFFER_SIZE. If text we decode is same size as \a maxSize + * then we will check for incomplete character at end of buffer and + * we will not return last incomplete character. This character will be + * returned with next DecodeText() call. To disable this behavior specify + * 0 as \a maxSize. + */ + cmProcessOutput(Encoding encoding = Auto, unsigned int maxSize = 1024); + ~cmProcessOutput(); + /** + * Decode \a raw string using external encoding to internal + * encoding in \a decoded. + * \a id specifies which internal buffer to use. This is important when we + * are decoding both stdout and stderr from process output and we need to + * keep incomplete characters in separate buffers for each stream. + * \return true if successfully decoded \a raw to \a decoded or false if not. + */ + bool DecodeText(std::string raw, std::string& decoded, size_t id = 0); + /** + * Decode \a data with \a length from external encoding to internal + * encoding in \a decoded. + * \param data a pointer to process output text data. + * \param length a size of data buffer. + * \param decoded a string which will contain decoded text. + * \param id an internal buffer id to use. + * \return true if successfully decoded \a data to \a decoded or false if + * not. + */ + bool DecodeText(const char* data, size_t length, std::string& decoded, + size_t id = 0); + /** + * \overload + */ + bool DecodeText(std::vector raw, std::vector& decoded, + size_t id = 0); + +private: +#if defined(_WIN32) + unsigned int codepage; + unsigned int bufferSize; + std::vector rawparts; + bool DoDecodeText(std::string raw, std::string& decoded, wchar_t* lastChar); +#endif +}; + +#endif diff --git a/bootstrap b/bootstrap index d54380a..ec081fa 100755 --- a/bootstrap +++ b/bootstrap @@ -372,6 +372,7 @@ CMAKE_CXX_SOURCES="\ cmParseArgumentsCommand \ cmPathLabel \ cmPolicies \ + cmProcessOutput \ cmProjectCommand \ cmProperty \ cmPropertyDefinition \ @@ -1423,6 +1424,7 @@ fi cmake_c_flags_String="-DKWSYS_STRING_C" if ${cmake_system_mingw}; then cmake_c_flags_EncodingC="-DKWSYS_ENCODING_DEFAULT_CODEPAGE=CP_ACP" + cmake_cxx_flags_cmProcessOutput="${cmake_c_flags_EncodingC}" fi cmake_cxx_flags_SystemTools=" -DKWSYS_CXX_HAS_SETENV=${KWSYS_CXX_HAS_SETENV} @@ -1439,8 +1441,9 @@ echo "cmake: ${objs}" > "${cmake_bootstrap_dir}/Makefile" echo " ${cmake_cxx_compiler} ${cmake_ld_flags} ${cmake_cxx_flags} ${objs} -o cmake" >> "${cmake_bootstrap_dir}/Makefile" for a in ${CMAKE_CXX_SOURCES}; do src=`cmake_escape "${cmake_source_dir}/Source/${a}.cxx"` + src_flags=`eval echo \\${cmake_cxx_flags_\${a}}` echo "${a}.o : ${src} ${dep}" >> "${cmake_bootstrap_dir}/Makefile" - echo " ${cmake_cxx_compiler} ${cmake_cxx_flags} -c ${src} -o ${a}.o" >> "${cmake_bootstrap_dir}/Makefile" + echo " ${cmake_cxx_compiler} ${cmake_cxx_flags} ${src_flags} -c ${src} -o ${a}.o" >> "${cmake_bootstrap_dir}/Makefile" done echo "cmBootstrapCommands1.o : $cmBootstrapCommands1Deps" >> "${cmake_bootstrap_dir}/Makefile" echo "cmBootstrapCommands2.o : $cmBootstrapCommands2Deps" >> "${cmake_bootstrap_dir}/Makefile" -- cgit v0.12