summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDāvis Mosāns <davispuh@gmail.com>2016-11-01 18:04:20 (GMT)
committerDāvis Mosāns <davispuh@gmail.com>2016-11-14 19:21:20 (GMT)
commit96103972ea1c478a2845fb68aee70a3395c148e0 (patch)
treef327e98f892d8e5cb8a45b1fbb250399f34149e3
parent92c865b8f5f0aef1af3ecd33692ce490467c0e70 (diff)
downloadCMake-96103972ea1c478a2845fb68aee70a3395c148e0.zip
CMake-96103972ea1c478a2845fb68aee70a3395c148e0.tar.gz
CMake-96103972ea1c478a2845fb68aee70a3395c148e0.tar.bz2
Add cmProcessOutput class to be used for decoding text data
This allows to decode text data we receive from external process which uses external encoding to our internal encoding.
-rw-r--r--Source/CMakeLists.txt5
-rw-r--r--Source/cmProcessOutput.cxx155
-rw-r--r--Source/cmProcessOutput.h80
-rwxr-xr-xbootstrap5
4 files changed, 244 insertions, 1 deletions
diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt
index 718b022..fcda6f9 100644
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@@ -330,6 +330,8 @@ set(SRCS
cmOrderDirectories.h
cmPolicies.h
cmPolicies.cxx
+ cmProcessOutput.cxx
+ cmProcessOutput.h
cmProcessTools.cxx
cmProcessTools.h
cmProperty.cxx
@@ -632,6 +634,9 @@ set(SRCS
cm_codecvt.cxx
)
+SET_PROPERTY(SOURCE cmProcessOutput.cxx APPEND PROPERTY COMPILE_DEFINITIONS
+ KWSYS_ENCODING_DEFAULT_CODEPAGE=${KWSYS_ENCODING_DEFAULT_CODEPAGE})
+
# Kdevelop only works on UNIX and not windows
if(UNIX)
set(SRCS ${SRCS} cmGlobalKdevelopGenerator.cxx)
diff --git a/Source/cmProcessOutput.cxx b/Source/cmProcessOutput.cxx
new file mode 100644
index 0000000..1440223
--- /dev/null
+++ b/Source/cmProcessOutput.cxx
@@ -0,0 +1,155 @@
+/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+ file Copyright.txt or https://cmake.org/licensing for details. */
+
+#include "cmProcessOutput.h"
+
+#if defined(_WIN32)
+#include <windows.h>
+unsigned int cmProcessOutput::defaultCodepage =
+ KWSYS_ENCODING_DEFAULT_CODEPAGE;
+#endif
+
+cmProcessOutput::cmProcessOutput(Encoding encoding, unsigned int maxSize)
+{
+#if defined(_WIN32)
+ codepage = 0;
+ bufferSize = maxSize;
+ if (encoding == None) {
+ codepage = defaultCodepage;
+ } else if (encoding == Auto) {
+ codepage = GetConsoleCP();
+ } else if (encoding == UTF8) {
+ codepage = CP_UTF8;
+ } else if (encoding == OEM) {
+ codepage = GetOEMCP();
+ }
+ if (!codepage || encoding == ANSI) {
+ codepage = GetACP();
+ }
+#else
+ static_cast<void>(encoding);
+ static_cast<void>(maxSize);
+#endif
+}
+
+cmProcessOutput::~cmProcessOutput()
+{
+}
+
+bool cmProcessOutput::DecodeText(std::string raw, std::string& decoded,
+ size_t id)
+{
+ bool success = true;
+ decoded = raw;
+#if defined(_WIN32)
+ if (id > 0) {
+ if (rawparts.size() < id) {
+ rawparts.reserve(id);
+ while (rawparts.size() < id)
+ rawparts.push_back(std::string());
+ }
+ raw = rawparts[id - 1] + raw;
+ rawparts[id - 1].clear();
+ decoded = raw;
+ }
+ if (raw.size() > 0 && codepage != defaultCodepage) {
+ success = false;
+ CPINFOEXW cpinfo;
+ if (id > 0 && bufferSize > 0 && raw.size() == bufferSize &&
+ GetCPInfoExW(codepage, 0, &cpinfo) == 1 && cpinfo.MaxCharSize > 1) {
+ if (cpinfo.MaxCharSize == 2 && cpinfo.LeadByte[0] != 0) {
+ LPSTR prevChar =
+ CharPrevExA(codepage, raw.c_str(), raw.c_str() + raw.size(), 0);
+ bool isLeadByte =
+ (*(prevChar + 1) == 0) && IsDBCSLeadByteEx(codepage, *prevChar);
+ if (isLeadByte) {
+ rawparts[id - 1] += *(raw.end() - 1);
+ raw.resize(raw.size() - 1);
+ }
+ success = DoDecodeText(raw, decoded, NULL);
+ } else {
+ bool restoreDecoded = false;
+ std::string firstDecoded = decoded;
+ wchar_t lastChar = 0;
+ for (UINT i = 0; i < cpinfo.MaxCharSize; i++) {
+ success = DoDecodeText(raw, decoded, &lastChar);
+ if (success && lastChar != 0) {
+ if (i == 0) {
+ firstDecoded = decoded;
+ }
+ if (lastChar == cpinfo.UnicodeDefaultChar) {
+ restoreDecoded = true;
+ rawparts[id - 1] = *(raw.end() - 1) + rawparts[id - 1];
+ raw.resize(raw.size() - 1);
+ } else {
+ restoreDecoded = false;
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ if (restoreDecoded) {
+ decoded = firstDecoded;
+ rawparts[id - 1].clear();
+ }
+ }
+ } else {
+ success = DoDecodeText(raw, decoded, NULL);
+ }
+ }
+#else
+ static_cast<void>(id);
+#endif
+ return success;
+}
+
+bool cmProcessOutput::DecodeText(const char* data, size_t length,
+ std::string& decoded, size_t id)
+{
+ return DecodeText(std::string(data, length), decoded, id);
+}
+
+bool cmProcessOutput::DecodeText(std::vector<char> raw,
+ std::vector<char>& decoded, size_t id)
+{
+ std::string str;
+ const bool success =
+ DecodeText(std::string(raw.begin(), raw.end()), str, id);
+ decoded.assign(str.begin(), str.end());
+ return success;
+}
+
+#if defined(_WIN32)
+bool cmProcessOutput::DoDecodeText(std::string raw, std::string& decoded,
+ wchar_t* lastChar)
+{
+ bool success = false;
+ const int wlength =
+ MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), NULL, 0);
+ wchar_t* wdata = new wchar_t[wlength];
+ int r = MultiByteToWideChar(codepage, 0, raw.c_str(), int(raw.size()), wdata,
+ wlength);
+ if (r > 0) {
+ if (lastChar) {
+ *lastChar = 0;
+ if ((wlength >= 2 && wdata[wlength - 2] != wdata[wlength - 1]) ||
+ wlength >= 1) {
+ *lastChar = wdata[wlength - 1];
+ }
+ }
+ int length = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, NULL,
+ 0, NULL, NULL);
+ char* data = new char[length];
+ r = WideCharToMultiByte(defaultCodepage, 0, wdata, wlength, data, length,
+ NULL, NULL);
+ if (r > 0) {
+ decoded = std::string(data, length);
+ success = true;
+ }
+ delete[] data;
+ }
+ delete[] wdata;
+ return success;
+}
+#endif
diff --git a/Source/cmProcessOutput.h b/Source/cmProcessOutput.h
new file mode 100644
index 0000000..d7a5e98
--- /dev/null
+++ b/Source/cmProcessOutput.h
@@ -0,0 +1,80 @@
+/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+ file Copyright.txt or https://cmake.org/licensing for details. */
+#ifndef cmProcessOutput_h
+#define cmProcessOutput_h
+
+#include <cmConfigure.h> // IWYU pragma: keep
+
+#include <string>
+#include <vector>
+
+/** \class cmProcessOutput
+ * \brief Decode text data to internal encoding.
+ *
+ * cmProcessOutput is used to decode text output from external process
+ * using external encoding to our internal encoding.
+ */
+class cmProcessOutput
+{
+public:
+ enum Encoding
+ {
+ None,
+ Auto,
+ UTF8,
+ ANSI,
+ OEM
+ };
+
+ /// The code page that is used as internal encoding to which we will encode.
+ static unsigned int defaultCodepage;
+
+ /**
+ * A class constructor.
+ * \param encoding external process encoding from which we will decode.
+ * \param maxSize a maximal size for process output buffer. It should match
+ * to KWSYSPE_PIPE_BUFFER_SIZE. If text we decode is same size as \a maxSize
+ * then we will check for incomplete character at end of buffer and
+ * we will not return last incomplete character. This character will be
+ * returned with next DecodeText() call. To disable this behavior specify
+ * 0 as \a maxSize.
+ */
+ cmProcessOutput(Encoding encoding = Auto, unsigned int maxSize = 1024);
+ ~cmProcessOutput();
+ /**
+ * Decode \a raw string using external encoding to internal
+ * encoding in \a decoded.
+ * \a id specifies which internal buffer to use. This is important when we
+ * are decoding both stdout and stderr from process output and we need to
+ * keep incomplete characters in separate buffers for each stream.
+ * \return true if successfully decoded \a raw to \a decoded or false if not.
+ */
+ bool DecodeText(std::string raw, std::string& decoded, size_t id = 0);
+ /**
+ * Decode \a data with \a length from external encoding to internal
+ * encoding in \a decoded.
+ * \param data a pointer to process output text data.
+ * \param length a size of data buffer.
+ * \param decoded a string which will contain decoded text.
+ * \param id an internal buffer id to use.
+ * \return true if successfully decoded \a data to \a decoded or false if
+ * not.
+ */
+ bool DecodeText(const char* data, size_t length, std::string& decoded,
+ size_t id = 0);
+ /**
+ * \overload
+ */
+ bool DecodeText(std::vector<char> raw, std::vector<char>& decoded,
+ size_t id = 0);
+
+private:
+#if defined(_WIN32)
+ unsigned int codepage;
+ unsigned int bufferSize;
+ std::vector<std::string> rawparts;
+ bool DoDecodeText(std::string raw, std::string& decoded, wchar_t* lastChar);
+#endif
+};
+
+#endif
diff --git a/bootstrap b/bootstrap
index d54380a..ec081fa 100755
--- a/bootstrap
+++ b/bootstrap
@@ -372,6 +372,7 @@ CMAKE_CXX_SOURCES="\
cmParseArgumentsCommand \
cmPathLabel \
cmPolicies \
+ cmProcessOutput \
cmProjectCommand \
cmProperty \
cmPropertyDefinition \
@@ -1423,6 +1424,7 @@ fi
cmake_c_flags_String="-DKWSYS_STRING_C"
if ${cmake_system_mingw}; then
cmake_c_flags_EncodingC="-DKWSYS_ENCODING_DEFAULT_CODEPAGE=CP_ACP"
+ cmake_cxx_flags_cmProcessOutput="${cmake_c_flags_EncodingC}"
fi
cmake_cxx_flags_SystemTools="
-DKWSYS_CXX_HAS_SETENV=${KWSYS_CXX_HAS_SETENV}
@@ -1439,8 +1441,9 @@ echo "cmake: ${objs}" > "${cmake_bootstrap_dir}/Makefile"
echo " ${cmake_cxx_compiler} ${cmake_ld_flags} ${cmake_cxx_flags} ${objs} -o cmake" >> "${cmake_bootstrap_dir}/Makefile"
for a in ${CMAKE_CXX_SOURCES}; do
src=`cmake_escape "${cmake_source_dir}/Source/${a}.cxx"`
+ src_flags=`eval echo \\${cmake_cxx_flags_\${a}}`
echo "${a}.o : ${src} ${dep}" >> "${cmake_bootstrap_dir}/Makefile"
- echo " ${cmake_cxx_compiler} ${cmake_cxx_flags} -c ${src} -o ${a}.o" >> "${cmake_bootstrap_dir}/Makefile"
+ echo " ${cmake_cxx_compiler} ${cmake_cxx_flags} ${src_flags} -c ${src} -o ${a}.o" >> "${cmake_bootstrap_dir}/Makefile"
done
echo "cmBootstrapCommands1.o : $cmBootstrapCommands1Deps" >> "${cmake_bootstrap_dir}/Makefile"
echo "cmBootstrapCommands2.o : $cmBootstrapCommands2Deps" >> "${cmake_bootstrap_dir}/Makefile"