From 9a4533405beae781b1dbb68e9273242730c58fdb Mon Sep 17 00:00:00 2001 From: Brad King Date: Thu, 12 Sep 2024 12:51:06 -0400 Subject: cmExecuteProcessCommand: Explicitly ignore unknown ENCODING values The logic did this implicitly before. Make it easier to follow. --- Source/cmExecuteProcessCommand.cxx | 13 ++++++++++--- Source/cmProcessOutput.cxx | 6 ++++-- Source/cmProcessOutput.h | 4 +++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Source/cmExecuteProcessCommand.cxx b/Source/cmExecuteProcessCommand.cxx index 2b923df..f98228b 100644 --- a/Source/cmExecuteProcessCommand.cxx +++ b/Source/cmExecuteProcessCommand.cxx @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -69,7 +70,7 @@ bool cmExecuteProcessCommand(std::vector const& args, bool ErrorStripTrailingWhitespace = false; bool EchoOutputVariable = false; bool EchoErrorVariable = false; - std::string Encoding; + cm::optional Encoding; std::string CommandErrorIsFatal; }; @@ -296,8 +297,14 @@ bool cmExecuteProcessCommand(std::vector const& args, }; ReadData outputData; ReadData errorData; - cmProcessOutput processOutput( - cmProcessOutput::FindEncoding(arguments.Encoding)); + cmProcessOutput::Encoding encoding = cmProcessOutput::Auto; + if (arguments.Encoding) { + if (cm::optional maybeEncoding = + cmProcessOutput::FindEncoding(*arguments.Encoding)) { + encoding = *maybeEncoding; + } + } + cmProcessOutput processOutput(encoding); std::string strdata; std::unique_ptr outputHandle; diff --git a/Source/cmProcessOutput.cxx b/Source/cmProcessOutput.cxx index e1df661..601d701 100644 --- a/Source/cmProcessOutput.cxx +++ b/Source/cmProcessOutput.cxx @@ -12,16 +12,18 @@ unsigned int cmProcessOutput::defaultCodepage = KWSYS_ENCODING_DEFAULT_CODEPAGE; #endif -cmProcessOutput::Encoding cmProcessOutput::FindEncoding( +cm::optional cmProcessOutput::FindEncoding( std::string const& name) { - Encoding encoding = Auto; + cm::optional encoding; if ((name == "UTF8") || (name == "UTF-8")) { 
encoding = UTF8; } else if (name == "NONE") { encoding = None; } else if (name == "ANSI") { encoding = ANSI; + } else if (name == "AUTO") { + encoding = Auto; } else if (name == "OEM") { encoding = OEM; } diff --git a/Source/cmProcessOutput.h b/Source/cmProcessOutput.h index 8cee987..f15e3f9 100644 --- a/Source/cmProcessOutput.h +++ b/Source/cmProcessOutput.h @@ -8,6 +8,8 @@ #include #include +#include + /** \class cmProcessOutput * \brief Decode text data to internal encoding. * @@ -31,7 +33,7 @@ public: * \param name a encoding name. * \return encoding enum value or Auto if \a name was not found. */ - static Encoding FindEncoding(std::string const& name); + static cm::optional FindEncoding(std::string const& name); /// The code page that is used as internal encoding to which we will encode. static unsigned int defaultCodepage; -- cgit v0.12 From e782811cfeb9dc9b7d014f436f778a0ae619129c Mon Sep 17 00:00:00 2001 From: Brad King Date: Fri, 13 Sep 2024 11:49:50 -0400 Subject: execute_process: Change default ENCODING to UTF-8 Windows is heading toward making UTF-8 the preferred MBCS. As CMake's internal encoding, `UTF-8` is effectively equivalent to `NONE`, which was CMake's behavior prior to 3.15's accidental change to `AUTO`. Behavior of `ENCODING UTF-8` is independent of CMake's internal encoding, making it in principle a better default than `NONE`. Add policy CMP0176 for compatibility and to document the default's history. 
Fixes: #26262 --- Help/command/execute_process.rst | 4 +++- Help/manual/cmake-policies.7.rst | 1 + Help/policy/CMP0176.rst | 27 ++++++++++++++++++++++ Help/release/dev/execute_process-encoding.rst | 6 +++++ Source/cmExecuteProcessCommand.cxx | 8 ++++++- Source/cmPolicies.h | 2 ++ .../RunCMake/execute_process/Encoding-common.cmake | 7 +++++- .../execute_process/Encoding-windows.cmake | 4 ++++ Tests/RunCMake/execute_process/Encoding.cmake | 3 +++ .../execute_process/EncodingCMP0176-NEW-stderr.txt | 2 ++ .../execute_process/EncodingCMP0176-NEW.cmake | 3 +++ .../execute_process/EncodingCMP0176-OLD-stderr.txt | 2 ++ .../execute_process/EncodingCMP0176-OLD.cmake | 4 ++++ .../execute_process/EncodingDefault-stderr.txt | 2 -- .../RunCMake/execute_process/EncodingDefault.cmake | 3 --- Tests/RunCMake/execute_process/RunCMakeTest.cmake | 3 ++- 16 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 Help/policy/CMP0176.rst create mode 100644 Help/release/dev/execute_process-encoding.rst create mode 100644 Tests/RunCMake/execute_process/EncodingCMP0176-NEW-stderr.txt create mode 100644 Tests/RunCMake/execute_process/EncodingCMP0176-NEW.cmake create mode 100644 Tests/RunCMake/execute_process/EncodingCMP0176-OLD-stderr.txt create mode 100644 Tests/RunCMake/execute_process/EncodingCMP0176-OLD.cmake delete mode 100644 Tests/RunCMake/execute_process/EncodingDefault-stderr.txt delete mode 100644 Tests/RunCMake/execute_process/EncodingDefault.cmake diff --git a/Help/command/execute_process.rst b/Help/command/execute_process.rst index 2591dba..dba5331 100644 --- a/Help/command/execute_process.rst +++ b/Help/command/execute_process.rst @@ -147,7 +147,7 @@ Options: Use the current active console's codepage or if that isn't available then use ANSI. - This is the default since CMake 3.15. + This was the default in CMake 3.15 through 3.30. ``ANSI`` Use the ANSI codepage. @@ -160,6 +160,8 @@ Options: Use the UTF-8 codepage. + This is the default. See policy :policy:`CMP0176`. 
+ ``UTF8`` Use the UTF-8 codepage. Use of this name is discouraged in favor of ``UTF-8`` to match the `UTF-8 RFC <https://www.ietf.org/rfc/rfc3629>`_ diff --git a/Help/manual/cmake-policies.7.rst b/Help/manual/cmake-policies.7.rst index e5284e9..85bb6ec 100644 --- a/Help/manual/cmake-policies.7.rst +++ b/Help/manual/cmake-policies.7.rst @@ -57,6 +57,7 @@ Policies Introduced by CMake 3.31 .. toctree:: :maxdepth: 1 + CMP0176: execute_process() ENCODING is UTF-8 by default. </policy/CMP0176> CMP0175: add_custom_command() rejects invalid arguments. </policy/CMP0175> CMP0174: cmake_parse_arguments(PARSE_ARGV) defines a variable for an empty string after a single-value keyword. </policy/CMP0174> CMP0173: The CMakeFindFrameworks module is removed. </policy/CMP0173> diff --git a/Help/policy/CMP0176.rst b/Help/policy/CMP0176.rst new file mode 100644 index 0000000..4683174 --- /dev/null +++ b/Help/policy/CMP0176.rst @@ -0,0 +1,27 @@ +CMP0176 +------- + +.. versionadded:: 3.31 + +:command:`execute_process` ``ENCODING`` is ``UTF-8`` by default. + +The ``ENCODING`` option is meaningful only on Windows. It specifies the +character encoding expected in the process's output on stdout and stderr. +In CMake 3.14 and below the default encoding was ``NONE``, which corresponds +to CMake's internal UTF-8 encoding. In CMake 3.15 through CMake 3.30 the +default encoding was accidentally changed to ``AUTO``, but the change went +unnoticed and was not documented. + +CMake 3.31 and above prefer the ``ENCODING`` default to be ``UTF-8``. +This policy provides compatibility with projects that may have been +relying on the default being ``AUTO``. + +The ``OLD`` behavior of this policy is for :command:`execute_process` +to use ``AUTO`` by default if no ``ENCODING`` is specified. The ``NEW`` +behavior for this policy is to use ``UTF-8`` as the default ``ENCODING``. + +.. |INTRODUCED_IN_CMAKE_VERSION| replace:: 3.31 +.. |WARNS_OR_DOES_NOT_WARN| replace:: does *not* warn +.. include:: STANDARD_ADVICE.txt + +.. 
include:: DEPRECATED.txt diff --git a/Help/release/dev/execute_process-encoding.rst b/Help/release/dev/execute_process-encoding.rst new file mode 100644 index 0000000..f6999e3 --- /dev/null +++ b/Help/release/dev/execute_process-encoding.rst @@ -0,0 +1,6 @@ +execute_process-encoding +------------------------ + +* The :command:`execute_process` command's ``ENCODING`` option, + meaningful on Windows, now defaults to ``UTF-8``. + See policy :policy:`CMP0176`. diff --git a/Source/cmExecuteProcessCommand.cxx b/Source/cmExecuteProcessCommand.cxx index f98228b..97d0afe 100644 --- a/Source/cmExecuteProcessCommand.cxx +++ b/Source/cmExecuteProcessCommand.cxx @@ -23,6 +23,7 @@ #include "cmList.h" #include "cmMakefile.h" #include "cmMessageType.h" +#include "cmPolicies.h" #include "cmProcessOutput.h" #include "cmStringAlgorithms.h" #include "cmSystemTools.h" @@ -297,7 +298,12 @@ bool cmExecuteProcessCommand(std::vector const& args, }; ReadData outputData; ReadData errorData; - cmProcessOutput::Encoding encoding = cmProcessOutput::Auto; + cmPolicies::PolicyStatus const cmp0176 = + status.GetMakefile().GetPolicyStatus(cmPolicies::CMP0176); + cmProcessOutput::Encoding encoding = + cmp0176 == cmPolicies::OLD || cmp0176 == cmPolicies::WARN + ? 
cmProcessOutput::Auto + : cmProcessOutput::UTF8; if (arguments.Encoding) { if (cm::optional maybeEncoding = cmProcessOutput::FindEncoding(*arguments.Encoding)) { diff --git a/Source/cmPolicies.h b/Source/cmPolicies.h index 644cb9e..254c323 100644 --- a/Source/cmPolicies.h +++ b/Source/cmPolicies.h @@ -539,6 +539,8 @@ class cmMakefile; "string after a single-value keyword.", \ 3, 31, 0, cmPolicies::WARN) \ SELECT(POLICY, CMP0175, "add_custom_command() rejects invalid arguments.", \ + 3, 31, 0, cmPolicies::WARN) \ + SELECT(POLICY, CMP0176, "execute_process() ENCODING is UTF-8 by default.", \ 3, 31, 0, cmPolicies::WARN) #define CM_SELECT_ID(F, A1, A2, A3, A4, A5, A6) F(A1) diff --git a/Tests/RunCMake/execute_process/Encoding-common.cmake b/Tests/RunCMake/execute_process/Encoding-common.cmake index e13be75..fda1c2d 100644 --- a/Tests/RunCMake/execute_process/Encoding-common.cmake +++ b/Tests/RunCMake/execute_process/Encoding-common.cmake @@ -2,7 +2,12 @@ if(ENCODING) set(maybe_ENCODING ENCODING ${ENCODING}) else() set(maybe_ENCODING "") - set(ENCODING AUTO) # execute_process's default ENCODING + cmake_policy(GET CMP0176 cmp0176) + if(cmp0176 STREQUAL "NEW") + set(ENCODING UTF-8) # execute_process's default ENCODING + else() + set(ENCODING AUTO) # execute_process's default ENCODING + endif() endif() execute_process( COMMAND ${TEST_ENCODING_EXE} ${ENCODING} ${CMAKE_CURRENT_LIST_DIR}/Encoding${ENCODING}-stderr.txt diff --git a/Tests/RunCMake/execute_process/Encoding-windows.cmake b/Tests/RunCMake/execute_process/Encoding-windows.cmake index 69b9bc7..d51589c 100644 --- a/Tests/RunCMake/execute_process/Encoding-windows.cmake +++ b/Tests/RunCMake/execute_process/Encoding-windows.cmake @@ -1,3 +1,7 @@ +if(CMP0176 STREQUAL "NEW") + cmake_policy(SET CMP0176 NEW) +endif() + # Set the console code page. 
execute_process(COMMAND cmd /c chcp ${CODEPAGE}) diff --git a/Tests/RunCMake/execute_process/Encoding.cmake b/Tests/RunCMake/execute_process/Encoding.cmake index 206a71b..61d2c22 100644 --- a/Tests/RunCMake/execute_process/Encoding.cmake +++ b/Tests/RunCMake/execute_process/Encoding.cmake @@ -1,9 +1,12 @@ if(CMAKE_HOST_WIN32 AND CODEPAGE) + cmake_policy(GET CMP0176 CMP0176) + # Run cmake in a new Window to isolate its console code page. execute_process(COMMAND cmd /c start /min /wait "" ${CMAKE_COMMAND} -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE} -DENCODING=${ENCODING} -DCODEPAGE=${CODEPAGE} + -DCMP0176=${CMP0176} -P ${CMAKE_CURRENT_LIST_DIR}/Encoding-windows.cmake) # Load our internal UTF-8 representation of the output. diff --git a/Tests/RunCMake/execute_process/EncodingCMP0176-NEW-stderr.txt b/Tests/RunCMake/execute_process/EncodingCMP0176-NEW-stderr.txt new file mode 100644 index 0000000..9fd0dcd --- /dev/null +++ b/Tests/RunCMake/execute_process/EncodingCMP0176-NEW-stderr.txt @@ -0,0 +1,2 @@ +Chinese Hindi Greek English Russian +注意 यूनिकोड είναι very здорово! diff --git a/Tests/RunCMake/execute_process/EncodingCMP0176-NEW.cmake b/Tests/RunCMake/execute_process/EncodingCMP0176-NEW.cmake new file mode 100644 index 0000000..b23ff59 --- /dev/null +++ b/Tests/RunCMake/execute_process/EncodingCMP0176-NEW.cmake @@ -0,0 +1,3 @@ +cmake_policy(SET CMP0176 NEW) +# No explicit ENCODING option; fall back to default. 
+include(${CMAKE_CURRENT_LIST_DIR}/Encoding.cmake) diff --git a/Tests/RunCMake/execute_process/EncodingCMP0176-OLD-stderr.txt b/Tests/RunCMake/execute_process/EncodingCMP0176-OLD-stderr.txt new file mode 100644 index 0000000..b4cf27a --- /dev/null +++ b/Tests/RunCMake/execute_process/EncodingCMP0176-OLD-stderr.txt @@ -0,0 +1,2 @@ +Chinese +注意 diff --git a/Tests/RunCMake/execute_process/EncodingCMP0176-OLD.cmake b/Tests/RunCMake/execute_process/EncodingCMP0176-OLD.cmake new file mode 100644 index 0000000..8057176 --- /dev/null +++ b/Tests/RunCMake/execute_process/EncodingCMP0176-OLD.cmake @@ -0,0 +1,4 @@ +cmake_policy(SET CMP0176 OLD) +# No explicit ENCODING option; fall back to default. +set(CODEPAGE 54936) +include(${CMAKE_CURRENT_LIST_DIR}/Encoding.cmake) diff --git a/Tests/RunCMake/execute_process/EncodingDefault-stderr.txt b/Tests/RunCMake/execute_process/EncodingDefault-stderr.txt deleted file mode 100644 index b4cf27a..0000000 --- a/Tests/RunCMake/execute_process/EncodingDefault-stderr.txt +++ /dev/null @@ -1,2 +0,0 @@ -Chinese -注意 diff --git a/Tests/RunCMake/execute_process/EncodingDefault.cmake b/Tests/RunCMake/execute_process/EncodingDefault.cmake deleted file mode 100644 index 60b1eb6..0000000 --- a/Tests/RunCMake/execute_process/EncodingDefault.cmake +++ /dev/null @@ -1,3 +0,0 @@ -# No explicit ENCODING option; fall back to default. 
-set(CODEPAGE 54936) -include(${CMAKE_CURRENT_LIST_DIR}/Encoding.cmake) diff --git a/Tests/RunCMake/execute_process/RunCMakeTest.cmake b/Tests/RunCMake/execute_process/RunCMakeTest.cmake index b3ebdca..2516de0 100644 --- a/Tests/RunCMake/execute_process/RunCMakeTest.cmake +++ b/Tests/RunCMake/execute_process/RunCMakeTest.cmake @@ -9,7 +9,8 @@ run_cmake_command(MergeOutputVars ${CMAKE_COMMAND} -P ${RunCMake_SOURCE_DIR}/Mer run_cmake(EncodingMissing) if(TEST_ENCODING_EXE) - run_cmake_script(EncodingDefault -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) + run_cmake_script(EncodingCMP0176-NEW -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) + run_cmake_script(EncodingCMP0176-OLD -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) run_cmake_script(EncodingAUTO -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) run_cmake_script(EncodingUTF-8 -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) run_cmake_script(EncodingUTF8 -DTEST_ENCODING_EXE=${TEST_ENCODING_EXE}) -- cgit v0.12