From e73bf1c384b644a122cccc4a85a1c0fb0943759a Mon Sep 17 00:00:00 2001 From: Brad King Date: Tue, 4 Jan 2011 13:20:49 -0500 Subject: CTest: Do not truncate UTF-8 test output too early (#10656) Since commit e4beefeb (CTest: Do not munge UTF-8 output in XML files, 2009-12-08) we validate UTF-8 encoding of build and test output as it is written to XML files. However, in cmCTestTestHandler::CleanTestOutput we still processed test output one byte at a time and did not recognize multi-byte UTF-8 characters. Presence of such characters caused early truncation. Teach CleanTestOutput to truncate test output at the limit but without cutting it in the middle of a multi-byte encoding. Also, stop avoiding truncation in the middle of an XML tag like "<tag>" because the '<' and '>' will be properly escaped in the generated XML anyway. --- Source/CTest/cmCTestTestHandler.cxx | 75 ++++++++++++++----------------------- 1 file changed, 28 insertions(+), 47 deletions(-) diff --git a/Source/CTest/cmCTestTestHandler.cxx b/Source/CTest/cmCTestTestHandler.cxx index 6d1af2d..d1bc837 100644 --- a/Source/CTest/cmCTestTestHandler.cxx +++ b/Source/CTest/cmCTestTestHandler.cxx @@ -26,6 +26,7 @@ #include "cmCommand.h" #include "cmSystemTools.h" #include "cmXMLSafe.h" +#include "cm_utf8.h" #include #include @@ -1980,65 +1981,45 @@ void cmCTestTestHandler::SetTestsToRunInformation(const char* in) } } -//---------------------------------------------------------------------- -bool cmCTestTestHandler::CleanTestOutput(std::string& output, - size_t remove_threshold) +//---------------------------------------------------------------------------- +bool cmCTestTestHandler::CleanTestOutput(std::string& output, size_t length) { - if ( remove_threshold == 0 ) + if(!length || length >= output.size() || + output.find("CTEST_FULL_OUTPUT") != output.npos) { return true; } - if ( output.find("CTEST_FULL_OUTPUT") != output.npos ) + + // Truncate at given length but do not break in the middle of a multi-byte + // UTF-8 
encoding. + char const* const begin = output.c_str(); + char const* const end = begin + output.size(); + char const* const truncate = begin + length; + char const* current = begin; + while(current < truncate) { - return true; - } - cmOStringStream ostr; - std::string::size_type cc; - std::string::size_type skipsize = 0; - int inTag = 0; - int skipped = 0; - for ( cc = 0; cc < output.size(); cc ++ ) - { - int ch = output[cc]; - if ( ch < 0 || ch > 255 ) - { - break; - } - if ( ch == '<' ) - { - inTag = 1; - } - if ( !inTag ) + unsigned int ch; + if(const char* next = cm_utf8_decode_character(current, end, &ch)) { - int notskip = 0; - // Skip - if ( skipsize < remove_threshold ) - { - ostr << static_cast<char>(ch); - notskip = 1; - } - skipsize ++; - if ( notskip && skipsize >= remove_threshold ) + if(next > truncate) { - skipped = 1; + break; } + current = next; } - else - { - ostr << static_cast<char>(ch); - } - if ( ch == '>' ) + else // Bad byte will be handled by cmXMLSafe. { - inTag = 0; + ++current; } } - if ( skipped ) - { - ostr << "..." << std::endl << "The rest of the test output was removed " - "since it exceeds the threshold of " - << remove_threshold << " characters." << std::endl; - } - output = ostr.str(); + output = output.substr(0, current - begin); + + // Append truncation message. + cmOStringStream msg; + msg << "...\n" + "The rest of the test output was removed since it exceeds the threshold " + "of " << length << " bytes.\n"; + output += msg.str(); return true; } -- cgit v0.12