CTest: Do not truncate UTF-8 test output too early (#10656)

Since commit e4beefeb (CTest: Do not munge UTF-8 output in XML files,
2009-12-08) we validate UTF-8 encoding of build and test output as it is
written to XML files.  However, in cmCTestTestHandler::CleanTestOutput
we still processed test output one byte at a time and did not recognize
multi-byte UTF-8 characters.  Presence of such characters caused early
truncation.

Teach CleanTestOutput to truncate test output at the limit but without
cutting it in the middle of a multi-byte encoding.  Also, stop avoiding
truncation in the middle of an XML tag like "<MyElement>" because the
'<' and '>' will be properly escaped in the generated XML anyway.
This commit is contained in:
Brad King 2011-01-04 13:20:49 -05:00
parent c59ed29552
commit e73bf1c384

View File

@@ -26,6 +26,7 @@
#include "cmCommand.h" #include "cmCommand.h"
#include "cmSystemTools.h" #include "cmSystemTools.h"
#include "cmXMLSafe.h" #include "cmXMLSafe.h"
#include "cm_utf8.h"
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
@@ -1980,65 +1981,45 @@ void cmCTestTestHandler::SetTestsToRunInformation(const char* in)
} }
} }
//---------------------------------------------------------------------- //----------------------------------------------------------------------------
bool cmCTestTestHandler::CleanTestOutput(std::string& output, bool cmCTestTestHandler::CleanTestOutput(std::string& output, size_t length)
size_t remove_threshold)
{ {
if ( remove_threshold == 0 ) if(!length || length >= output.size() ||
output.find("CTEST_FULL_OUTPUT") != output.npos)
{ {
return true; return true;
} }
if ( output.find("CTEST_FULL_OUTPUT") != output.npos )
// Truncate at given length but do not break in the middle of a multi-byte
// UTF-8 encoding.
char const* const begin = output.c_str();
char const* const end = begin + output.size();
char const* const truncate = begin + length;
char const* current = begin;
while(current < truncate)
{ {
return true; unsigned int ch;
} if(const char* next = cm_utf8_decode_character(current, end, &ch))
cmOStringStream ostr;
std::string::size_type cc;
std::string::size_type skipsize = 0;
int inTag = 0;
int skipped = 0;
for ( cc = 0; cc < output.size(); cc ++ )
{ {
int ch = output[cc]; if(next > truncate)
if ( ch < 0 || ch > 255 )
{ {
break; break;
} }
if ( ch == '<' ) current = next;
}
else // Bad byte will be handled by cmXMLSafe.
{ {
inTag = 1; ++current;
}
if ( !inTag )
{
int notskip = 0;
// Skip
if ( skipsize < remove_threshold )
{
ostr << static_cast<char>(ch);
notskip = 1;
}
skipsize ++;
if ( notskip && skipsize >= remove_threshold )
{
skipped = 1;
} }
} }
else output = output.substr(0, current - begin);
{
ostr << static_cast<char>(ch); // Append truncation message.
} cmOStringStream msg;
if ( ch == '>' ) msg << "...\n"
{ "The rest of the test output was removed since it exceeds the threshold "
inTag = 0; "of " << length << " bytes.\n";
} output += msg.str();
}
if ( skipped )
{
ostr << "..." << std::endl << "The rest of the test output was removed "
"since it exceeds the threshold of "
<< remove_threshold << " characters." << std::endl;
}
output = ostr.str();
return true; return true;
} }