/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing for details.  */
#include "cmXMLSafe.h"

#include "cm_utf8.h"

#include <sstream>
#include <stdio.h>
#include <string.h>

// Wrap a NUL-terminated C string.  The length is measured with strlen(), so
// `s` must be non-null.  Only the pointer is stored here, so the caller's
// buffer must stay valid while this object is in use.
cmXMLSafe::cmXMLSafe(const char* s)
  : Data(s)
  , Size(static_cast<unsigned long>(strlen(s)))
  , DoQuotes(true)
{
}

// Wrap the contents of a std::string.  The pointer returned by c_str() is
// stored, so `s` must outlive this object and must not be modified while
// this object is in use.
cmXMLSafe::cmXMLSafe(std::string const& s)
  : Data(s.c_str())
  , Size(static_cast<unsigned long>(s.length()))
  , DoQuotes(true)
{
}

// Select whether single and double quotes are escaped (true, the default set
// by the constructors) or passed through verbatim (false).  Returns *this so
// the call can be chained.
cmXMLSafe& cmXMLSafe::Quotes(bool b)
{
  this->DoQuotes = b;
  return *this;
}

// Return the escaped text as a string by streaming this object into a
// temporary std::ostringstream.
std::string cmXMLSafe::str()
{
  std::ostringstream ss;
  ss << *this;
  return ss.str();
}

// Write the wrapped text to `os` in a form that is safe to embed in XML:
//  - '&', '<' and '>' are always replaced by their predefined entities;
//  - '"' and '\'' are replaced by entities only when DoQuotes is set;
//  - carriage returns are dropped;
//  - code points outside the XML "Char" production are replaced by a
//    "[NON-XML-CHAR-0x...]" marker;
//  - bytes that cannot be decoded as UTF-8 are replaced by a
//    "[NON-UTF-8-BYTE-0x...]" marker.
// All other characters are copied through as their original UTF-8 bytes.
std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self)
{
  char const* first = self.Data;
  char const* last = self.Data + self.Size;
  while (first != last) {
    unsigned int ch;
    // A non-null result points just past the decoded character; a null
    // result is treated below as an undecodable byte at `first`.
    if (const char* next = cm_utf8_decode_character(first, last, &ch)) {
      // http://www.w3.org/TR/REC-xml/#NT-Char
      if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) ||
          (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA ||
          ch == 0xD) {
        switch (ch) {
          // Escape XML control characters.
          case '&':
            os << "&amp;";
            break;
          case '<':
            os << "&lt;";
            break;
          case '>':
            os << "&gt;";
            break;
          case '"':
            os << (self.DoQuotes ? "&quot;" : "\"");
            break;
          case '\'':
            os << (self.DoQuotes ? "&apos;" : "'");
            break;
          case '\r':
            break; // Ignore CR
          // Print the UTF-8 character.
          default:
            os.write(first, next - first);
            break;
        }
      } else {
        // Use a human-readable hex value for this invalid character.
        char buf[16];
        snprintf(buf, sizeof(buf), "%X", ch);
        os << "[NON-XML-CHAR-0x" << buf << "]";
      }
      first = next;
    } else {
      // Consume exactly one byte so that decoding can resynchronize on the
      // following byte.
      ch = static_cast<unsigned char>(*first++);
      // Use a human-readable hex value for this invalid byte.
      char buf[16];
      snprintf(buf, sizeof(buf), "%X", ch);
      os << "[NON-UTF-8-BYTE-0x" << buf << "]";
    }
  }
  return os;
}