summaryrefslogtreecommitdiffstats
path: root/Source/cmXMLSafe.cxx
blob: d31a239c737b8e3c65c16c2a9907003ee2b2ae36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing for details.  */
#include "cmXMLSafe.h"

#include <cstdio>
#include <cstring>
#include <sstream>

#include "cm_utf8.h"

cmXMLSafe::cmXMLSafe(const char* s)
  : Data(s)
  , Size(static_cast<unsigned long>(strlen(s)))
  , DoQuotes(true)
{
}

cmXMLSafe::cmXMLSafe(std::string const& s)
  : Data(s.c_str())
  , Size(static_cast<unsigned long>(s.length()))
  , DoQuotes(true)
{
}

cmXMLSafe& cmXMLSafe::Quotes(bool b)
{
  this->DoQuotes = b;
  return *this;
}

std::string cmXMLSafe::str() const
{
  std::ostringstream ss;
  ss << *this;
  return ss.str();
}

std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self)
{
  char const* first = self.Data;
  char const* last = self.Data + self.Size;
  while (first != last) {
    unsigned int ch;
    if (const char* next = cm_utf8_decode_character(first, last, &ch)) {
      // http://www.w3.org/TR/REC-xml/#NT-Char
      if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) ||
          (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA ||
          ch == 0xD) {
        switch (ch) {
          // Escape XML control characters.
          case '&':
            os << "&amp;";
            break;
          case '<':
            os << "&lt;";
            break;
          case '>':
            os << "&gt;";
            break;
          case '"':
            os << (self.DoQuotes ? "&quot;" : "\"");
            break;
          case '\'':
            os << (self.DoQuotes ? "&apos;" : "'");
            break;
          case '\r':
            break; // Ignore CR
          // Print the UTF-8 character.
          default:
            os.write(first, next - first);
            break;
        }
      } else {
        // Use a human-readable hex value for this invalid character.
        char buf[16];
        sprintf(buf, "%X", ch);
        os << "[NON-XML-CHAR-0x" << buf << "]";
      }

      first = next;
    } else {
      ch = static_cast<unsigned char>(*first++);
      // Use a human-readable hex value for this invalid byte.
      char buf[16];
      sprintf(buf, "%X", ch);
      os << "[NON-UTF-8-BYTE-0x" << buf << "]";
    }
  }
  return os;
}