From 74b51ac1e5fb76250251a66d8d326baaaf1f1cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sat, 26 Oct 2002 14:50:45 +0000 Subject: Patch #613256: Add nescape method to xml.sax.saxutils. --- Doc/lib/xmlsaxutils.tex | 11 +++++++++++ Lib/test/output/test_sax | 5 ++++- Lib/test/test_sax.py | 14 +++++++++++++- Lib/xml/sax/saxutils.py | 32 ++++++++++++++++++++++++++------ Misc/ACKS | 1 + Misc/NEWS | 3 +++ 6 files changed, 58 insertions(+), 8 deletions(-) diff --git a/Doc/lib/xmlsaxutils.tex b/Doc/lib/xmlsaxutils.tex index 6ab8d4a..91f25a4 100644 --- a/Doc/lib/xmlsaxutils.tex +++ b/Doc/lib/xmlsaxutils.tex @@ -22,6 +22,17 @@ either in direct use, or as base classes. strings; each key will be replaced with its corresponding value. \end{funcdesc} +\begin{funcdesc}{unescape}{data\optional{, entities}} + Unescape \character{\&}, \character{\<}, and \character{\>} + in a string of data. + + You can unescape other strings of data by passing a dictionary as the + optional \var{entities} parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + + \versionadded{2.3} +\end{funcdesc} + \begin{funcdesc}{quoteattr}{data\optional{, entities}} Similar to \function{escape()}, but also prepares \var{data} to be used as an attribute value. The return value is a quoted version of diff --git a/Lib/test/output/test_sax b/Lib/test/output/test_sax index 8aa5a77..cfb56cb 100644 --- a/Lib/test/output/test_sax +++ b/Lib/test/output/test_sax @@ -29,6 +29,9 @@ Passed test_nsattrs_wattr Passed test_quoteattr_basic Passed test_single_double_quoteattr Passed test_single_quoteattr +Passed test_unescape_all +Passed test_unescape_basic +Passed test_unescape_extra Passed test_xmlgen_attr_escape Passed test_xmlgen_basic Passed test_xmlgen_content @@ -36,4 +39,4 @@ Passed test_xmlgen_content_escape Passed test_xmlgen_ignorable Passed test_xmlgen_ns Passed test_xmlgen_pi -37 tests, 0 failures +40 tests, 0 failures diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 1200329..3c5b11a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -8,7 +8,8 @@ try: except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise ImportError("no XML parsers available") -from xml.sax.saxutils import XMLGenerator, escape, quoteattr, XMLFilterBase +from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ + XMLFilterBase from xml.sax.expatreader import create_parser from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from cStringIO import StringIO @@ -70,6 +71,17 @@ def test_escape_all(): def test_escape_extra(): return escape("Hei på deg", {"å" : "å"}) == "Hei på deg" +# ===== unescape + +def test_unescape_basic(): + return unescape("Donald Duck & Co") == "Donald Duck & Co" + +def test_unescape_all(): + return unescape("<Donald Duck & Co>") == "" + +def test_unescape_extra(): + return unescape("Hei på deg", {"å" : "å"}) == "Hei på deg" + # ===== quoteattr def test_quoteattr_basic(): diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py index 8a96be6..c369f98 100644 --- a/Lib/xml/sax/saxutils.py +++ b/Lib/xml/sax/saxutils.py @@ -12,20 +12,40 @@ try: except AttributeError: _StringTypes = [types.StringType] +def __dict_replace(s, d): + """Replace substrings of a string using a dictionary.""" + for key, value in d.items(): + s = s.replace(key, value) + return s def escape(data, entities={}): """Escape &, <, and > in a string of data. - + You can escape other strings of data by passing a dictionary as the optional entities parameter. The keys and values must all be strings; each key will be replaced with its corresponding value. """ + + # must do ampersand first data = data.replace("&", "&") - data = data.replace("<", "<") - data = data.replace(">", ">") - for chars, entity in entities.items(): - data = data.replace(chars, entity) - return data + data = __dict_replace(data, {"<" : "<", + ">" : ">", + }) + return __dict_replace(data, entities) + +def unescape(data, entities={}): + """Unescape &, <, and > in a string of data. + + You can unescape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = __dict_replace(data, {"<" : "<", + ">" : ">", + }) + # must do ampersand last + data = data.replace("&", "&") + return __dict_replace(data, entities) def quoteattr(data, entities={}): """Escape and quote an attribute value. diff --git a/Misc/ACKS b/Misc/ACKS index 6f17773..901bb45 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -56,6 +56,7 @@ Pablo Bleyer Erik van Blokland Finn Bock Paul Boddie +Matthew Boedicker David Bolen Jurjen Bos Peter Bosch diff --git a/Misc/NEWS b/Misc/NEWS index a0ead8a..1e83389 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -352,6 +352,9 @@ Extension modules Library ------- +- xml.sax.saxutils.unescape has been added, to replace entity references + with their entity value. + - Queue.Queue.{put,get} now support an optional timeout argument. - Various features of Tk 8.4 are exposed in Tkinter.py. The multiple -- cgit v0.12 70' href='#n70'>70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
"""Test the binascii C module."""

import unittest
import binascii
import array

# Note: "*_hex" functions are aliases for "(un)hexlify"
b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_hqx', 'b2a_qp', 'b2a_uu',
                 'hexlify', 'rlecode_hqx']
a2b_functions = ['a2b_base64', 'a2b_hex', 'a2b_hqx', 'a2b_qp', 'a2b_uu',
                 'unhexlify', 'rledecode_hqx']
all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx']


class BinASCIITest(unittest.TestCase):

    type2test = bytes
    # Create binary test data
    rawdata = b"The quick brown fox jumps over the lazy dog.\r\n"
    # Be slow so we don't depend on other modules
    rawdata += bytes(range(256))
    rawdata += b"\r\nHello world.\n"

    def setUp(self):
        self.data = self.type2test(self.rawdata)

    def test_exceptions(self):
        # Check module exceptions
        self.assertTrue(issubclass(binascii.Error, Exception))
        self.assertTrue(issubclass(binascii.Incomplete, Exception))

    def test_functions(self):
        # Check presence of all functions
        for name in all_functions:
            self.assertTrue(hasattr(getattr(binascii, name), '__call__'))
            self.assertRaises(TypeError, getattr(binascii, name))

    def test_returned_value(self):
        # Limit to the minimum of all limits (b2a_uu)
        MAX_ALL = 45
        raw = self.rawdata[:MAX_ALL]
        for fa, fb in zip(a2b_functions, b2a_functions):
            a2b = getattr(binascii, fa)
            b2a = getattr(binascii, fb)
            try:
                a = b2a(self.type2test(raw))
                res = a2b(self.type2test(a))
            except Exception as err:
                self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
            if fb == 'b2a_hqx':
                # b2a_hqx returns a tuple
                res, _ = res
            self.assertEqual(res, raw, "{}/{} conversion: "
                             "{!r} != {!r}".format(fb, fa, res, raw))
            self.assertIsInstance(res, bytes)
            self.assertIsInstance(a, bytes)
            self.assertLess(max(a), 128)
        self.assertIsInstance(binascii.crc_hqx(raw, 0), int)
        self.assertIsInstance(binascii.crc32(raw), int)

    def test_base64valid(self):
        # Test base64 with valid data
        MAX_BASE64 = 57
        lines = []
        for i in range(0, len(self.rawdata), MAX_BASE64):
            b = self.type2test(self.rawdata[i:i+MAX_BASE64])
            a = binascii.b2a_base64(b)
            lines.append(a)
        res = bytes()
        for line in lines:
            a = self.type2test(line)
            b = binascii.a2b_base64(a)
            res += b
        self.assertEqual(res, self.rawdata)

    def test_base64invalid(self):
        # Test base64 with random invalid characters sprinkled throughout
        # (This requires a new version of binascii.)
        MAX_BASE64 = 57
        lines = []
        for i in range(0, len(self.data), MAX_BASE64):
            b = self.type2test(self.rawdata[i:i+MAX_BASE64])
            a = binascii.b2a_base64(b)
            lines.append(a)

        fillers = bytearray()
        valid = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
        for i in range(256):
            if i not in valid:
                fillers.append(i)
        def addnoise(line):
            noise = fillers
            ratio = len(line) // len(noise)
            res = bytearray()
            while line and noise:
                if len(line) // len(noise) > ratio:
                    c, line = line[0], line[1:]
                else:
                    c, noise = noise[0], noise[1:]
                res.append(c)
            return res + noise + line
        res = bytearray()
        for line in map(addnoise, lines):
            a = self.type2test(line)
            b = binascii.a2b_base64(a)
            res += b
        self.assertEqual(res, self.rawdata)

        # Test base64 with just invalid characters, which should return
        # empty strings. TBD: shouldn't it raise an exception instead ?
        self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')

    def test_uu(self):
        MAX_UU = 45
        lines = []
        for i in range(0, len(self.data), MAX_UU):
            b = self.type2test(self.rawdata[i:i+MAX_UU])
            a = binascii.b2a_uu(b)
            lines.append(a)
        res = bytes()
        for line in lines:
            a = self.type2test(line)
            b = binascii.a2b_uu(a)
            res += b
        self.assertEqual(res, self.rawdata)

        self.assertEqual(binascii.a2b_uu(b"\x7f"), b"\x00"*31)
        self.assertEqual(binascii.a2b_uu(b"\x80"), b"\x00"*32)
        self.assertEqual(binascii.a2b_uu(b"\xff"), b"\x00"*31)
        self.assertRaises(binascii.Error, binascii.a2b_uu, b"\xff\x00")
        self.assertRaises(binascii.Error, binascii.a2b_uu, b"!!!!")

        self.assertRaises(binascii.Error, binascii.b2a_uu, 46*b"!")

        # Issue #7701 (crash on a pydebug build)
        self.assertEqual(binascii.b2a_uu(b'x'), b'!>   \n')

    def test_crc_hqx(self):
        crc = binascii.crc_hqx(self.type2test(b"Test the CRC-32 of"), 0)
        crc = binascii.crc_hqx(self.type2test(b" this string."), crc)
        self.assertEqual(crc, 14290)

        self.assertRaises(TypeError, binascii.crc_hqx)
        self.assertRaises(TypeError, binascii.crc_hqx, self.type2test(b''))

        for crc in 0, 1, 0x1234, 0x12345, 0x12345678, -1:
            self.assertEqual(binascii.crc_hqx(self.type2test(b''), crc),
                             crc & 0xffff)

    def test_crc32(self):
        crc = binascii.crc32(self.type2test(b"Test the CRC-32 of"))
        crc = binascii.crc32(self.type2test(b" this string."), crc)
        self.assertEqual(crc, 1571220330)

        self.assertRaises(TypeError, binascii.crc32)

    def test_hqx(self):
        # Perform binhex4 style RLE-compression
        # Then calculate the hexbin4 binary-to-ASCII translation
        rle = binascii.rlecode_hqx(self.data)
        a = binascii.b2a_hqx(self.type2test(rle))

        b, _ = binascii.a2b_hqx(self.type2test(a))
        res = binascii.rledecode_hqx(b)
        self.assertEqual(res, self.rawdata)

    def test_rle(self):
        # test repetition with a repetition longer than the limit of 255
        data = (b'a' * 100 + b'b' + b'c' * 300)

        encoded = binascii.rlecode_hqx(data)
        self.assertEqual(encoded,
                         (b'a\x90d'      # 'a' * 100
                          b'b'           # 'b'
                          b'c\x90\xff'   # 'c' * 255
                          b'c\x90-'))    # 'c' * 45

        decoded = binascii.rledecode_hqx(encoded)
        self.assertEqual(decoded, data)

    def test_hex(self):
        # test hexlification
        s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
        t = binascii.b2a_hex(self.type2test(s))
        u = binascii.a2b_hex(self.type2test(t))
        self.assertEqual(s, u)
        self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1])
        self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1] + b'q')

        # Confirm that b2a_hex == hexlify and a2b_hex == unhexlify
        self.assertEqual(binascii.hexlify(self.type2test(s)), t)
        self.assertEqual(binascii.unhexlify(self.type2test(t)), u)

    def test_qp(self):
        binascii.a2b_qp(data=b"", header=False)  # Keyword arguments allowed

        # A test for SF bug 534347 (segfaults without the proper fix)
        try:
            binascii.a2b_qp(b"", **{1:1})
        except TypeError:
            pass
        else:
            self.fail("binascii.a2b_qp(**{1:1}) didn't raise TypeError")

        self.assertEqual(binascii.a2b_qp(b"= "), b"= ")
        self.assertEqual(binascii.a2b_qp(b"=="), b"=")
        self.assertEqual(binascii.a2b_qp(b"=AX"), b"=AX")
        self.assertRaises(TypeError, binascii.b2a_qp, foo="bar")
        self.assertEqual(binascii.a2b_qp(b"=00\r\n=00"), b"\x00\r\n\x00")
        self.assertEqual(
            binascii.b2a_qp(b"\xff\r\n\xff\n\xff"),
            b"=FF\r\n=FF\r\n=FF")
        self.assertEqual(
            binascii.b2a_qp(b"0"*75+b"\xff\r\n\xff\r\n\xff"),
            b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF")

        self.assertEqual(binascii.b2a_qp(b'\0\n'), b'=00\n')
        self.assertEqual(binascii.b2a_qp(b'\0\n', quotetabs=True), b'=00\n')
        self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n'), b'foo\tbar=09\n')
        self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n', quotetabs=True),
                         b'foo=09bar=09\n')

        self.assertEqual(binascii.b2a_qp(b'.'), b'=2E')
        self.assertEqual(binascii.b2a_qp(b'.\n'), b'=2E\n')
        self.assertEqual(binascii.b2a_qp(b'a.\n'), b'a.\n')

    def test_empty_string(self):
        # A test for SF bug #1022953.  Make sure SystemError is not raised.
        empty = self.type2test(b'')
        for func in all_functions:
            if func == 'crc_hqx':
                # crc_hqx needs 2 arguments
                binascii.crc_hqx(empty, 0)
                continue
            f = getattr(binascii, func)
            try:
                f(empty)
            except Exception as err:
                self.fail("{}({!r}) raises {!r}".format(func, empty, err))

    def test_unicode_b2a(self):
        # Unicode strings are not accepted by b2a_* functions.
        for func in set(all_functions) - set(a2b_functions) | {'rledecode_hqx'}:
            try:
                self.assertRaises(TypeError, getattr(binascii, func), "test")
            except Exception as err:
                self.fail('{}("test") raises {!r}'.format(func, err))
        # crc_hqx needs 2 arguments
        self.assertRaises(TypeError, binascii.crc_hqx, "test", 0)

    def test_unicode_a2b(self):
        # Unicode strings are accepted by a2b_* functions.
        MAX_ALL = 45
        raw = self.rawdata[:MAX_ALL]
        for fa, fb in zip(a2b_functions, b2a_functions):
            if fa == 'rledecode_hqx':
                # Takes non-ASCII data
                continue
            a2b = getattr(binascii, fa)
            b2a = getattr(binascii, fb)
            try:
                a = b2a(self.type2test(raw))
                binary_res = a2b(a)
                a = a.decode('ascii')
                res = a2b(a)
            except Exception as err:
                self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
            if fb == 'b2a_hqx':
                # b2a_hqx returns a tuple
                res, _ = res
                binary_res, _ = binary_res
            self.assertEqual(res, raw, "{}/{} conversion: "
                             "{!r} != {!r}".format(fb, fa, res, raw))
            self.assertEqual(res, binary_res)
            self.assertIsInstance(res, bytes)
            # non-ASCII string
            self.assertRaises(ValueError, a2b, "\x80")

    def test_b2a_base64_newline(self):
        # Issue #25357: test newline parameter
        b = self.type2test(b'hello')
        self.assertEqual(binascii.b2a_base64(b),
                         b'aGVsbG8=\n')
        self.assertEqual(binascii.b2a_base64(b, newline=True),
                         b'aGVsbG8=\n')
        self.assertEqual(binascii.b2a_base64(b, newline=False),
                         b'aGVsbG8=')


class ArrayBinASCIITest(BinASCIITest):
    def type2test(self, s):
        return array.array('B', list(s))


class BytearrayBinASCIITest(BinASCIITest):
    type2test = bytearray


class MemoryviewBinASCIITest(BinASCIITest):
    type2test = memoryview


if __name__ == "__main__":
    unittest.main()