summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_hmac.py
blob: 8dcbe7e89c0fe266b2b1a96d5cf68501241d94a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import hmac
import hashlib
import unittest
import warnings
from test import support

class TestVectorsTestCase(unittest.TestCase):

    def test_md5_vectors(self):
        # Test the HMAC module against test vectors from the RFC.

        def md5test(key, data, digest):
            h = hmac.HMAC(key, data)
            self.assertEqual(h.hexdigest().upper(), digest.upper())

        md5test(b"\x0b" * 16,
                b"Hi There",
                "9294727A3638BB1C13F48EF8158BFC9D")

        md5test(b"Jefe",
                b"what do ya want for nothing?",
                "750c783e6ab0b503eaa86e310a5db738")

        md5test(b"\xaa" * 16,
                b"\xdd" * 50,
                "56be34521d144c88dbb8c733f0e8b3f6")

        md5test(bytes(range(1, 26)),
                b"\xcd" * 50,
                "697eaf0aca3a3aea3a75164746ffaa79")

        md5test(b"\x0C" * 16,
                b"Test With Truncation",
                "56461ef2342edc00f9bab995690efd4c")

        md5test(b"\xaa" * 80,
                b"Test Using Larger Than Block-Size Key - Hash Key First",
                "6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd")

        md5test(b"\xaa" * 80,
                (b"Test Using Larger Than Block-Size Key "
                 b"and Larger Than One Block-Size Data"),
                "6f630fad67cda0ee1fb1f562db3aa53e")

    def test_sha_vectors(self):
        def shatest(key, data, digest):
            h = hmac.HMAC(key, data, digestmod=hashlib.sha1)
            self.assertEqual(h.hexdigest().upper(), digest.upper())

        shatest(b"\x0b" * 20,
                b"Hi There",
                "b617318655057264e28bc0b6fb378c8ef146be00")

        shatest(b"Jefe",
                b"what do ya want for nothing?",
                "effcdf6ae5eb2fa2d27416d5f184df9c259a7c79")

        shatest(b"\xAA" * 20,
                b"\xDD" * 50,
                "125d7342b9ac11cd91a39af48aa17b4f63f175d3")

        shatest(bytes(range(1, 26)),
                b"\xCD" * 50,
                "4c9007f4026250c6bc8414f9bf50c86c2d7235da")

        shatest(b"\x0C" * 20,
                b"Test With Truncation",
                "4c1a03424b55e07fe7f27be1d58bb9324a9a5a04")

        shatest(b"\xAA" * 80,
                b"Test Using Larger Than Block-Size Key - Hash Key First",
                "aa4ae5e15272d00e95705637ce8a3b55ed402112")

        shatest(b"\xAA" * 80,
                (b"Test Using Larger Than Block-Size Key "
                 b"and Larger Than One Block-Size Data"),
                "e8e99d0f45237d786d6bbaa7965c7808bbff1a91")

    def _rfc4231_test_cases(self, hashfunc):
        def hmactest(key, data, hexdigests):
            h = hmac.HMAC(key, data, digestmod=hashfunc)
            self.assertEqual(h.hexdigest().lower(), hexdigests[hashfunc])

        # 4.2.  Test Case 1
        hmactest(key = b'\x0b'*20,
                 data = b'Hi There',
                 hexdigests = {
                   hashlib.sha224: '896fb1128abbdf196832107cd49df33f'
                                   '47b4b1169912ba4f53684b22',
                   hashlib.sha256: 'b0344c61d8db38535ca8afceaf0bf12b'
                                   '881dc200c9833da726e9376c2e32cff7',
                   hashlib.sha384: 'afd03944d84895626b0825f4ab46907f'
                                   '15f9dadbe4101ec682aa034c7cebc59c'
                                   'faea9ea9076ede7f4af152e8b2fa9cb6',
                   hashlib.sha512: '87aa7cdea5ef619d4ff0b4241a1d6cb0'
                                   '2379f4e2ce4ec2787ad0b30545e17cde'
                                   'daa833b7d6b8a702038b274eaea3f4e4'
                                   'be9d914eeb61f1702e696c203a126854',
                 })

        # 4.3.  Test Case 2
        hmactest(key = b'Jefe',
                 data = b'what do ya want for nothing?',
                 hexdigests = {
                   hashlib.sha224: 'a30e01098bc6dbbf45690f3a7e9e6d0f'
                                   '8bbea2a39e6148008fd05e44',
                   hashlib.sha256: '5bdcc146bf60754e6a042426089575c7'
                                   '5a003f089d2739839dec58b964ec3843',
                   hashlib.sha384: 'af45d2e376484031617f78d2b58a6b1b'
                                   '9c7ef464f5a01b47e42ec3736322445e'
                                   '8e2240ca5e69e2c78b3239ecfab21649',
                   hashlib.sha512: '164b7a7bfcf819e2e395fbe73b56e0a3'
                                   '87bd64222e831fd610270cd7ea250554'
                                   '9758bf75c05a994a6d034f65f8f0e6fd'
                                   'caeab1a34d4a6b4b636e070a38bce737',
                 })

        # 4.4.  Test Case 3
        hmactest(key = b'\xaa'*20,
                 data = b'\xdd'*50,
                 hexdigests = {
                   hashlib.sha224: '7fb3cb3588c6c1f6ffa9694d7d6ad264'
                                   '9365b0c1f65d69d1ec8333ea',
                   hashlib.sha256: '773ea91e36800e46854db8ebd09181a7'
                                   '2959098b3ef8c122d9635514ced565fe',
                   hashlib.sha384: '88062608d3e6ad8a0aa2ace014c8a86f'
                                   '0aa635d947ac9febe83ef4e55966144b'
                                   '2a5ab39dc13814b94e3ab6e101a34f27',
                   hashlib.sha512: 'fa73b0089d56a284efb0f0756c890be9'
                                   'b1b5dbdd8ee81a3655f83e33b2279d39'
                                   'bf3e848279a722c806b485a47e67c807'
                                   'b946a337bee8942674278859e13292fb',
                 })

        # 4.5.  Test Case 4
        hmactest(key = bytes(x for x in range(0x01, 0x19+1)),
                 data = b'\xcd'*50,
                 hexdigests = {
                   hashlib.sha224: '6c11506874013cac6a2abc1bb382627c'
                                   'ec6a90d86efc012de7afec5a',
                   hashlib.sha256: '82558a389a443c0ea4cc819899f2083a'
                                   '85f0faa3e578f8077a2e3ff46729665b',
                   hashlib.sha384: '3e8a69b7783c25851933ab6290af6ca7'
                                   '7a9981480850009cc5577c6e1f573b4e'
                                   '6801dd23c4a7d679ccf8a386c674cffb',
                   hashlib.sha512: 'b0ba465637458c6990e5a8c5f61d4af7'
                                   'e576d97ff94b872de76f8050361ee3db'
                                   'a91ca5c11aa25eb4d679275cc5788063'
                                   'a5f19741120c4f2de2adebeb10a298dd',
                 })

        # 4.7.  Test Case 6
        hmactest(key = b'\xaa'*131,
                 data = b'Test Using Larger Than Block-Siz'
                        b'e Key - Hash Key First',
                 hexdigests = {
                   hashlib.sha224: '95e9a0db962095adaebe9b2d6f0dbce2'
                                   'd499f112f2d2b7273fa6870e',
                   hashlib.sha256: '60e431591ee0b67f0d8a26aacbf5b77f'
                                   '8e0bc6213728c5140546040f0ee37f54',
                   hashlib.sha384: '4ece084485813e9088d2c63a041bc5b4'
                                   '4f9ef1012a2b588f3cd11f05033ac4c6'
                                   '0c2ef6ab4030fe8296248df163f44952',
                   hashlib.sha512: '80b24263c7c1a3ebb71493c1dd7be8b4'
                                   '9b46d1f41b4aeec1121b013783f8f352'
                                   '6b56d037e05f2598bd0fd2215d6a1e52'
                                   '95e64f73f63f0aec8b915a985d786598',
                 })

        # 4.8.  Test Case 7
        hmactest(key = b'\xaa'*131,
                 data = b'This is a test using a larger th'
                        b'an block-size key and a larger t'
                        b'han block-size data. The key nee'
                        b'ds to be hashed before being use'
                        b'd by the HMAC algorithm.',
                 hexdigests = {
                   hashlib.sha224: '3a854166ac5d9f023f54d517d0b39dbd'
                                   '946770db9c2b95c9f6f565d1',
                   hashlib.sha256: '9b09ffa71b942fcb27635fbcd5b0e944'
                                   'bfdc63644f0713938a7f51535c3a35e2',
                   hashlib.sha384: '6617178e941f020d351e2f254e8fd32c'
                                   '602420feb0b8fb9adccebb82461e99c5'
                                   'a678cc31e799176d3860e6110c46523e',
                   hashlib.sha512: 'e37b6a775dc87dbaa4dfa9f96e5e3ffd'
                                   'debd71f8867289865df5a32d20cdc944'
                                   'b6022cac3c4982b10d5eeb55c3e4de15'
                                   '134676fb6de0446065c97440fa8c6a58',
                 })

    def test_sha224_rfc4231(self):
        self._rfc4231_test_cases(hashlib.sha224)

    def test_sha256_rfc4231(self):
        self._rfc4231_test_cases(hashlib.sha256)

    def test_sha384_rfc4231(self):
        self._rfc4231_test_cases(hashlib.sha384)

    def test_sha512_rfc4231(self):
        self._rfc4231_test_cases(hashlib.sha512)

    def test_legacy_block_size_warnings(self):
        class MockCrazyHash(object):
            """Ain't no block_size attribute here."""
            def __init__(self, *args):
                self._x = hashlib.sha1(*args)
                self.digest_size = self._x.digest_size
            def update(self, v):
                self._x.update(v)
            def digest(self):
                return self._x.digest()

        with warnings.catch_warnings():
            warnings.simplefilter('error', RuntimeWarning)
            try:
                hmac.HMAC(b'a', b'b', digestmod=MockCrazyHash)
            except RuntimeWarning:
                pass
            else:
                self.fail('Expected warning about missing block_size')

            MockCrazyHash.block_size = 1
            try:
                hmac.HMAC(b'a', b'b', digestmod=MockCrazyHash)
            except RuntimeWarning:
                pass
            else:
                self.fail('Expected warning about small block_size')



class ConstructorTestCase(unittest.TestCase):

    def test_normal(self):
        # Standard constructor call.
        failed = 0
        try:
            h = hmac.HMAC(b"key")
        except:
            self.fail("Standard constructor call raised exception.")

    def test_withtext(self):
        # Constructor call with text.
        try:
            h = hmac.HMAC(b"key", b"hash this!")
        except:
            self.fail("Constructor call with text argument raised exception.")

    def test_withmodule(self):
        # Constructor call with text and digest module.
        try:
            h = hmac.HMAC(b"key", b"", hashlib.sha1)
        except:
            self.fail("Constructor call with hashlib.sha1 raised exception.")

class SanityTestCase(unittest.TestCase):

    def test_default_is_md5(self):
        # Testing if HMAC defaults to MD5 algorithm.
        # NOTE: this whitebox test depends on the hmac class internals
        h = hmac.HMAC(b"key")
        self.assertEqual(h.digest_cons, hashlib.md5)

    def test_exercise_all_methods(self):
        # Exercising all methods once.
        # This must not raise any exceptions
        try:
            h = hmac.HMAC(b"my secret key")
            h.update(b"compute the hash of this text!")
            dig = h.digest()
            dig = h.hexdigest()
            h2 = h.copy()
        except:
            self.fail("Exception raised during normal usage of HMAC class.")

class CopyTestCase(unittest.TestCase):

    def test_attributes(self):
        # Testing if attributes are of same type.
        h1 = hmac.HMAC(b"key")
        h2 = h1.copy()
        self.failUnless(h1.digest_cons == h2.digest_cons,
            "digest constructors don't match.")
        self.assertEqual(type(h1.inner), type(h2.inner),
            "Types of inner don't match.")
        self.assertEqual(type(h1.outer), type(h2.outer),
            "Types of outer don't match.")

    def test_realcopy(self):
        # Testing if the copy method created a real copy.
        h1 = hmac.HMAC(b"key")
        h2 = h1.copy()
        # Using id() in case somebody has overridden __eq__/__ne__.
        self.failUnless(id(h1) != id(h2), "No real copy of the HMAC instance.")
        self.failUnless(id(h1.inner) != id(h2.inner),
            "No real copy of the attribute 'inner'.")
        self.failUnless(id(h1.outer) != id(h2.outer),
            "No real copy of the attribute 'outer'.")

    def test_equality(self):
        # Testing if the copy has the same digests.
        h1 = hmac.HMAC(b"key")
        h1.update(b"some random text")
        h2 = h1.copy()
        self.assertEqual(h1.digest(), h2.digest(),
            "Digest of copy doesn't match original digest.")
        self.assertEqual(h1.hexdigest(), h2.hexdigest(),
            "Hexdigest of copy doesn't match original hexdigest.")

def test_main():
    support.run_unittest(l kwd">Py_DECREF(result);
            goto onError;
        }
        break;
    }
    if (i == len) {
        /* XXX Perhaps we should cache misses too ? */
        PyErr_Format(PyExc_LookupError,
                     "unknown encoding: %s", encoding);
        goto onError;
    }

    /* Cache and return the result */
    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
        Py_DECREF(result);
        goto onError;
    }
    Py_DECREF(args);
    return result;

 onError:
    Py_XDECREF(args);
    return NULL;
}

/* Codec registry encoding check API. */

int PyCodec_KnownEncoding(const char *encoding)
{
    PyObject *codecs;

    codecs = _PyCodec_Lookup(encoding);
    if (!codecs) {
        PyErr_Clear();
        return 0;
    }
    else {
        Py_DECREF(codecs);
        return 1;
    }
}

static
PyObject *args_tuple(PyObject *object,
                     const char *errors)
{
    PyObject *args;

    args = PyTuple_New(1 + (errors != NULL));
    if (args == NULL)
        return NULL;
    Py_INCREF(object);
    PyTuple_SET_ITEM(args,0,object);
    if (errors) {
        PyObject *v;

        v = PyUnicode_FromString(errors);
        if (v == NULL) {
            Py_DECREF(args);
            return NULL;
        }
        PyTuple_SET_ITEM(args, 1, v);
    }
    return args;
}

/* Helper function to get a codec item */

static
PyObject *codec_getitem(const char *encoding, int index)
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    v = PyTuple_GET_ITEM(codecs, index);
    Py_DECREF(codecs);
    Py_INCREF(v);
    return v;
}

/* Helper function to create an incremental codec. */

static
PyObject *codec_getincrementalcodec(const char *encoding,
                                    const char *errors,
                                    const char *attrname)
{
    PyObject *codecs, *ret, *inccodec;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    inccodec = PyObject_GetAttrString(codecs, attrname);
    Py_DECREF(codecs);
    if (inccodec == NULL)
        return NULL;
    if (errors)
        ret = PyObject_CallFunction(inccodec, "s", errors);
    else
        ret = PyObject_CallFunction(inccodec, NULL);
    Py_DECREF(inccodec);
    return ret;
}

/* Helper function to create a stream codec. */

static
PyObject *codec_getstreamcodec(const char *encoding,
                               PyObject *stream,
                               const char *errors,
                               const int index)
{
    PyObject *codecs, *streamcodec, *codeccls;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;

    codeccls = PyTuple_GET_ITEM(codecs, index);
    if (errors != NULL)
        streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
    else
        streamcodec = PyObject_CallFunction(codeccls, "O", stream);
    Py_DECREF(codecs);
    return streamcodec;
}

/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount.

 */

PyObject *PyCodec_Encoder(const char *encoding)
{
    return codec_getitem(encoding, 0);
}

PyObject *PyCodec_Decoder(const char *encoding)
{
    return codec_getitem(encoding, 1);
}

PyObject *PyCodec_IncrementalEncoder(const char *encoding,
                                     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
}

PyObject *PyCodec_IncrementalDecoder(const char *encoding,
                                     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
}

PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    return codec_getstreamcodec(encoding, stream, errors, 2);
}

PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    return codec_getstreamcodec(encoding, stream, errors, 3);
}

/* Encode an object (e.g. an Unicode object) using the given encoding
   and return the resulting encoded object (usually a Python string).

   errors is passed to the encoder factory as argument if non-NULL. */

PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v = NULL;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(encoder, args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object, integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result,0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(result);
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    return NULL;
}

/* Decode an object (usually a Python string) using the given encoding
   and return an equivalent object (e.g. an Unicode object).

   errors is passed to the decoder factory as argument if non-NULL. */

PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    decoder = PyCodec_Decoder(encoding);
    if (decoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(decoder,args);
    if (result == NULL)
        goto onError;
    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result,0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(decoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(decoder);
    Py_XDECREF(result);
    return NULL;
}

/* Register the error handling callback function error under the name
   name. This function will be called by the codec when it encounters
   an unencodable characters/undecodable bytes and doesn't know the
   callback name, when name is specified as the error parameter
   in the call to the encode/decode function.
   Return 0 on success, -1 on error */
int PyCodec_RegisterError(const char *name, PyObject *error)
{
    PyInterpreterState *interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        return -1;
    if (!PyCallable_Check(error)) {
        PyErr_SetString(PyExc_TypeError, "handler must be callable");
        return -1;
    }
    return PyDict_SetItemString(interp->codec_error_registry,
                                (char *)name, error);
}

/* Lookup the error handling callback function registered under the
   name error. As a special case NULL can be passed, in which case
   the error handling callback for strict encoding will be returned. */
PyObject *PyCodec_LookupError(const char *name)
{
    PyObject *handler = NULL;

    PyInterpreterState *interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        return NULL;

    if (name==NULL)
        name = "strict";
    handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
    if (!handler)
        PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
    else
        Py_INCREF(handler);
    return handler;
}

static void wrong_exception_type(PyObject *exc)
{
    _Py_IDENTIFIER(__class__);
    _Py_IDENTIFIER(__name__);
    PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
    if (type != NULL) {
        PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
        Py_DECREF(type);
        if (name != NULL) {
            PyErr_Format(PyExc_TypeError,
                         "don't know how to handle %S in error callback", name);
            Py_DECREF(name);
        }
    }
}

PyObject *PyCodec_StrictErrors(PyObject *exc)
{
    if (PyExceptionInstance_Check(exc))
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    else
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
    return NULL;
}


PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    Py_ssize_t end;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
}


PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
    Py_ssize_t start, end, i, len;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, '?');
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i = 0; i < len; ++i)
            PyUnicode_WRITE(kind, data, i, '?');
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(Cn)",
                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
                             end);
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        PyObject *res;
        int kind;
        void *data;
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        len = end - start;
        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
        if (res == NULL)
            return NULL;
        kind = PyUnicode_KIND(res);
        data = PyUnicode_DATA(res);
        for (i=0; i < len; i++)
            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
        assert(_PyUnicode_CheckConsistency(res, 1));
        return Py_BuildValue("(Nn)", res, end);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        int ressize;
        Py_UCS4 ch;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            ch = PyUnicode_READ_CHAR(object, i);
            if (ch<10)
                ressize += 2+1+1;
            else if (ch<100)
                ressize += 2+2+1;
            else if (ch<1000)
                ressize += 2+3+1;
            else if (ch<10000)
                ressize += 2+4+1;
            else if (ch<100000)
                ressize += 2+5+1;
            else if (ch<1000000)
                ressize += 2+6+1;
            else
                ressize += 2+7+1;
        }
        /* allocate replacement */
        res = PyUnicode_New(ressize, 127);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyUnicode_1BYTE_DATA(res);
        /* generate replacement */
        for (i = start; i < end; ++i) {
            int digits;
            int base;
            ch = PyUnicode_READ_CHAR(object, i);
            *outp++ = '&';
            *outp++ = '#';
            if (ch<10) {
                digits = 1;
                base = 1;
            }
            else if (ch<100) {
                digits = 2;
                base = 10;
            }
            else if (ch<1000) {
                digits = 3;
                base = 100;
            }
            else if (ch<10000) {
                digits = 4;
                base = 1000;
            }
            else if (ch<100000) {
                digits = 5;
                base = 10000;
            }
            else if (ch<1000000) {
                digits = 6;
                base = 100000;
            }
            else {
                digits = 7;
                base = 1000000;
            }
            while (digits-->0) {
                *outp++ = '0' + ch/base;
                ch %= base;
                base /= 10;
            }
            *outp++ = ';';
        }
        assert(_PyUnicode_CheckConsistency(res, 1));
        restuple = Py_BuildValue("(Nn)", res, end);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t i;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        unsigned char *outp;
        int ressize;
        Py_UCS4 c;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        for (i = start, ressize = 0; i < end; ++i) {
            /* object is guaranteed to be "ready" */
            c = PyUnicode_READ_CHAR(object, i);
            if (c >= 0x10000) {
                ressize += 1+1+8;
            }
            else if (c >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_New(ressize, 127);
        if (res==NULL)
            return NULL;
        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
            i < end; ++i) {
            c = PyUnicode_READ_CHAR(object, i);
            *outp++ = '\\';
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = Py_hexdigits[(c>>28)&0xf];
                *outp++ = Py_hexdigits[(c>>24)&0xf];
                *outp++ = Py_hexdigits[(c>>20)&0xf];
                *outp++ = Py_hexdigits[(c>>16)&0xf];
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
            else if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = Py_hexdigits[(c>>12)&0xf];
                *outp++ = Py_hexdigits[(c>>8)&0xf];
            }
            else
                *outp++ = 'x';
            *outp++ = Py_hexdigits[(c>>4)&0xf];
            *outp++ = Py_hexdigits[c&0xf];
        }

        assert(_PyUnicode_CheckConsistency(res, 1));
        restuple = Py_BuildValue("(Nn)", res, end);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

/* This handler is declared static until someone demonstrates
   a need to call it directly. */
static PyObject *
PyCodec_SurrogatePassErrors(PyObject *exc)
{
    PyObject *restuple;
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        char *outp;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
        if (!res) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyBytes_AsString(res);
        for (i = start; i < end; i++) {
            /* object is guaranteed to be "ready" */
            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
            if (!Py_UNICODE_IS_SURROGATE(ch)) {
                /* Not a surrogate, fail with original exception */
                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
                Py_DECREF(res);
                Py_DECREF(object);
                return NULL;
            }
            *outp++ = (char)(0xe0 | (ch >> 12));
            *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));
            *outp++ = (char)(0x80 | (ch & 0x3f));
        }
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        unsigned char *p;
        Py_UCS4 ch = 0;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
            Py_DECREF(object);
            return NULL;
        }
        /* Try decoding a single surrogate character. If
           there are more, let the codec call us again. */
        p += start;
        if (PyBytes_GET_SIZE(object) - start >= 3 &&
            (p[0] & 0xf0) == 0xe0 &&
            (p[1] & 0xc0) == 0x80 &&
            (p[2] & 0xc0) == 0x80) {
            /* it's a three-byte code */
            ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
            if (!Py_UNICODE_IS_SURROGATE(ch))
                /* it's not a surrogate - fail */
                ch = 0;
        }
        Py_DECREF(object);
        if (ch == 0) {
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            return NULL;
        }
        res = PyUnicode_FromOrdinal(ch);
        if (res == NULL)
            return NULL;
        return Py_BuildValue("(Nn)", res, start+3);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}

static PyObject *
PyCodec_SurrogateEscapeErrors(PyObject *exc)
{
    PyObject *restuple;
    PyObject *object;
    Py_ssize_t i;
    Py_ssize_t start;
    Py_ssize_t end;
    PyObject *res;
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        char *outp;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        res = PyBytes_FromStringAndSize(NULL, end-start);
        if (!res) {
            Py_DECREF(object);
            return NULL;
        }
        outp = PyBytes_AsString(res);
        for (i = start; i < end; i++) {
            /* object is guaranteed to be "ready" */
            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
            if (ch < 0xdc80 || ch > 0xdcff) {
                /* Not a UTF-8b surrogate, fail with original exception */
                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
                Py_DECREF(res);
                Py_DECREF(object);
                return NULL;
            }
            *outp++ = ch - 0xdc00;
        }
        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        PyObject *str;
        unsigned char *p;
        Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
        int consumed = 0;
        if (PyUnicodeDecodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
            return NULL;
        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
            Py_DECREF(object);
            return NULL;
        }
        while (consumed < 4 && consumed < end-start) {
            /* Refuse to escape ASCII bytes. */
            if (p[start+consumed] < 128)
                break;
            ch[consumed] = 0xdc00 + p[start+consumed];
            consumed++;
        }
        Py_DECREF(object);
        if (!consumed) {
            /* codec complained about ASCII byte. */
            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
            return NULL;
        }
        str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
        if (str == NULL)
            return NULL;
        return Py_BuildValue("(Nn)", str, start+consumed);
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}


static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_StrictErrors(exc);
}


static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_IgnoreErrors(exc);
}


static PyObject *replace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_ReplaceErrors(exc);
}


static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_XMLCharRefReplaceErrors(exc);
}


static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_BackslashReplaceErrors(exc);
}

static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_SurrogatePassErrors(exc);
}

static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_SurrogateEscapeErrors(exc);
}

static int _PyCodecRegistry_Init(void)
{
    static struct {
        char *name;
        PyMethodDef def;
    } methods[] =
    {
        {
            "strict",
            {
                "strict_errors",
                strict_errors,
                METH_O,
                PyDoc_STR("Implements the 'strict' error handling, which "
                          "raises a UnicodeError on coding errors.")
            }
        },
        {
            "ignore",
            {
                "ignore_errors",
                ignore_errors,
                METH_O,
                PyDoc_STR("Implements the 'ignore' error handling, which "
                          "ignores malformed data and continues.")
            }
        },
        {
            "replace",
            {
                "replace_errors",
                replace_errors,
                METH_O,
                PyDoc_STR("Implements the 'replace' error handling, which "
                          "replaces malformed data with a replacement marker.")
            }
        },
        {
            "xmlcharrefreplace",
            {
                "xmlcharrefreplace_errors",
                xmlcharrefreplace_errors,
                METH_O,
                PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
                          "which replaces an unencodable character with the "
                          "appropriate XML character reference.")
            }
        },
        {
            "backslashreplace",
            {
                "backslashreplace_errors",
                backslashreplace_errors,
                METH_O,
                PyDoc_STR("Implements the 'backslashreplace' error handling, "
                          "which replaces an unencodable character with a "
                          "backslashed escape sequence.")
            }
        },
        {
            "surrogatepass",
            {
                "surrogatepass",
                surrogatepass_errors,
                METH_O
            }
        },
        {
            "surrogateescape",
            {
                "surrogateescape",
                surrogateescape_errors,
                METH_O
            }
        }
    };

    PyInterpreterState *interp = PyThreadState_GET()->interp;
    PyObject *mod;
    unsigned i;

    if (interp->codec_search_path != NULL)
        return 0;

    interp->codec_search_path = PyList_New(0);
    interp->codec_search_cache = PyDict_New();
    interp->codec_error_registry = PyDict_New();

    if (interp->codec_error_registry) {
        for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
            PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
            int res;
            if (!func)
                Py_FatalError("can't initialize codec error registry");
            res = PyCodec_RegisterError(methods[i].name, func);
            Py_DECREF(func);
            if (res)
                Py_FatalError("can't initialize codec error registry");
        }
    }

    if (interp->codec_search_path == NULL ||
        interp->codec_search_cache == NULL ||
        interp->codec_error_registry == NULL)
        Py_FatalError("can't initialize codec registry");

    mod = PyImport_ImportModuleNoBlock("encodings");
    if (mod == NULL) {
        return -1;
    }
    Py_DECREF(mod);
    interp->codecs_initialized = 1;
    return 0;
}