Lib/test/mapping_tests.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657

# tests common to dict and UserDict
import unittest
import collections


class BasicTestMappingProtocol(unittest.TestCase):
    # This base class can be used to check that an object conforms to the
    # mapping protocol

    # Functions that can be useful to override to adapt to dictionary
    # semantics
    type2test = None # which class is being tested (overwrite in subclasses)

    def _reference(self):
        """Return a dictionary of values which are invariant by storage
        in the object under test."""
        return {"1": "2", "key1":"value1", "key2":(1,2,3)}
    def _empty_mapping(self):
        """Return an empty mapping object"""
        return self.type2test()
    def _full_mapping(self, data):
        """Return a mapping object with the value contained in data
        dictionary"""
        x = self._empty_mapping()
        for key, value in data.items():
            x[key] = value
        return x

    def __init__(self, *args, **kw):
        unittest.TestCase.__init__(self, *args, **kw)
        self.reference = self._reference().copy()

        # A (key, value) pair not in the mapping
        key, value = self.reference.popitem()
        self.other = {key:value}

        # A (key, value) pair in the mapping
        key, value = self.reference.popitem()
        self.inmapping = {key:value}
        self.reference[key] = value

    def test_read(self):
        # Test for read only operations on mapping
        p = self._empty_mapping()
        p1 = dict(p) #workaround for singleton objects
        d = self._full_mapping(self.reference)
        if d is p:
            p = p1
        #Indexing
        for key, value in self.reference.items():
            self.assertEqual(d[key], value)
        knownkey = list(self.other.keys())[0]
        self.assertRaises(KeyError, lambda:d[knownkey])
        #len
        self.assertEqual(len(p), 0)
        self.assertEqual(len(d), len(self.reference))
        #__contains__
        for k in self.reference:
            self.assertIn(k, d)
        for k in self.other:
            self.assertNotIn(k, d)
        #cmp
        self.assertEqual(p, p)
        self.assertEqual(d, d)
        self.assertNotEqual(p, d)
        self.assertNotEqual(d, p)
        #__non__zero__
        if p: self.fail("Empty mapping must compare to False")
        if not d: self.fail("Full mapping must compare to True")
        # keys(), items(), iterkeys() ...
        def check_iterandlist(iter, lst, ref):
            self.assertTrue(hasattr(iter, '__next__'))
            self.assertTrue(hasattr(iter, '__iter__'))
            x = list(iter)
            self.assertTrue(set(x)==set(lst)==set(ref))
        check_iterandlist(iter(d.keys()), list(d.keys()),
                          self.reference.keys())
        check_iterandlist(iter(d), list(d.keys()), self.reference.keys())
        check_iterandlist(iter(d.values()), list(d.values()),
                          self.reference.values())
        check_iterandlist(iter(d.items()), list(d.items()),
                          self.reference.items())
        #get
        key, value = next(iter(d.items()))
        knownkey, knownvalue = next(iter(self.other.items()))
        self.assertEqual(d.get(key, knownvalue), value)
        self.assertEqual(d.get(knownkey, knownvalue), knownvalue)
        self.assertNotIn(knownkey, d)

    def test_write(self):
        # Test for write operations on mapping
        p = self._empty_mapping()
        #Indexing
        for key, value in self.reference.items():
            p[key] = value
            self.assertEqual(p[key], value)
        for key in self.reference.keys():
            del p[key]
            self.assertRaises(KeyError, lambda:p[key])
        p = self._empty_mapping()
        #update
        p.update(self.reference)
        self.assertEqual(dict(p), self.reference)
        items = list(p.items())
        p = self._empty_mapping()
        p.update(items)
        self.assertEqual(dict(p), self.reference)
        d = self._full_mapping(self.reference)
        #setdefault
        key, value = next(iter(d.items()))
        knownkey, knownvalue = next(iter(self.other.items()))
        self.assertEqual(d.setdefault(key, knownvalue), value)
        self.assertEqual(d[key], value)
        self.assertEqual(d.setdefault(knownkey, knownvalue), knownvalue)
        self.assertEqual(d[knownkey], knownvalue)
        #pop
        self.assertEqual(d.pop(knownkey), knownvalue)
        self.assertNotIn(knownkey, d)
        self.assertRaises(KeyError, d.pop, knownkey)
        default = 909
        d[knownkey] = knownvalue
        self.assertEqual(d.pop(knownkey, default), knownvalue)
        self.assertNotIn(knownkey, d)
        self.assertEqual(d.pop(knownkey, default), default)
        #popitem
        key, value = d.popitem()
        self.assertNotIn(key, d)
        self.assertEqual(value, self.reference[key])
        p=self._empty_mapping()
        self.assertRaises(KeyError, p.popitem)

    def test_constructor(self):
        self.assertEqual(self._empty_mapping(), self._empty_mapping())

    def test_bool(self):
        self.assertTrue(not self._empty_mapping())
        self.assertTrue(self.reference)
        self.assertTrue(bool(self._empty_mapping()) is False)
        self.assertTrue(bool(self.reference) is True)

    def test_keys(self):
        d = self._empty_mapping()
        self.assertEqual(list(d.keys()), [])
        d = self.reference
        self.assertIn(list(self.inmapping.keys())[0], d.keys())
        self.assertNotIn(list(self.other.keys())[0], d.keys())
        self.assertRaises(TypeError, d.keys, None)

    def test_values(self):
        d = self._empty_mapping()
        self.assertEqual(list(d.values()), [])

        self.assertRaises(TypeError, d.values, None)

    def test_items(self):
        d = self._empty_mapping()
        self.assertEqual(list(d.items()), [])

        self.assertRaises(TypeError, d.items, None)

    def test_len(self):
        d = self._empty_mapping()
        self.assertEqual(len(d), 0)

    def test_getitem(self):
        d = self.reference
        self.assertEqual(d[list(self.inmapping.keys())[0]],
                         list(self.inmapping.values())[0])

        self.assertRaises(TypeError, d.__getitem__)

    def test_update(self):
        # mapping argument
        d = self._empty_mapping()
        d.update(self.other)
        self.assertEqual(list(d.items()), list(self.other.items()))

        # No argument
        d = self._empty_mapping()
        d.update()
        self.assertEqual(d, self._empty_mapping())

        # item sequence
        d = self._empty_mapping()
        d.update(self.other.items())
        self.assertEqual(list(d.items()), list(self.other.items()))

        # Iterator
        d = self._empty_mapping()
        d.update(self.other.items())
        self.assertEqual(list(d.items()), list(self.other.items()))

        # FIXME: Doesn't work with UserDict
        # self.assertRaises((TypeError, AttributeError), d.update, None)
        self.assertRaises((TypeError, AttributeError), d.update, 42)

        outerself = self
        class SimpleUserDict:
            def __init__(self):
                self.d = outerself.reference
            def keys(self):
                return self.d.keys()
            def __getitem__(self, i):
                return self.d[i]
        d.clear()
        d.update(SimpleUserDict())
        i1 = sorted(d.items())
        i2 = sorted(self.reference.items())
        self.assertEqual(i1, i2)

        class Exc(Exception): pass

        d = self._empty_mapping()
        class FailingUserDict:
            def keys(self):
                raise Exc
        self.assertRaises(Exc, d.update, FailingUserDict())

        d.clear()

        class FailingUserDict:
            def keys(self):
                class BogonIter:
                    def __init__(self):
                        self.i = 1
                    def __iter__(self):
                        return self
                    def __next__(self):
                        if self.i:
                            self.i = 0
                            return 'a'
                        raise Exc
                return BogonIter()
            def __getitem__(self, key):
                return key
        self.assertRaises(Exc, d.update, FailingUserDict())

        class FailingUserDict:
            def keys(self):
                class BogonIter:
                    def __init__(self):
                        self.i = ord('a')
                    def __iter__(self):
                        return self
                    def __next__(self):
                        if self.i <= ord('z'):
                            rtn = chr(self.i)
                            self.i += 1
                            return rtn
                        raise StopIteration
                return BogonIter()
            def __getitem__(self, key):
                raise Exc
        self.assertRaises(Exc, d.update, FailingUserDict())

        d = self._empty_mapping()
        class badseq(object):
            def __iter__(self):
                return self
            def __next__(self):
                raise Exc()

        self.assertRaises(Exc, d.update, badseq())

        self.assertRaises(ValueError, d.update, [(1, 2, 3)])

    # no test_fromkeys or test_copy as both os.environ and selves don't support it

    def test_get(self):
        d = self._empty_mapping()
        self.assertTrue(d.get(list(self.other.keys())[0]) is None)
        self.assertEqual(d.get(list(self.other.keys())[0], 3), 3)
        d = self.reference
        self.assertTrue(d.get(list(self.other.keys())[0]) is None)
        self.assertEqual(d.get(list(self.other.keys())[0], 3), 3)
        self.assertEqual(d.get(list(self.inmapping.keys())[0]),
                         list(self.inmapping.values())[0])
        self.assertEqual(d.get(list(self.inmapping.keys())[0], 3),
                         list(self.inmapping.values())[0])
        self.assertRaises(TypeError, d.get)
        self.assertRaises(TypeError, d.get, None, None, None)

    def test_setdefault(self):
        d = self._empty_mapping()
        self.assertRaises(TypeError, d.setdefault)

    def test_popitem(self):
        d = self._empty_mapping()
        self.assertRaises(KeyError, d.popitem)
        self.assertRaises(TypeError, d.popitem, 42)

    def test_pop(self):
        d = self._empty_mapping()
        k, v = list(self.inmapping.items())[0]
        d[k] = v
        self.assertRaises(KeyError, d.pop, list(self.other.keys())[0])

        self.assertEqual(d.pop(k), v)
        self.assertEqual(len(d), 0)

        self.assertRaises(KeyError, d.pop, k)


class TestMappingProtocol(BasicTestMappingProtocol):
    def test_constructor(self):
        BasicTestMappingProtocol.test_constructor(self)
        self.assertTrue(self._empty_mapping() is not self._empty_mapping())
        self.assertEqual(self.type2test(x=1, y=2), {"x": 1, "y": 2})

    def test_bool(self):
        BasicTestMappingProtocol.test_bool(self)
        self.assertTrue(not self._empty_mapping())
        self.assertTrue(self._full_mapping({"x": "y"}))
        self.assertTrue(bool(self._empty_mapping()) is False)
        self.assertTrue(bool(self._full_mapping({"x": "y"})) is True)

    def test_keys(self):
        BasicTestMappingProtocol.test_keys(self)
        d = self._empty_mapping()
        self.assertEqual(list(d.keys()), [])
        d = self._full_mapping({'a': 1, 'b': 2})
        k = d.keys()
        self.assertIn('a', k)
        self.assertIn('b', k)
        self.assertNotIn('c', k)

    def test_values(self):
        BasicTestMappingProtocol.test_values(self)
        d = self._full_mapping({1:2})
        self.assertEqual(list(d.values()), [2])

    def test_items(self):
        BasicTestMappingProtocol.test_items(self)

        d = self._full_mapping({1:2})
        self.assertEqual(list(d.items()), [(1, 2)])

    def test_contains(self):
        d = self._empty_mapping()
        self.assertNotIn('a', d)
        self.assertTrue(not ('a' in d))
        self.assertTrue('a' not in d)
        d = self._full_mapping({'a': 1, 'b': 2})
        self.assertIn('a', d)
        self.assertIn('b', d)
        self.assertNotIn('c', d)

        self.assertRaises(TypeError, d.__contains__)

    def test_len(self):
        BasicTestMappingProtocol.test_len(self)
        d = self._full_mapping({'a': 1, 'b': 2})
        self.assertEqual(len(d), 2)

    def test_getitem(self):
        BasicTestMappingProtocol.test_getitem(self)
        d = self._full_mapping({'a': 1, 'b': 2})
        self.assertEqual(d['a'], 1)
        self.assertEqual(d['b'], 2)
        d['c'] = 3
        d['a'] = 4
        self.assertEqual(d['c'], 3)
        self.assertEqual(d['a'], 4)
        del d['b']
        self.assertEqual(d, {'a': 4, 'c': 3})

        self.assertRaises(TypeError, d.__getitem__)

    def test_clear(self):
        d = self._full_mapping({1:1, 2:2, 3:3})
        d.clear()
        self.assertEqual(d, {})

        self.assertRaises(TypeError, d.clear, None)

    def test_update(self):
        BasicTestMappingProtocol.test_update(self)
        # mapping argument
        d = self._empty_mapping()
        d.update({1:100})
        d.update({2:20})
        d.update({1:1, 2:2, 3:3})
        self.assertEqual(d, {1:1, 2:2, 3:3})

        # no argument
        d.update()
        self.assertEqual(d, {1:1, 2:2, 3:3})

        # keyword arguments
        d = self._empty_mapping()
        d.update(x=100)
        d.update(y=20)
        d.update(x=1, y=2, z=3)
        self.assertEqual(d, {"x":1, "y":2, "z":3})

        # item sequence
        d = self._empty_mapping()
        d.update([("x", 100), ("y", 20)])
        self.assertEqual(d, {"x":100, "y":20})

        # Both item sequence and keyword arguments
        d = self._empty_mapping()
        d.update([("x", 100), ("y", 20)], x=1, y=2)
        self.assertEqual(d, {"x":1, "y":2})

        # iterator
        d = self._full_mapping({1:3, 2:4})
        d.update(self._full_mapping({1:2, 3:4, 5:6}).items())
        self.assertEqual(d, {1:2, 2:4, 3:4, 5:6})

        class SimpleUserDict:
            def __init__(self):
                self.d = {1:1, 2:2, 3:3}
            def keys(self):
                return self.d.keys()
            def __getitem__(self, i):
                return self.d[i]
        d.clear()
        d.update(SimpleUserDict())
        self.assertEqual(d, {1:1, 2:2, 3:3})

    def test_fromkeys(self):
        self.assertEqual(self.type2test.fromkeys('abc'), {'a':None, 'b':None, 'c':None})
        d = self._empty_mapping()
        self.assertTrue(not(d.fromkeys('abc') is d))
        self.assertEqual(d.fromkeys('abc'), {'a':None, 'b':None, 'c':None})
        self.assertEqual(d.fromkeys((4,5),0), {4:0, 5:0})
        self.assertEqual(d.fromkeys([]), {})
        def g():
            yield 1
        self.assertEqual(d.fromkeys(g()), {1:None})
        self.assertRaises(TypeError, {}.fromkeys, 3)
        class dictlike(self.type2test): pass
        self.assertEqual(dictlike.fromkeys('a'), {'a':None})
        self.assertEqual(dictlike().fromkeys('a'), {'a':None})
        self.assertTrue(dictlike.fromkeys('a').__class__ is dictlike)
        self.assertTrue(dictlike().fromkeys('a').__class__ is dictlike)
        self.assertTrue(type(dictlike.fromkeys('a')) is dictlike)
        class mydict(self.type2test):
            def __new__(cls):
                return collections.UserDict()
        ud = mydict.fromkeys('ab')
        self.assertEqual(ud, {'a':None, 'b':None})
        self.assertIsInstance(ud, collections.UserDict)
        self.assertRaises(TypeError, dict.fromkeys)

        class Exc(Exception): pass

        class baddict1(self.type2test):
            def __init__(self):
                raise Exc()

        self.assertRaises(Exc, baddict1.fromkeys, [1])

        class BadSeq(object):
            def __iter__(self):
                return self
            def __next__(self):
                raise Exc()

        self.assertRaises(Exc, self.type2test.fromkeys, BadSeq())

        class baddict2(self.type2test):
            def __setitem__(self, key, value):
                raise Exc()

        self.assertRaises(Exc, baddict2.fromkeys, [1])

    def test_copy(self):
        d = self._full_mapping({1:1, 2:2, 3:3})
        self.assertEqual(d.copy(), {1:1, 2:2, 3:3})
        d = self._empty_mapping()
        self.assertEqual(d.copy(), d)
        self.assertIsInstance(d.copy(), d.__class__)
        self.assertRaises(TypeError, d.copy, None)

    def test_get(self):
        BasicTestMappingProtocol.test_get(self)
        d = self._empty_mapping()
        self.assertTrue(d.get('c') is None)
        self.assertEqual(d.get('c', 3), 3)
        d = self._full_mapping({'a' : 1, 'b' : 2})
        self.assertTrue(d.get('c') is None)
        self.assertEqual(d.get('c', 3), 3)
        self.assertEqual(d.get('a'), 1)
        self.assertEqual(d.get('a', 3), 1)

    def test_setdefault(self):
        BasicTestMappingProtocol.test_setdefault(self)
        d = self._empty_mapping()
        self.assertTrue(d.setdefault('key0') is None)
        d.setdefault('key0', [])
        self.assertTrue(d.setdefault('key0') is None)
        d.setdefault('key', []).append(3)
        self.assertEqual(d['key'][0], 3)
        d.setdefault('key', []).append(4)
        self.assertEqual(len(d['key']), 2)

    def test_popitem(self):
        BasicTestMappingProtocol.test_popitem(self)
        for copymode in -1, +1:
            # -1: b has same structure as a
            # +1: b is a.copy()
            for log2size in range(12):
                size = 2**log2size
                a = self._empty_mapping()
                b = self._empty_mapping()
                for i in range(size):
                    a[repr(i)] = i
                    if copymode < 0:
                        b[repr(i)] = i
                if copymode > 0:
                    b = a.copy()
                for i in range(size):
                    ka, va = ta = a.popitem()
                    self.assertEqual(va, int(ka))
                    kb, vb = tb = b.popitem()
                    self.assertEqual(vb, int(kb))
                    self.assertTrue(not(copymode < 0 and ta != tb))
                self.assertTrue(not a)
                self.assertTrue(not b)

    def test_pop(self):
        BasicTestMappingProtocol.test_pop(self)

        # Tests for pop with specified key
        d = self._empty_mapping()
        k, v = 'abc', 'def'

        self.assertEqual(d.pop(k, v), v)
        d[k] = v
        self.assertEqual(d.pop(k, 1), v)


class TestHashMappingProtocol(TestMappingProtocol):

    def test_getitem(self):
        TestMappingProtocol.test_getitem(self)
        class Exc(Exception): pass

        class BadEq(object):
            def __eq__(self, other):
                raise Exc()
            def __hash__(self):
                return 24

        d = self._empty_mapping()
        d[BadEq()] = 42
        self.assertRaises(KeyError, d.__getitem__, 23)

        class BadHash(object):
            fail = False
            def __hash__(self):
                if self.fail:
                    raise Exc()
                else:
                    return 42

        d = self._empty_mapping()
        x = BadHash()
        d[x] = 42
        x.fail = True
        self.assertRaises(Exc, d.__getitem__, x)

    def test_fromkeys(self):
        TestMappingProtocol.test_fromkeys(self)
        class mydict(self.type2test):
            def __new__(cls):
                return collections.UserDict()
        ud = mydict.fromkeys('ab')
        self.assertEqual(ud, {'a':None, 'b':None})
        self.assertIsInstance(ud, collections.UserDict)

    def test_pop(self):
        TestMappingProtocol.test_pop(self)

        class Exc(Exception): pass

        class BadHash(object):
            fail = False
            def __hash__(self):
                if self.fail:
                    raise Exc()
                else:
                    return 42

        d = self._empty_mapping()
        x = BadHash()
        d[x] = 42
        x.fail = True
        self.assertRaises(Exc, d.pop, x)

    def test_mutatingiteration(self):
        d = self._empty_mapping()
        d[1] = 1
        try:
            for i in d:
                d[i+1] = 1
        except RuntimeError:
            pass
        else:
            self.fail("changing dict size during iteration doesn't raise Error")

    def test_repr(self):
        d = self._empty_mapping()
        self.assertEqual(repr(d), '{}')
        d[1] = 2
        self.assertEqual(repr(d), '{1: 2}')
        d = self._empty_mapping()
        d[1] = d
        self.assertEqual(repr(d), '{1: {...}}')

        class Exc(Exception): pass

        class BadRepr(object):
            def __repr__(self):
                raise Exc()

        d = self._full_mapping({1: BadRepr()})
        self.assertRaises(Exc, repr, d)

    def test_eq(self):
        self.assertEqual(self._empty_mapping(), self._empty_mapping())
        self.assertEqual(self._full_mapping({1: 2}),
                         self._full_mapping({1: 2}))

        class Exc(Exception): pass

        class BadCmp(object):
            def __eq__(self, other):
                raise Exc()
            def __hash__(self):
                return 1

        d1 = self._full_mapping({BadCmp(): 1})
        d2 = self._full_mapping({1: 1})
        self.assertRaises(Exc, lambda: BadCmp()==1)
        self.assertRaises(Exc, lambda: d1==d2)

    def test_setdefault(self):
        TestMappingProtocol.test_setdefault(self)

        class Exc(Exception): pass

        class BadHash(object):
            fail = False
            def __hash__(self):
                if self.fail:
                    raise Exc()
                else:
                    return 42

        d = self._empty_mapping()
        x = BadHash()
        d[x] = 42
        x.fail = True
        self.assertRaises(Exc, d.setdefault, x, [])
/*

Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
Unicode Integration Proposal (see file Misc/unicode.txt).

Major speed upgrades to the method implementations at the Reykjavik
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.

Copyright (c) Corporation for National Research Initiatives.

--------------------------------------------------------------------
The original string type implementation is:

    Copyright (c) 1999 by Secret Labs AB
    Copyright (c) 1999 by Fredrik Lundh

By obtaining, using, and/or copying this software and/or its
associated documentation, you agree that you have read, understood,
and will comply with the following terms and conditions:

Permission to use, copy, modify, and distribute this software and its
associated documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies, and that both that copyright notice and this permission notice
appear in supporting documentation, and that the name of Secret Labs
AB or the author not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--------------------------------------------------------------------

*/

#define PY_SSIZE_T_CLEAN
#include "Python.h"

#include "formatter_unicode.h"

#include "unicodeobject.h"
#include "ucnhash.h"

#ifdef MS_WINDOWS
#include <windows.h>
#endif

/* Limit for the Unicode object free list */

#define PyUnicode_MAXFREELIST       1024

/* Limit for the Unicode object free list stay alive optimization.

   The implementation will keep allocated Unicode memory intact for
   all objects on the free list having a size less than this
   limit. This reduces malloc() overhead for small Unicode objects.

   At worst this will result in PyUnicode_MAXFREELIST *
   (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
   malloc()-overhead) bytes of unused garbage.

   Setting the limit to 0 effectively turns the feature off.

   Note: This is an experimental feature ! If you get core dumps when
   using Unicode objects, turn this feature off.

*/

#define KEEPALIVE_SIZE_LIMIT       9

/* Endianness switches; defaults to little endian */

#ifdef WORDS_BIGENDIAN
# define BYTEORDER_IS_BIG_ENDIAN
#else
# define BYTEORDER_IS_LITTLE_ENDIAN
#endif

/* --- Globals ------------------------------------------------------------

   The globals are initialized by the _PyUnicode_Init() API and should
   not be used before calling that API.

*/


#ifdef __cplusplus
extern "C" {
#endif

/* Free list for Unicode objects */
static PyUnicodeObject *free_list;
static int numfree;

/* The empty Unicode object is shared to improve performance. */
static PyUnicodeObject *unicode_empty;

/* Single character Unicode strings in the Latin-1 range are being
   shared as well. */
static PyUnicodeObject *unicode_latin1[256];

/* Default encoding to use and assume when NULL is passed as encoding
   parameter; it is initialized by _PyUnicode_Init().

   Always use the PyUnicode_SetDefaultEncoding() and
   PyUnicode_GetDefaultEncoding() APIs to access this global.

*/
static char unicode_default_encoding[100];

/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
//     case 0x0009: /* HORIZONTAL TABULATION */
//     case 0x000A: /* LINE FEED */
//     case 0x000B: /* VERTICAL TABULATION */
//     case 0x000C: /* FORM FEED */
//     case 0x000D: /* CARRIAGE RETURN */
	0, 1, 1, 1, 1, 1, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
//     case 0x001C: /* FILE SEPARATOR */
//     case 0x001D: /* GROUP SEPARATOR */
//     case 0x001E: /* RECORD SEPARATOR */
//     case 0x001F: /* UNIT SEPARATOR */
	0, 0, 0, 0, 1, 1, 1, 1,
//     case 0x0020: /* SPACE */
	1, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,

	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0
};

/* Same for linebreaks */
static unsigned char ascii_linebreak[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
//         0x000A, /* LINE FEED */
//         0x000D, /* CARRIAGE RETURN */
	0, 0, 1, 0, 0, 1, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
//         0x001C, /* FILE SEPARATOR */
//         0x001D, /* GROUP SEPARATOR */
//         0x001E, /* RECORD SEPARATOR */
	0, 0, 0, 0, 1, 1, 1, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,

	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0
};


Py_UNICODE
PyUnicode_GetMax(void)
{
#ifdef Py_UNICODE_WIDE
	return 0x10FFFF;
#else
	/* This is actually an illegal character, so it should
	   not be passed to unichr. */
	return 0xFFFF;
#endif
}

/* --- Bloom Filters ----------------------------------------------------- */

/* stuff to implement simple "bloom filters" for Unicode characters.
   to keep things simple, we use a single bitmask, using the least 5
   bits from each unicode characters as the bit index. */

/* the linebreak mask is set up by Unicode_Init below */

#define BLOOM_MASK unsigned long

static BLOOM_MASK bloom_linebreak;

#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))

#define BLOOM_LINEBREAK(ch) \
    ((ch) < 128U ? ascii_linebreak[(ch)] : \
    (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))

Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
{
    /* calculate simple bloom-style bitmask for a given unicode string */

    long mask;
    Py_ssize_t i;

    mask = 0;
    for (i = 0; i < len; i++)
        mask |= (1 << (ptr[i] & 0x1F));

    return mask;
}

Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
{
    Py_ssize_t i;

    for (i = 0; i < setlen; i++)
        if (set[i] == chr)
            return 1;

    return 0;
}

#define BLOOM_MEMBER(mask, chr, set, setlen)\
    BLOOM(mask, chr) && unicode_member(chr, set, setlen)

/* --- Unicode Object ----------------------------------------------------- */

static
int unicode_resize(register PyUnicodeObject *unicode,
                      Py_ssize_t length)
{
    void *oldstr;

    /* Shortcut if there's nothing much to do. */
    if (unicode->length == length)
	goto reset;

    /* Resizing shared object (unicode_empty or single character
       objects) in-place is not allowed. Use PyUnicode_Resize()
       instead ! */

    if (unicode == unicode_empty || 
	(unicode->length == 1 && 
	 unicode->str[0] < 256U &&
	 unicode_latin1[unicode->str[0]] == unicode)) {
        PyErr_SetString(PyExc_SystemError,
                        "can't resize shared unicode objects");
        return -1;
    }

    /* We allocate one more byte to make sure the string is Ux0000 terminated.
       The overallocation is also used by fastsearch, which assumes that it's
       safe to look at str[length] (without making any assumptions about what
       it contains). */

    oldstr = unicode->str;
    unicode->str = PyObject_REALLOC(unicode->str,
				    sizeof(Py_UNICODE) * (length + 1));
    if (!unicode->str) {
	unicode->str = (Py_UNICODE *)oldstr;
        PyErr_NoMemory();
        return -1;
    }
    unicode->str[length] = 0;
    unicode->length = length;

 reset:
    /* Reset the object caches */
    if (unicode->defenc) {
        Py_DECREF(unicode->defenc);
        unicode->defenc = NULL;
    }
    unicode->hash = -1;

    return 0;
}

/* We allocate one more byte to make sure the string is
   Ux0000 terminated -- XXX is this needed ?

   XXX This allocator could further be enhanced by assuring that the
       free list never reduces its size below 1.

*/

static
PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
{
    register PyUnicodeObject *unicode;

    /* Optimization for empty strings */
    if (length == 0 && unicode_empty != NULL) {
        Py_INCREF(unicode_empty);
        return unicode_empty;
    }

    /* Unicode freelist & memory allocation */
    if (free_list) {
        unicode = free_list;
        free_list = *(PyUnicodeObject **)unicode;
        numfree--;
	if (unicode->str) {
	    /* Keep-Alive optimization: we only upsize the buffer,
	       never downsize it. */
	    if ((unicode->length < length) &&
                unicode_resize(unicode, length) < 0) {
		PyObject_DEL(unicode->str);
		goto onError;
	    }
	}
        else {
	    size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
	    unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
        }
        PyObject_INIT(unicode, &PyUnicode_Type);
    }
    else {
	size_t new_size;
        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
        if (unicode == NULL)
            return NULL;
	new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
	unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
    }

    if (!unicode->str) {
	PyErr_NoMemory();
	goto onError;
    }
    /* Initialize the first element to guard against cases where
     * the caller fails before initializing str -- unicode_resize()
     * reads str[0], and the Keep-Alive optimization can keep memory
     * allocated for str alive across a call to unicode_dealloc(unicode).
     * We don't want unicode_resize to read uninitialized memory in
     * that case.
     */
    unicode->str[0] = 0;
    unicode->str[length] = 0;
    unicode->length = length;
    unicode->hash = -1;
    unicode->defenc = NULL;
    return unicode;

 onError:
    _Py_ForgetReference((PyObject *)unicode);
    PyObject_Del(unicode);
    return NULL;
}

static
void unicode_dealloc(register PyUnicodeObject *unicode)
{
    if (PyUnicode_CheckExact(unicode) &&
	numfree < PyUnicode_MAXFREELIST) {
        /* Keep-Alive optimization */
	if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
	    PyObject_DEL(unicode->str);
	    unicode->str = NULL;
	    unicode->length = 0;
	}
	if (unicode->defenc) {
	    Py_DECREF(unicode->defenc);
	    unicode->defenc = NULL;
	}
	/* Add to free list */
        *(PyUnicodeObject **)unicode = free_list;
        free_list = unicode;
        numfree++;
    }
    else {
	PyObject_DEL(unicode->str);
	Py_XDECREF(unicode->defenc);
	Py_TYPE(unicode)->tp_free((PyObject *)unicode);
    }
}

int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length)
{
    register PyUnicodeObject *v;

    /* Argument checks */
    if (unicode == NULL) {
	PyErr_BadInternalCall();
	return -1;
    }
    v = (PyUnicodeObject *)*unicode;
    if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) {
	PyErr_BadInternalCall();
	return -1;
    }

    /* Resizing unicode_empty and single character objects is not
       possible since these are being shared. We simply return a fresh
       copy with the same Unicode content. */
    if (v->length != length &&
	(v == unicode_empty || v->length == 1)) {
	PyUnicodeObject *w = _PyUnicode_New(length);
	if (w == NULL)
	    return -1;
	Py_UNICODE_COPY(w->str, v->str,
			length < v->length ? length : v->length);
	Py_DECREF(*unicode);
	*unicode = (PyObject *)w;
	return 0;
    }

    /* Note that we don't have to modify *unicode for unshared Unicode
       objects, since we can modify them in-place. */
    return unicode_resize(v, length);
}

/* Internal API for use in unicodeobject.c only ! */
#define _PyUnicode_Resize(unicodevar, length) \
        PyUnicode_Resize(((PyObject **)(unicodevar)), length)

PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
				Py_ssize_t size)
{
    PyUnicodeObject *unicode;

    /* If the Unicode data is known at construction time, we can apply
       some optimizations which share commonly used objects. */
    if (u != NULL) {

	/* Optimization for empty strings */
	if (size == 0 && unicode_empty != NULL) {
	    Py_INCREF(unicode_empty);
	    return (PyObject *)unicode_empty;
	}

	/* Single character Unicode objects in the Latin-1 range are
	   shared when using this constructor */
	if (size == 1 && *u < 256) {
	    unicode = unicode_latin1[*u];
	    if (!unicode) {
		unicode = _PyUnicode_New(1);
		if (!unicode)
		    return NULL;
		unicode->str[0] = *u;
		unicode_latin1[*u] = unicode;
	    }
	    Py_INCREF(unicode);
	    return (PyObject *)unicode;
	}
    }

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;

    /* Copy the Unicode data into the new object */
    if (u != NULL)
	Py_UNICODE_COPY(unicode->str, u, size);

    return (PyObject *)unicode;
}

PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
{
    PyUnicodeObject *unicode;
    /* If the Unicode data is known at construction time, we can apply
       some optimizations which share commonly used objects.
       Also, this means the input must be UTF-8, so fall back to the
       UTF-8 decoder at the end. */
    if (u != NULL) {

	/* Optimization for empty strings */
	if (size == 0 && unicode_empty != NULL) {
	    Py_INCREF(unicode_empty);
	    return (PyObject *)unicode_empty;
	}

	/* Single characters are shared when using this constructor.
           Restrict to ASCII, since the input must be UTF-8. */
	if (size == 1 && Py_CHARMASK(*u) < 128) {
	    unicode = unicode_latin1[Py_CHARMASK(*u)];
	    if (!unicode) {
		unicode = _PyUnicode_New(1);
		if (!unicode)
		    return NULL;
		unicode->str[0] = Py_CHARMASK(*u);
		unicode_latin1[Py_CHARMASK(*u)] = unicode;
	    }
	    Py_INCREF(unicode);
	    return (PyObject *)unicode;
	}

        return PyUnicode_DecodeUTF8(u, size, NULL);
    }

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;

    return (PyObject *)unicode;
}

PyObject *PyUnicode_FromString(const char *u)
{
    size_t size = strlen(u);
    if (size > PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError, "input too long");
        return NULL;
    }

    return PyUnicode_FromStringAndSize(u, size);
}

#ifdef HAVE_WCHAR_H

PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
				 Py_ssize_t size)
{
    PyUnicodeObject *unicode;

    if (w == NULL) {
	PyErr_BadInternalCall();
	return NULL;
    }

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;

    /* Copy the wchar_t data into the new object */
#ifdef HAVE_USABLE_WCHAR_T
    memcpy(unicode->str, w, size * sizeof(wchar_t));
#else
    {
	register Py_UNICODE *u;
	register Py_ssize_t i;
	u = PyUnicode_AS_UNICODE(unicode);
	for (i = size; i > 0; i--)
	    *u++ = *w++;
    }
#endif

    return (PyObject *)unicode;
}

static void
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
{
	*fmt++ = '%';
	if (width) {
		if (zeropad)
			*fmt++ = '0';
		fmt += sprintf(fmt, "%d", width);
	}
	if (precision)
		fmt += sprintf(fmt, ".%d", precision);
	if (longflag)
		*fmt++ = 'l';
	else if (size_tflag) {
		char *f = PY_FORMAT_SIZE_T;
		while (*f)
			*fmt++ = *f++;
	}
	*fmt++ = c;
	*fmt = '\0';
}

#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}

PyObject *
PyUnicode_FromFormatV(const char *format, va_list vargs)
{
	va_list count;
	Py_ssize_t callcount = 0;
	PyObject **callresults = NULL;
	PyObject **callresult = NULL;
	Py_ssize_t n = 0;
	int width = 0;
	int precision = 0;
	int zeropad;
	const char* f;
	Py_UNICODE *s;
	PyObject *string;
	/* used by sprintf */
	char buffer[21];
	/* use abuffer instead of buffer, if we need more space
	 * (which can happen if there's a format specifier with width). */
	char *abuffer = NULL;
	char *realbuffer;
	Py_ssize_t abuffersize = 0;
	char fmt[60]; /* should be enough for %0width.precisionld */
	const char *copy;

#ifdef VA_LIST_IS_ARRAY
	Py_MEMCPY(count, vargs, sizeof(va_list));
#else
#ifdef  __va_copy
	__va_copy(count, vargs);
#else
	count = vargs;
#endif
#endif
	/* step 1: count the number of %S/%R format specifications
	 * (we call PyObject_Str()/PyObject_Repr() for these objects
	 * once during step 3 and put the result in an array) */
	for (f = format; *f; f++) {
		if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R'))
			++callcount;
	}
	/* step 2: allocate memory for the results of
	 * PyObject_Str()/PyObject_Repr() calls */
	if (callcount) {
		callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
		if (!callresults) {
			PyErr_NoMemory();
			return NULL;
		}
		callresult = callresults;
	}
	/* step 3: figure out how large a buffer we need */
	for (f = format; *f; f++) {
		if (*f == '%') {
			const char* p = f;
			width = 0;
			while (isdigit((unsigned)*f))
				width = (width*10) + *f++ - '0';
			while (*++f && *f != '%' && !isalpha((unsigned)*f))
				;

			/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
			 * they don't affect the amount of space we reserve.
			 */
			if ((*f == 'l' || *f == 'z') &&
					(f[1] == 'd' || f[1] == 'u'))
                                ++f;

			switch (*f) {
			case 'c':
				(void)va_arg(count, int);
				/* fall through... */
			case '%':
				n++;
				break;
			case 'd': case 'u': case 'i': case 'x':
				(void) va_arg(count, int);
				/* 20 bytes is enough to hold a 64-bit
				   integer.  Decimal takes the most space.
				   This isn't enough for octal.
				   If a width is specified we need more
				   (which we allocate later). */
				if (width < 20)
					width = 20;
				n += width;
				if (abuffersize < width)
					abuffersize = width;
				break;
			case 's':
			{
				/* UTF-8 */
				unsigned char*s;
				s = va_arg(count, unsigned char*);
				while (*s) {
					if (*s < 128) {
						n++; s++;
					} else if (*s < 0xc0) {
						/* invalid UTF-8 */
						n++; s++;
					} else if (*s < 0xc0) {
						n++;
						s++; if(!*s)break;
						s++;
					} else if (*s < 0xe0) {
						n++;
						s++; if(!*s)break;
						s++; if(!*s)break;
						s++;
					} else {
						#ifdef Py_UNICODE_WIDE
						n++;
						#else
						n+=2;
						#endif
						s++; if(!*s)break;
						s++; if(!*s)break;
						s++; if(!*s)break;
						s++;
					}
				}
				break;
			}
			case 'U':
			{
				PyObject *obj = va_arg(count, PyObject *);
				assert(obj && PyUnicode_Check(obj));
				n += PyUnicode_GET_SIZE(obj);
				break;
			}
			case 'V':
			{
				PyObject *obj = va_arg(count, PyObject *);
				const char *str = va_arg(count, const char *);
				assert(obj || str);
				assert(!obj || PyUnicode_Check(obj));
				if (obj)
					n += PyUnicode_GET_SIZE(obj);
				else
					n += strlen(str);
				break;
			}
			case 'S':
			{
				PyObject *obj = va_arg(count, PyObject *);
				PyObject *str;
				assert(obj);
				str = PyObject_Str(obj);
				if (!str)
					goto fail;
				n += PyUnicode_GET_SIZE(str);
				/* Remember the str and switch to the next slot */
				*callresult++ = str;
				break;
			}
			case 'R':
			{
				PyObject *obj = va_arg(count, PyObject *);
				PyObject *repr;
				assert(obj);
				repr = PyObject_Repr(obj);
				if (!repr)
					goto fail;
				n += PyUnicode_GET_SIZE(repr);
				/* Remember the repr and switch to the next slot */
				*callresult++ = repr;
				break;
			}
			case 'p':
				(void) va_arg(count, int);
				/* maximum 64-bit pointer representation:
				 * 0xffffffffffffffff
				 * so 19 characters is enough.
				 * XXX I count 18 -- what's the extra for?
				 */
				n += 19;
				break;
			default:
				/* if we stumble upon an unknown
				   formatting code, copy the rest of
				   the format string to the output
				   string. (we cannot just skip the
				   code, since there's no way to know
				   what's in the argument list) */
				n += strlen(p);
				goto expand;
			}
		} else
			n++;
	}
 expand:
	if (abuffersize > 20) {
		abuffer = PyObject_Malloc(abuffersize);
		if (!abuffer) {
			PyErr_NoMemory();
			goto fail;
		}
		realbuffer = abuffer;
	}
	else
		realbuffer = buffer;
	/* step 4: fill the buffer */
	/* Since we've analyzed how much space we need for the worst case,
	   we don't have to resize the string.
	   There can be no errors beyond this point. */
	string = PyUnicode_FromUnicode(NULL, n);
	if (!string)
		goto fail;

	s = PyUnicode_AS_UNICODE(string);
	callresult = callresults;

	for (f = format; *f; f++) {
		if (*f == '%') {
			const char* p = f++;
			int longflag = 0;
			int size_tflag = 0;
			zeropad = (*f == '0');
			/* parse the width.precision part */
			width = 0;
			while (isdigit((unsigned)*f))
				width = (width*10) + *f++ - '0';
			precision = 0;
			if (*f == '.') {
				f++;
				while (isdigit((unsigned)*f))
					precision = (precision*10) + *f++ - '0';
			}
			/* handle the long flag, but only for %ld and %lu.
			   others can be added when necessary. */
			if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
				longflag = 1;
				++f;
			}
			/* handle the size_t flag. */
			if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
				size_tflag = 1;
				++f;
			}

			switch (*f) {
			case 'c':
				*s++ = va_arg(vargs, int);
				break;
			case 'd':
				makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
				if (longflag)
					sprintf(realbuffer, fmt, va_arg(vargs, long));
				else if (size_tflag)
					sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t));
				else
					sprintf(realbuffer, fmt, va_arg(vargs, int));
				appendstring(realbuffer);
				break;
			case 'u':
				makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u');
				if (longflag)
					sprintf(realbuffer, fmt, va_arg(vargs, unsigned long));
				else if (size_tflag)
					sprintf(realbuffer, fmt, va_arg(vargs, size_t));
				else
					sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
				appendstring(realbuffer);
				break;
			case 'i':
				makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
				sprintf(realbuffer, fmt, va_arg(vargs, int));
				appendstring(realbuffer);
				break;
			case 'x':
				makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
				sprintf(realbuffer, fmt, va_arg(vargs, int));
				appendstring(realbuffer);
				break;
			case 's':
			{
				/* Parameter must be UTF-8 encoded.
				   In case of encoding errors, use
				   the replacement character. */
				PyObject *u;
				p = va_arg(vargs, char*);
				u = PyUnicode_DecodeUTF8(p, strlen(p), 
							 "replace");
				if (!u)
					goto fail;
				Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
						PyUnicode_GET_SIZE(u));
				s += PyUnicode_GET_SIZE(u);
				Py_DECREF(u);
				break;
			}
			case 'U':
			{
				PyObject *obj = va_arg(vargs, PyObject *);
				Py_ssize_t size = PyUnicode_GET_SIZE(obj);
				Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
				s += size;
				break;
			}
			case 'V':
			{
				PyObject *obj = va_arg(vargs, PyObject *);
				const char *str = va_arg(vargs, const char *);
				if (obj) {
					Py_ssize_t size = PyUnicode_GET_SIZE(obj);
					Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
					s += size;
				} else {
					appendstring(str);
				}
				break;
			}
			case 'S':
			case 'R':
			{
				Py_UNICODE *ucopy;
				Py_ssize_t usize;
				Py_ssize_t upos;
				/* unused, since we already have the result */
				(void) va_arg(vargs, PyObject *);
				ucopy = PyUnicode_AS_UNICODE(*callresult);
				usize = PyUnicode_GET_SIZE(*callresult);
				for (upos = 0; upos<usize;)
					*s++ = ucopy[upos++];
				/* We're done with the unicode()/repr() => forget it */
				Py_DECREF(*callresult);
				/* switch to next unicode()/repr() result */
				++callresult;
				break;
			}
			case 'p':
				sprintf(buffer, "%p", va_arg(vargs, void*));
				/* %p is ill-defined:  ensure leading 0x. */
				if (buffer[1] == 'X')
					buffer[1] = 'x';
				else if (buffer[1] != 'x') {
					memmove(buffer+2, buffer, strlen(buffer)+1);
					buffer[0] = '0';
					buffer[1] = 'x';
				}
				appendstring(buffer);
				break;
			case '%':
				*s++ = '%';
				break;
			default:
				appendstring(p);
				goto end;
			}
		} else
			*s++ = *f;
	}

 end:
	if (callresults)
		PyObject_Free(callresults);
	if (abuffer)
		PyObject_Free(abuffer);
	_PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
	return string;
 fail:
	if (callresults) {
		PyObject **callresult2 = callresults;
		while (callresult2 < callresult) {
			Py_DECREF(*callresult2);
			++callresult2;
		}
		PyObject_Free(callresults);
	}
	if (abuffer)
		PyObject_Free(abuffer);
	return NULL;
}

#undef appendstring

PyObject *
PyUnicode_FromFormat(const char *format, ...)
{
	PyObject* ret;
	va_list vargs;

#ifdef HAVE_STDARG_PROTOTYPES
	va_start(vargs, format);
#else
	va_start(vargs);
#endif
	ret = PyUnicode_FromFormatV(format, vargs);
	va_end(vargs);
	return ret;
}

Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
				wchar_t *w,
				Py_ssize_t size)
{
    if (unicode == NULL) {
	PyErr_BadInternalCall();
	return -1;
    }

    /* If possible, try to copy the 0-termination as well */
    if (size > PyUnicode_GET_SIZE(unicode))
	size = PyUnicode_GET_SIZE(unicode) + 1;

#ifdef HAVE_USABLE_WCHAR_T
    memcpy(w, unicode->str, size * sizeof(wchar_t));
#else
    {
	register Py_UNICODE *u;
	register Py_ssize_t i;
	u = PyUnicode_AS_UNICODE(unicode);
	for (i = size; i > 0; i--)
	    *w++ = *u++;
    }
#endif

    if (size > PyUnicode_GET_SIZE(unicode))
        return PyUnicode_GET_SIZE(unicode);
    else
    return size;
}

#endif

PyObject *PyUnicode_FromOrdinal(int ordinal)
{
    Py_UNICODE s[1];

#ifdef Py_UNICODE_WIDE
    if (ordinal < 0 || ordinal > 0x10ffff) {
	PyErr_SetString(PyExc_ValueError,
			"unichr() arg not in range(0x110000) "
			"(wide Python build)");
	return NULL;
    }
#else
    if (ordinal < 0 || ordinal > 0xffff) {
	PyErr_SetString(PyExc_ValueError,
			"unichr() arg not in range(0x10000) "
			"(narrow Python build)");
	return NULL;
    }
#endif

    s[0] = (Py_UNICODE)ordinal;
    return PyUnicode_FromUnicode(s, 1);
}

PyObject *PyUnicode_FromObject(register PyObject *obj)
{
    /* XXX Perhaps we should make this API an alias of
           PyObject_Unicode() instead ?! */
    if (PyUnicode_CheckExact(obj)) {
	Py_INCREF(obj);
	return obj;
    }
    if (PyUnicode_Check(obj)) {
	/* For a Unicode subtype that's not a Unicode object,
	   return a true Unicode object with the same data. */
	return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
				     PyUnicode_GET_SIZE(obj));
    }
    return PyUnicode_FromEncodedObject(obj, NULL, "strict");
}

PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
				      const char *encoding,
				      const char *errors)
{
    const char *s = NULL;
    Py_ssize_t len;
    PyObject *v;

    if (obj == NULL) {
	PyErr_BadInternalCall();
	return NULL;
    }

#if 0
    /* For b/w compatibility we also accept Unicode objects provided
       that no encodings is given and then redirect to
       PyObject_Unicode() which then applies the additional logic for
       Unicode subclasses.

       NOTE: This API should really only be used for object which
             represent *encoded* Unicode !

    */
	if (PyUnicode_Check(obj)) {
	    if (encoding) {
		PyErr_SetString(PyExc_TypeError,
				"decoding Unicode is not supported");
	    return NULL;
	    }
	return PyObject_Unicode(obj);
	    }
#else
    if (PyUnicode_Check(obj)) {
	PyErr_SetString(PyExc_TypeError,
			"decoding Unicode is not supported");
	return NULL;
	}
#endif

    /* Coerce object */
    if (PyString_Check(obj)) {
	    s = PyString_AS_STRING(obj);
	    len = PyString_GET_SIZE(obj);
	    }
    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
	/* Overwrite the error message with something more useful in
	   case of a TypeError. */
	if (PyErr_ExceptionMatches(PyExc_TypeError))
	PyErr_Format(PyExc_TypeError,
			 "coercing to Unicode: need string or buffer, "
			 "%.80s found",
		     Py_TYPE(obj)->tp_name);
	goto onError;
    }

    /* Convert to Unicode */
    if (len == 0) {
	Py_INCREF(unicode_empty);
	v = (PyObject *)unicode_empty;
    }
    else
	v = PyUnicode_Decode(s, len, encoding, errors);

    return v;

 onError:
    return NULL;
}

PyObject *PyUnicode_Decode(const char *s,
			   Py_ssize_t size,
			   const char *encoding,
			   const char *errors)
{
    PyObject *buffer = NULL, *unicode;

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    if (strcmp(encoding, "utf-8") == 0)
        return PyUnicode_DecodeUTF8(s, size, errors);
    else if (strcmp(encoding, "latin-1") == 0)
        return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
    else if (strcmp(encoding, "mbcs") == 0)
        return PyUnicode_DecodeMBCS(s, size, errors);
#endif
    else if (strcmp(encoding, "ascii") == 0)
        return PyUnicode_DecodeASCII(s, size, errors);

    /* Decode via the codec registry */
    buffer = PyBuffer_FromMemory((void *)s, size);
    if (buffer == NULL)
        goto onError;
    unicode = PyCodec_Decode(buffer, encoding, errors);
    if (unicode == NULL)
        goto onError;
    if (!PyUnicode_Check(unicode)) {
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return an unicode object (type=%.400s)",
                     Py_TYPE(unicode)->tp_name);
        Py_DECREF(unicode);
        goto onError;
    }
    Py_DECREF(buffer);
    return unicode;

 onError:
    Py_XDECREF(buffer);
    return NULL;
}

PyObject *PyUnicode_AsDecodedObject(PyObject *unicode,
                                    const char *encoding,
                                    const char *errors)
{
    PyObject *v;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Decode via the codec registry */
    v = PyCodec_Decode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    return v;

 onError:
    return NULL;
}

PyObject *PyUnicode_Encode(const Py_UNICODE *s,
			   Py_ssize_t size,
			   const char *encoding,
			   const char *errors)
{
    PyObject *v, *unicode;

    unicode = PyUnicode_FromUnicode(s, size);
    if (unicode == NULL)
	return NULL;
    v = PyUnicode_AsEncodedString(unicode, encoding, errors);
    Py_DECREF(unicode);
    return v;
}

PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
                                    const char *encoding,
                                    const char *errors)
{
    PyObject *v;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    return v;

 onError:
    return NULL;
}

PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
                                    const char *encoding,
                                    const char *errors)
{
    PyObject *v;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    if (errors == NULL) {
	if (strcmp(encoding, "utf-8") == 0)
	    return PyUnicode_AsUTF8String(unicode);
	else if (strcmp(encoding, "latin-1") == 0)
	    return PyUnicode_AsLatin1String(unicode);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
	else if (strcmp(encoding, "mbcs") == 0)
	    return PyUnicode_AsMBCSString(unicode);
#endif
	else if (strcmp(encoding, "ascii") == 0)
	    return PyUnicode_AsASCIIString(unicode);
    }

    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    if (!PyString_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder did not return a string object (type=%.400s)",
                     Py_TYPE(v)->tp_name);
        Py_DECREF(v);
        goto onError;
    }
    return v;

 onError:
    return NULL;
}

PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
					    const char *errors)
{
    PyObject *v = ((PyUnicodeObject *)unicode)->defenc;

    if (v)
        return v;
    v = PyUnicode_AsEncodedString(unicode, NULL, errors);
    if (v && errors == NULL)
        ((PyUnicodeObject *)unicode)->defenc = v;
    return v;
}

Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    return PyUnicode_AS_UNICODE(unicode);

 onError:
    return NULL;
}

Py_ssize_t PyUnicode_GetSize(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    return PyUnicode_GET_SIZE(unicode);

 onError:
    return -1;
}

const char *PyUnicode_GetDefaultEncoding(void)
{
    return unicode_default_encoding;
}

int PyUnicode_SetDefaultEncoding(const char *encoding)
{
    PyObject *v;

    /* Make sure the encoding is valid. As side effect, this also
       loads the encoding into the codec registry cache. */
    v = _PyCodec_Lookup(encoding);
    if (v == NULL)
	goto onError;
    Py_DECREF(v);
    strncpy(unicode_default_encoding,
	    encoding,
	    sizeof(unicode_default_encoding));
    return 0;

 onError:
    return -1;
}

/* error handling callback helper:
   build arguments, call the callback and check the arguments,
   if no exception occurred, copy the replacement to the output
   and adjust various state variables.
   return 0 on success, -1 on error
*/

static
int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
                 const char *encoding, const char *reason,
                 const char *input, Py_ssize_t insize, Py_ssize_t *startinpos,
                 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
                 PyObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
{
    static char *argparse = "O!n;decoding error handler must return (unicode, int) tuple";

    PyObject *restuple = NULL;
    PyObject *repunicode = NULL;
    Py_ssize_t outsize = PyUnicode_GET_SIZE(*output);
    Py_ssize_t requiredsize;
    Py_ssize_t newpos;
    Py_UNICODE *repptr;
    Py_ssize_t repsize;
    int res = -1;

    if (*errorHandler == NULL) {
	*errorHandler = PyCodec_LookupError(errors);
	if (*errorHandler == NULL)
	   goto onError;
    }

    if (*exceptionObject == NULL) {
    	*exceptionObject = PyUnicodeDecodeError_Create(
	    encoding, input, insize, *startinpos, *endinpos, reason);
	if (*exceptionObject == NULL)
	   goto onError;
    }
    else {
	if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos))
	    goto onError;
	if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos))
	    goto onError;
	if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
	    goto onError;
    }

    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
    if (restuple == NULL)
	goto onError;
    if (!PyTuple_Check(restuple)) {
	PyErr_Format(PyExc_TypeError, &argparse[4]);
	goto onError;
    }
    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
	goto onError;
    if (newpos<0)
	newpos = insize+newpos;
    if (newpos<0 || newpos>insize) {
	PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
	goto onError;
    }

    /* need more space? (at least enough for what we
       have+the replacement+the rest of the string (starting
       at the new input position), so we won't have to check space
       when there are no errors in the rest of the string) */
    repptr = PyUnicode_AS_UNICODE(repunicode);
    repsize = PyUnicode_GET_SIZE(repunicode);
    requiredsize = *outpos + repsize + insize-newpos;
    if (requiredsize > outsize) {
	if (requiredsize<2*outsize)
	    requiredsize = 2*outsize;
	if (PyUnicode_Resize(output, requiredsize) < 0)
	    goto onError;
	*outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
    }
    *endinpos = newpos;
    *inptr = input + newpos;
    Py_UNICODE_COPY(*outptr, repptr, repsize);
    *outptr += repsize;
    *outpos += repsize;
    /* we made it! */
    res = 0;

    onError:
    Py_XDECREF(restuple);
    return res;
}

/* --- UTF-7 Codec -------------------------------------------------------- */

/* see RFC2152 for details */

static
char utf7_special[128] = {
    /* indicate whether a UTF-7 character is special i.e. cannot be directly
       encoded:
	   0 - not special
	   1 - special
	   2 - whitespace (optional)
	   3 - RFC2152 Set O (optional) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3,
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,

};

/* Note: The comparison (c) <= 0 is a trick to work-around gcc
   warnings about the comparison always being false; since
   utf7_special[0] is 1, we can safely make that one comparison
   true  */

#define SPECIAL(c, encodeO, encodeWS) \
    ((c) > 127 || (c) <= 0 || utf7_special[(c)] == 1 || \
     (encodeWS && (utf7_special[(c)] == 2)) || \
     (encodeO && (utf7_special[(c)] == 3)))

#define B64(n)  \
    ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
#define B64CHAR(c) \
    (isalnum(c) || (c) == '+' || (c) == '/')
#define UB64(c) \
    ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ?                   \
     (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4 )

#define ENCODE(out, ch, bits)                   \
    while (bits >= 6) {                         \
        *out++ = B64(ch >> (bits-6));           \
        bits -= 6;                              \
    }

#define DECODE(out, ch, bits, surrogate)                                \
    while (bits >= 16) {                                                \
        Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff);   \
        bits -= 16;                                                     \
        if (surrogate) {                                                \
            /* We have already generated an error for the high surrogate \
               so let's not bother seeing if the low surrogate is correct or not */ \
            surrogate = 0;                                              \
        } else if (0xDC00 <= outCh && outCh <= 0xDFFF) {                \
            /* This is a surrogate pair. Unfortunately we can't represent \
               it in a 16-bit character */                              \
            surrogate = 1;                                              \
            errmsg = "code pairs are not supported";                    \
            goto utf7Error;                                             \
        } else {                                                        \
            *out++ = outCh;                                             \
        }                                                               \
    }

PyObject *PyUnicode_DecodeUTF7(const char *s,
			       Py_ssize_t size,
			       const char *errors)
{
    return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
}

PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
			       Py_ssize_t size,
			       const char *errors,
			       Py_ssize_t *consumed)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const char *errmsg = "";
    int inShift = 0;
    unsigned int bitsleft = 0;
    unsigned long charsleft = 0;
    int surrogate = 0;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0) {
        if (consumed)
            *consumed = 0;
        return (PyObject *)unicode;
    }

    p = unicode->str;
    e = s + size;

    while (s < e) {
        Py_UNICODE ch;
        restart:
        ch = *s;

        if (inShift) {
            if ((ch == '-') || !B64CHAR(ch)) {
                inShift = 0;
                s++;

                /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
                if (bitsleft >= 6) {
                    /* The shift sequence has a partial character in it. If
                       bitsleft < 6 then we could just classify it as padding
                       but that is not the case here */

                    errmsg = "partial character in shift sequence";
                    goto utf7Error;
                }
                /* According to RFC2152 the remaining bits should be zero. We
                   choose to signal an error/insert a replacement character
                   here so indicate the potential of a misencoded character. */

                /* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */
                if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) {
                    errmsg = "non-zero padding bits in shift sequence";
                    goto utf7Error;
                }

                if (ch == '-') {
                    if ((s < e) && (*(s) == '-')) {
                        *p++ = '-';
                        inShift = 1;
                    }
                } else if (SPECIAL(ch,0,0)) {
                    errmsg = "unexpected special character";
	                goto utf7Error;
                } else  {
                    *p++ = ch;
                }
            } else {
                charsleft = (charsleft << 6) | UB64(ch);
                bitsleft += 6;
                s++;
                /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
            }
        }
        else if ( ch == '+' ) {
            startinpos = s-starts;
            s++;
            if (s < e && *s == '-') {
                s++;
                *p++ = '+';
            } else
            {
                inShift = 1;
                bitsleft = 0;
            }
        }
        else if (SPECIAL(ch,0,0)) {
            startinpos = s-starts;
            errmsg = "unexpected special character";
            s++;
	        goto utf7Error;
        }
        else {
            *p++ = ch;
            s++;
        }
        continue;
    utf7Error:
        outpos = p-PyUnicode_AS_UNICODE(unicode);
        endinpos = s-starts;
        if (unicode_decode_call_errorhandler(
             errors, &errorHandler,
             "utf7", errmsg,
             starts, size, &startinpos, &endinpos, &exc, &s,
             (PyObject **)&unicode, &outpos, &p))
        goto onError;
    }

    if (inShift && !consumed) {
        outpos = p-PyUnicode_AS_UNICODE(unicode);
        endinpos = size;
        if (unicode_decode_call_errorhandler(
             errors, &errorHandler,
             "utf7", "unterminated shift sequence",
             starts, size, &startinpos, &endinpos, &exc, &s,
             (PyObject **)&unicode, &outpos, &p))
            goto onError;
        if (s < e)
           goto restart;
    }
    if (consumed) {
        if(inShift)
            *consumed = startinpos;
        else
            *consumed = s-starts;
    }

    if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

onError:
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    Py_DECREF(unicode);
    return NULL;
}


PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
                   Py_ssize_t size,
                   int encodeSetO,
                   int encodeWhiteSpace,
                   const char *errors)
{
    PyObject *v;
    /* It might be possible to tighten this worst case */
    Py_ssize_t cbAllocated = 5 * size;
    int inShift = 0;
    Py_ssize_t i = 0;
    unsigned int bitsleft = 0;
    unsigned long charsleft = 0;
    char * out;
    char * start;

    if (size == 0)
		return PyString_FromStringAndSize(NULL, 0);

    v = PyString_FromStringAndSize(NULL, cbAllocated);
    if (v == NULL)
        return NULL;

    start = out = PyString_AS_STRING(v);
    for (;i < size; ++i) {
        Py_UNICODE ch = s[i];

        if (!inShift) {
            if (ch == '+') {
                *out++ = '+';
                *out++ = '-';
            } else if (SPECIAL(ch, encodeSetO, encodeWhiteSpace)) {
                charsleft = ch;
                bitsleft = 16;
                *out++ = '+';
                /* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft);
                inShift = bitsleft > 0;
            } else {
                *out++ = (char) ch;
            }
        } else {
            if (!SPECIAL(ch, encodeSetO, encodeWhiteSpace)) {
                *out++ = B64(charsleft << (6-bitsleft));
                charsleft = 0;
                bitsleft = 0;
                /* Characters not in the BASE64 set implicitly unshift the sequence
                   so no '-' is required, except if the character is itself a '-' */
                if (B64CHAR(ch) || ch == '-') {
                    *out++ = '-';
                }
                inShift = 0;
                *out++ = (char) ch;
            } else {
                bitsleft += 16;
                charsleft = (charsleft << 16) | ch;
                /* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft);

                /* If the next character is special then we dont' need to terminate
                   the shift sequence. If the next character is not a BASE64 character
                   or '-' then the shift sequence will be terminated implicitly and we
                   don't have to insert a '-'. */

                if (bitsleft == 0) {
                    if (i + 1 < size) {
                        Py_UNICODE ch2 = s[i+1];

                        if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) {

                        } else if (B64CHAR(ch2) || ch2 == '-') {
                            *out++ = '-';
                            inShift = 0;
                        } else {
                            inShift = 0;
                        }

                    }
                    else {
                        *out++ = '-';
                        inShift = 0;
                    }
                }
            }
        }
    }
    if (bitsleft) {
        *out++= B64(charsleft << (6-bitsleft) );
        *out++ = '-';
    }

    _PyString_Resize(&v, out - start);
    return v;
}

#undef SPECIAL
#undef B64
#undef B64CHAR
#undef UB64
#undef ENCODE
#undef DECODE

/* --- UTF-8 Codec -------------------------------------------------------- */

static
char utf8_code_length[256] = {
    /* Map UTF-8 encoded prefix byte to sequence length.  zero means
       illegal prefix.  see RFC 2279 for details */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};

PyObject *PyUnicode_DecodeUTF8(const char *s,
			       Py_ssize_t size,
			       const char *errors)
{
    return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
}

PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
			                Py_ssize_t size,
			                const char *errors,
			                Py_ssize_t *consumed)
{
    const char *starts = s;
    int n;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const char *errmsg = "";
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Note: size will always be longer than the resulting Unicode
       character count */
    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0) {
        if (consumed)
            *consumed = 0;
        return (PyObject *)unicode;
    }

    /* Unpack UTF-8 encoded data */
    p = unicode->str;
    e = s + size;

    while (s < e) {
        Py_UCS4 ch = (unsigned char)*s;

        if (ch < 0x80) {
            *p++ = (Py_UNICODE)ch;
            s++;
            continue;
        }

        n = utf8_code_length[ch];

        if (s + n > e) {
	    if (consumed)
		break;
	    else {
		errmsg = "unexpected end of data";
		startinpos = s-starts;
		endinpos = size;
		goto utf8Error;
	    }
	}

        switch (n) {

        case 0:
            errmsg = "unexpected code byte";
	    startinpos = s-starts;
	    endinpos = startinpos+1;
	    goto utf8Error;

        case 1:
            errmsg = "internal error";
	    startinpos = s-starts;
	    endinpos = startinpos+1;
	    goto utf8Error;

        case 2:
            if ((s[1] & 0xc0) != 0x80) {
                errmsg = "invalid data";
		startinpos = s-starts;
		endinpos = startinpos+2;
		goto utf8Error;
	    }
            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
            if (ch < 0x80) {
		startinpos = s-starts;
		endinpos = startinpos+2;
                errmsg = "illegal encoding";
		goto utf8Error;
	    }
	    else
		*p++ = (Py_UNICODE)ch;
            break;

        case 3:
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80) {
                errmsg = "invalid data";
		startinpos = s-starts;
		endinpos = startinpos+3;
		goto utf8Error;
	    }
            ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
            if (ch < 0x0800) {
		/* Note: UTF-8 encodings of surrogates are considered
		   legal UTF-8 sequences;

		   XXX For wide builds (UCS-4) we should probably try
		       to recombine the surrogates into a single code
		       unit.
		*/
                errmsg = "illegal encoding";
		startinpos = s-starts;
		endinpos = startinpos+3;
		goto utf8Error;
	    }
	    else
		*p++ = (Py_UNICODE)ch;
            break;

        case 4:
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[3] & 0xc0) != 0x80) {
                errmsg = "invalid data";
		startinpos = s-starts;
		endinpos = startinpos+4;
		goto utf8Error;
	    }
            ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
                 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
            /* validate and convert to UTF-16 */
            if ((ch < 0x10000)        /* minimum value allowed for 4
					 byte encoding */
                || (ch > 0x10ffff))   /* maximum value allowed for
					 UTF-16 */
	    {
                errmsg = "illegal encoding";
		startinpos = s-starts;
		endinpos = startinpos+4;
		goto utf8Error;
	    }
#ifdef Py_UNICODE_WIDE
	    *p++ = (Py_UNICODE)ch;
#else
            /*  compute and append the two surrogates: */

            /*  translate from 10000..10FFFF to 0..FFFF */
            ch -= 0x10000;

            /*  high surrogate = top 10 bits added to D800 */
            *p++ = (Py_UNICODE)(0xD800 + (ch >> 10));

            /*  low surrogate = bottom 10 bits added to DC00 */
            *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
#endif
            break;

        default:
            /* Other sizes are only needed for UCS-4 */
            errmsg = "unsupported Unicode code range";
	    startinpos = s-starts;
	    endinpos = startinpos+n;
	    goto utf8Error;
        }
        s += n;
	continue;

    utf8Error:
    outpos = p-PyUnicode_AS_UNICODE(unicode);
    if (unicode_decode_call_errorhandler(
	     errors, &errorHandler,
	     "utf8", errmsg,
	     starts, size, &startinpos, &endinpos, &exc, &s,
	     (PyObject **)&unicode, &outpos, &p))
	goto onError;
    }
    if (consumed)
	*consumed = s-starts;

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

onError:
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    Py_DECREF(unicode);
    return NULL;
}

/* Allocation strategy:  if the string is short, convert into a stack buffer
   and allocate exactly as much space needed at the end.  Else allocate the
   maximum possible needed (4 result bytes per Unicode character), and return
   the excess memory at the end.
*/
PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
		     Py_ssize_t size,
		     const char *errors)
{
#define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */

    Py_ssize_t i;           /* index into s of next input byte */
    PyObject *v;        /* result string object */
    char *p;            /* next free byte in output buffer */
    Py_ssize_t nallocated;  /* number of result bytes allocated */
    Py_ssize_t nneeded;        /* number of result bytes needed */
    char stackbuf[MAX_SHORT_UNICHARS * 4];

    assert(s != NULL);
    assert(size >= 0);

    if (size <= MAX_SHORT_UNICHARS) {
        /* Write into the stack buffer; nallocated can't overflow.
         * At the end, we'll allocate exactly as much heap space as it
         * turns out we need.
         */
        nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
        v = NULL;   /* will allocate after we're done */
        p = stackbuf;
    }
    else {
        /* Overallocate on the heap, and give the excess back at the end. */
        nallocated = size * 4;
        if (nallocated / 4 != size)  /* overflow! */
            return PyErr_NoMemory();
        v = PyString_FromStringAndSize(NULL, nallocated);
        if (v == NULL)
            return NULL;
        p = PyString_AS_STRING(v);
    }

    for (i = 0; i < size;) {
        Py_UCS4 ch = s[i++];

        if (ch < 0x80)
            /* Encode ASCII */
            *p++ = (char) ch;

        else if (ch < 0x0800) {
            /* Encode Latin-1 */
            *p++ = (char)(0xc0 | (ch >> 6));
            *p++ = (char)(0x80 | (ch & 0x3f));
        }
        else {
            /* Encode UCS2 Unicode ordinals */
            if (ch < 0x10000) {
                /* Special case: check for high surrogate */
                if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
                    Py_UCS4 ch2 = s[i];
                    /* Check for low surrogate and combine the two to
                       form a UCS4 value */
                    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                        ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
                        i++;
                        goto encodeUCS4;
                    }
                    /* Fall through: handles isolated high surrogates */
                }
                *p++ = (char)(0xe0 | (ch >> 12));
                *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
                *p++ = (char)(0x80 | (ch & 0x3f));
                continue;
    	    }
encodeUCS4:
            /* Encode UCS4 Unicode ordinals */
            *p++ = (char)(0xf0 | (ch >> 18));
            *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
            *p++ = (char)(0x80 | (ch & 0x3f));
        }
    }

    if (v == NULL) {
        /* This was stack allocated. */
        nneeded = p - stackbuf;
        assert(nneeded <= nallocated);
        v = PyString_FromStringAndSize(stackbuf, nneeded);
    }
    else {
    	/* Cut back to size actually needed. */
        nneeded = p - PyString_AS_STRING(v);
        assert(nneeded <= nallocated);
        _PyString_Resize(&v, nneeded);
    }
    return v;

#undef MAX_SHORT_UNICHARS
}

PyObject *PyUnicode_AsUTF8String(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
				PyUnicode_GET_SIZE(unicode),
				NULL);
}

/* --- UTF-32 Codec ------------------------------------------------------- */

PyObject *
PyUnicode_DecodeUTF32(const char *s,
		      Py_ssize_t size,
		      const char *errors,
		      int *byteorder)
{
    return PyUnicode_DecodeUTF32Stateful(s, size, errors, byteorder, NULL);
}

PyObject *
PyUnicode_DecodeUTF32Stateful(const char *s,
			      Py_ssize_t size,
			      const char *errors,
			      int *byteorder,
			      Py_ssize_t *consumed)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
#ifndef Py_UNICODE_WIDE
    int i, pairs;
#else
    const int pairs = 0;
#endif
    const unsigned char *q, *e;
    int bo = 0;       /* assume native ordering by default */
    const char *errmsg = "";
    /* Offsets from q for retrieving bytes in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int iorder[] = {0, 1, 2, 3};
#else
    int iorder[] = {3, 2, 1, 0};
#endif
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    /* On narrow builds we split characters outside the BMP into two
       codepoints => count how much extra space we need. */
#ifndef Py_UNICODE_WIDE
    for (i = pairs = 0; i < size/4; i++)
	if (((Py_UCS4 *)s)[i] >= 0x10000)
	    pairs++;
#endif

    /* This might be one to much, because of a BOM */
    unicode = _PyUnicode_New((size+3)/4+pairs);
    if (!unicode)
        return NULL;
    if (size == 0)
        return (PyObject *)unicode;

    /* Unpack UTF-32 encoded data */
    p = unicode->str;
    q = (unsigned char *)s;
    e = q + size;

    if (byteorder)
        bo = *byteorder;

    /* Check for BOM marks (U+FEFF) in the input and adjust current
       byte order setting accordingly. In native mode, the leading BOM
       mark is skipped, in all other modes, it is copied to the output
       stream as-is (giving a ZWNBSP character). */
    if (bo == 0) {
        if (size >= 4) {
            const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
                                (q[iorder[1]] << 8) | q[iorder[0]];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
	    if (bom == 0x0000FEFF) {
		q += 4;
		bo = -1;
	    }
	    else if (bom == 0xFFFE0000) {
		q += 4;
		bo = 1;
	    }
#else
	    if (bom == 0x0000FEFF) {
		q += 4;
		bo = 1;
	    }
	    else if (bom == 0xFFFE0000) {
		q += 4;
		bo = -1;
	    }
#endif
	}
    }

    if (bo == -1) {
        /* force LE */
        iorder[0] = 0;
        iorder[1] = 1;
        iorder[2] = 2;
        iorder[3] = 3;
    }
    else if (bo == 1) {
        /* force BE */
        iorder[0] = 3;
        iorder[1] = 2;
        iorder[2] = 1;
        iorder[3] = 0;
    }

    while (q < e) {
	Py_UCS4 ch;
	/* remaining bytes at the end? (size should be divisible by 4) */
	if (e-q<4) {
	    if (consumed)
		break;
	    errmsg = "truncated data";
	    startinpos = ((const char *)q)-starts;
	    endinpos = ((const char *)e)-starts;
	    goto utf32Error;
	    /* The remaining input chars are ignored if the callback
	       chooses to skip the input */
	}
	ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
	     (q[iorder[1]] << 8) | q[iorder[0]];

	if (ch >= 0x110000)
	{
	    errmsg = "codepoint not in range(0x110000)";
	    startinpos = ((const char *)q)-starts;
	    endinpos = startinpos+4;
	    goto utf32Error;
	}
#ifndef Py_UNICODE_WIDE
	if (ch >= 0x10000)
	{
	    *p++ = 0xD800 | ((ch-0x10000) >> 10);
	    *p++ = 0xDC00 | ((ch-0x10000) & 0x3FF);
	}
	else
#endif
	    *p++ = ch;
	q += 4;
	continue;
    utf32Error:
	outpos = p-PyUnicode_AS_UNICODE(unicode);
    if (unicode_decode_call_errorhandler(
         errors, &errorHandler,
         "utf32", errmsg,
         starts, size, &startinpos, &endinpos, &exc, &s,
         (PyObject **)&unicode, &outpos, &p))
	    goto onError;
    }

    if (byteorder)
        *byteorder = bo;

    if (consumed)
	*consumed = (const char *)q-starts;

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

onError:
    Py_DECREF(unicode);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

PyObject *
PyUnicode_EncodeUTF32(const Py_UNICODE *s,
		      Py_ssize_t size,
		      const char *errors,
		      int byteorder)
{
    PyObject *v;
    unsigned char *p;
#ifndef Py_UNICODE_WIDE
    int i, pairs;
#else
    const int pairs = 0;
#endif
    /* Offsets from p for storing byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int iorder[] = {0, 1, 2, 3};
#else
    int iorder[] = {3, 2, 1, 0};
#endif

#define STORECHAR(CH)                       \
    do {                                    \
        p[iorder[3]] = ((CH) >> 24) & 0xff; \
        p[iorder[2]] = ((CH) >> 16) & 0xff; \
        p[iorder[1]] = ((CH) >> 8) & 0xff;  \
        p[iorder[0]] = (CH) & 0xff;         \
        p += 4;                             \
    } while(0)

    /* In narrow builds we can output surrogate pairs as one codepoint,
       so we need less space. */
#ifndef Py_UNICODE_WIDE
    for (i = pairs = 0; i < size-1; i++)
	if (0xD800 <= s[i] && s[i] <= 0xDBFF &&
	    0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
	    pairs++;
#endif
    v = PyString_FromStringAndSize(NULL,
		  4 * (size - pairs + (byteorder == 0)));
    if (v == NULL)
        return NULL;

    p = (unsigned char *)PyString_AS_STRING(v);
    if (byteorder == 0)
	STORECHAR(0xFEFF);
    if (size == 0)
        return v;

    if (byteorder == -1) {
        /* force LE */
        iorder[0] = 0;
        iorder[1] = 1;
        iorder[2] = 2;
        iorder[3] = 3;
    }
    else if (byteorder == 1) {
        /* force BE */
        iorder[0] = 3;
        iorder[1] = 2;
        iorder[2] = 1;
        iorder[3] = 0;
    }

    while (size-- > 0) {
	Py_UCS4 ch = *s++;
#ifndef Py_UNICODE_WIDE
	if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
	    Py_UCS4 ch2 = *s;
	    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
		ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
		s++;
		size--;
	    }
	}
#endif
        STORECHAR(ch);
    }
    return v;
#undef STORECHAR
}

PyObject *PyUnicode_AsUTF32String(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode),
				 PyUnicode_GET_SIZE(unicode),
				 NULL,
				 0);
}

/* --- UTF-16 Codec ------------------------------------------------------- */

PyObject *
PyUnicode_DecodeUTF16(const char *s,
		      Py_ssize_t size,
		      const char *errors,
		      int *byteorder)
{
    return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
}

PyObject *
PyUnicode_DecodeUTF16Stateful(const char *s,
			      Py_ssize_t size,
			      const char *errors,
			      int *byteorder,
			      Py_ssize_t *consumed)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const unsigned char *q, *e;
    int bo = 0;       /* assume native ordering by default */
    const char *errmsg = "";
    /* Offsets from q for retrieving byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int ihi = 1, ilo = 0;
#else
    int ihi = 0, ilo = 1;
#endif
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Note: size will always be longer than the resulting Unicode
       character count */
    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0)
        return (PyObject *)unicode;

    /* Unpack UTF-16 encoded data */
    p = unicode->str;
    q = (unsigned char *)s;
    e = q + size;

    if (byteorder)
        bo = *byteorder;

    /* Check for BOM marks (U+FEFF) in the input and adjust current
       byte order setting accordingly. In native mode, the leading BOM
       mark is skipped, in all other modes, it is copied to the output
       stream as-is (giving a ZWNBSP character). */
    if (bo == 0) {
        if (size >= 2) {
            const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
	    if (bom == 0xFEFF) {
		q += 2;
		bo = -1;
	    }
	    else if (bom == 0xFFFE) {
		q += 2;
		bo = 1;
	    }
#else
	    if (bom == 0xFEFF) {
		q += 2;
		bo = 1;
	    }
	    else if (bom == 0xFFFE) {
		q += 2;
		bo = -1;
	    }
#endif
	}
    }

    if (bo == -1) {
        /* force LE */
        ihi = 1;
        ilo = 0;
    }
    else if (bo == 1) {
        /* force BE */
        ihi = 0;
        ilo = 1;
    }

    while (q < e) {
	Py_UNICODE ch;
	/* remaining bytes at the end? (size should be even) */
	if (e-q<2) {
	    if (consumed)
		break;
	    errmsg = "truncated data";
	    startinpos = ((const char *)q)-starts;
	    endinpos = ((const char *)e)-starts;
	    goto utf16Error;
	    /* The remaining input chars are ignored if the callback
	       chooses to skip the input */
	}
	ch = (q[ihi] << 8) | q[ilo];

	q += 2;

	if (ch < 0xD800 || ch > 0xDFFF) {
	    *p++ = ch;
	    continue;
	}

	/* UTF-16 code pair: */
	if (q >= e) {
	    errmsg = "unexpected end of data";
	    startinpos = (((const char *)q)-2)-starts;
	    endinpos = ((const char *)e)-starts;
	    goto utf16Error;
	}
	if (0xD800 <= ch && ch <= 0xDBFF) {
	    Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
	    q += 2;
	    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#ifndef Py_UNICODE_WIDE
		*p++ = ch;
		*p++ = ch2;
#else
		*p++ = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
#endif
		continue;
	    }
	    else {
                errmsg = "illegal UTF-16 surrogate";
		startinpos = (((const char *)q)-4)-starts;
		endinpos = startinpos+2;
		goto utf16Error;
	    }

	}
	errmsg = "illegal encoding";
	startinpos = (((const char *)q)-2)-starts;
	endinpos = startinpos+2;
	/* Fall through to report the error */

    utf16Error:
	outpos = p-PyUnicode_AS_UNICODE(unicode);
	if (unicode_decode_call_errorhandler(
	         errors, &errorHandler,
	         "utf16", errmsg,
	         starts, size, &startinpos, &endinpos, &exc, (const char **)&q,
	         (PyObject **)&unicode, &outpos, &p))
	    goto onError;
    }

    if (byteorder)
        *byteorder = bo;

    if (consumed)
	*consumed = (const char *)q-starts;

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
        goto onError;

    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)unicode;

onError:
    Py_DECREF(unicode);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

PyObject *
PyUnicode_EncodeUTF16(const Py_UNICODE *s,
		      Py_ssize_t size,
		      const char *errors,
		      int byteorder)
{
    PyObject *v;
    unsigned char *p;
#ifdef Py_UNICODE_WIDE
    int i, pairs;
#else
    const int pairs = 0;
#endif
    /* Offsets from p for storing byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
    int ihi = 1, ilo = 0;
#else
    int ihi = 0, ilo = 1;
#endif

#define STORECHAR(CH)                   \
    do {                                \
        p[ihi] = ((CH) >> 8) & 0xff;    \
        p[ilo] = (CH) & 0xff;           \
        p += 2;                         \
    } while(0)

#ifdef Py_UNICODE_WIDE
    for (i = pairs = 0; i < size; i++)
	if (s[i] >= 0x10000)
	    pairs++;
#endif
    v = PyString_FromStringAndSize(NULL,
		  2 * (size + pairs + (byteorder == 0)));
    if (v == NULL)
        return NULL;

    p = (unsigned char *)PyString_AS_STRING(v);
    if (byteorder == 0)
	STORECHAR(0xFEFF);
    if (size == 0)
        return v;

    if (byteorder == -1) {
        /* force LE */
        ihi = 1;
        ilo = 0;
    }
    else if (byteorder == 1) {
        /* force BE */
        ihi = 0;
        ilo = 1;
    }

    while (size-- > 0) {
	Py_UNICODE ch = *s++;
	Py_UNICODE ch2 = 0;
#ifdef Py_UNICODE_WIDE
	if (ch >= 0x10000) {
	    ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF);
	    ch  = 0xD800 | ((ch-0x10000) >> 10);
	}
#endif
        STORECHAR(ch);
        if (ch2)
            STORECHAR(ch2);
    }
    return v;
#undef STORECHAR
}

PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode),
				 PyUnicode_GET_SIZE(unicode),
				 NULL,
				 0);
}

/* --- Unicode Escape Codec ----------------------------------------------- */

static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;

PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
					Py_ssize_t size,
					const char *errors)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    int i;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    char* message;
    Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
       length after conversion to the true value.
       (but if the error callback returns a long replacement string
       we'll have to allocate more space) */
    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;

    p = PyUnicode_AS_UNICODE(v);
    end = s + size;

    while (s < end) {
        unsigned char c;
        Py_UNICODE x;
        int digits;

        /* Non-escape characters are interpreted as Unicode ordinals */
        if (*s != '\\') {
            *p++ = (unsigned char) *s++;
            continue;
        }

        startinpos = s-starts;
        /* \ - Escapes */
        s++;
        c = *s++;
        if (s > end)
            c = '\0'; /* Invalid after \ */
        switch (c) {

        /* \x escapes */
        case '\n': break;
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */

        /* \OOO (octal) escapes */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            x = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                x = (x<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    x = (x<<3) + *s++ - '0';
            }
            *p++ = x;
            break;

        /* hex escapes */
        /* \xXX */
        case 'x':
            digits = 2;
            message = "truncated \\xXX escape";
            goto hexescape;

        /* \uXXXX */
        case 'u':
            digits = 4;
            message = "truncated \\uXXXX escape";
            goto hexescape;

        /* \UXXXXXXXX */
        case 'U':
            digits = 8;
            message = "truncated \\UXXXXXXXX escape";
        hexescape:
            chr = 0;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (s+digits>end) {
                endinpos = size;
                if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicodeescape", "end of string in escape sequence",
                    starts, size, &startinpos, &endinpos, &exc, &s,
                    (PyObject **)&v, &outpos, &p))
                    goto onError;
                goto nextByte;
            }
            for (i = 0; i < digits; ++i) {
                c = (unsigned char) s[i];
                if (!isxdigit(c)) {
                    endinpos = (s+i+1)-starts;
                    if (unicode_decode_call_errorhandler(
                        errors, &errorHandler,
                        "unicodeescape", message,
                        starts, size, &startinpos, &endinpos, &exc, &s,
                        (PyObject **)&v, &outpos, &p))
                        goto onError;
                    goto nextByte;
                }
                chr = (chr<<4) & ~0xF;
                if (c >= '0' && c <= '9')
                    chr += c - '0';
                else if (c >= 'a' && c <= 'f')
                    chr += 10 + c - 'a';
                else
                    chr += 10 + c - 'A';
            }
            s += i;
            if (chr == 0xffffffff && PyErr_Occurred())
                /* _decoding_error will have already written into the
                   target buffer. */
                break;
        store:
            /* when we get here, chr is a 32-bit unicode character */
            if (chr <= 0xffff)
                /* UCS-2 character */
                *p++ = (Py_UNICODE) chr;
            else if (chr <= 0x10ffff) {
                /* UCS-4 character. Either store directly, or as
                   surrogate pair. */
#ifdef Py_UNICODE_WIDE
                *p++ = chr;
#else
                chr -= 0x10000L;
                *p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
                *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
#endif
            } else {
                endinpos = s-starts;
                outpos = p-PyUnicode_AS_UNICODE(v);
                if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicodeescape", "illegal Unicode character",
                    starts, size, &startinpos, &endinpos, &exc, &s,
                    (PyObject **)&v, &outpos, &p))
                    goto onError;
            }
            break;

        /* \N{name} */
        case 'N':
            message = "malformed \\N character escape";
            if (ucnhash_CAPI == NULL) {
                /* load the unicode data module */
                PyObject *m, *api;
                m = PyImport_ImportModuleNoBlock("unicodedata");
                if (m == NULL)
                    goto ucnhashError;
                api = PyObject_GetAttrString(m, "ucnhash_CAPI");
                Py_DECREF(m);
                if (api == NULL)
                    goto ucnhashError;
                ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCObject_AsVoidPtr(api);
                Py_DECREF(api);
                if (ucnhash_CAPI == NULL)
                    goto ucnhashError;
            }
            if (*s == '{') {
                const char *start = s+1;
                /* look for the closing brace */
                while (*s != '}' && s < end)
                    s++;
                if (s > start && s < end && *s == '}') {
                    /* found a name.  look it up in the unicode database */
                    message = "unknown Unicode character name";
                    s++;
                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
                        goto store;
                }
            }
            endinpos = s-starts;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (unicode_decode_call_errorhandler(
                errors, &errorHandler,
                "unicodeescape", message,
                starts, size, &startinpos, &endinpos, &exc, &s,
                (PyObject **)&v, &outpos, &p))
                goto onError;
            break;

        default:
            if (s > end) {
                message = "\\ at end of string";
                s--;
                endinpos = s-starts;
                outpos = p-PyUnicode_AS_UNICODE(v);
                if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicodeescape", message,
                    starts, size, &startinpos, &endinpos, &exc, &s,
                    (PyObject **)&v, &outpos, &p))
                    goto onError;
            }
            else {
                *p++ = '\\';
                *p++ = (unsigned char)s[-1];
            }
            break;
        }
        nextByte:
        ;
    }
    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

ucnhashError:
    PyErr_SetString(
        PyExc_UnicodeError,
        "\\N escapes not supported (can't load unicodedata module)"
        );
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;

onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

/* Return a Unicode-Escape string version of the Unicode object.

   If quotes is true, the string is enclosed in u"" or u'' quotes as
   appropriate.

*/

Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s,
                                      Py_ssize_t size,
                                      Py_UNICODE ch)
{
    /* like wcschr, but doesn't stop at NULL characters */

    while (size-- > 0) {
        if (*s == ch)
            return s;
        s++;
    }

    return NULL;
}

static
PyObject *unicodeescape_string(const Py_UNICODE *s,
                               Py_ssize_t size,
                               int quotes)
{
    PyObject *repr;
    char *p;

    static const char *hexdigit = "0123456789abcdef";

    /* XXX(nnorwitz): rather than over-allocating, it would be
       better to choose a different scheme.  Perhaps scan the
       first N-chars of the string and allocate based on that size.
    */
    /* Initial allocation is based on the longest-possible unichr
       escape.

       In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
       unichr, so in this case it's the longest unichr escape. In
       narrow (UTF-16) builds this is five chars per source unichr
       since there are two unichrs in the surrogate pair, so in narrow
       (UTF-16) builds it's not the longest unichr escape.

       In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
       so in the narrow (UTF-16) build case it's the longest unichr
       escape.
    */

    repr = PyString_FromStringAndSize(NULL,
        2
#ifdef Py_UNICODE_WIDE
        + 10*size
#else
        + 6*size
#endif
        + 1);
    if (repr == NULL)
        return NULL;

    p = PyString_AS_STRING(repr);

    if (quotes) {
        *p++ = 'u';
        *p++ = (findchar(s, size, '\'') &&
                !findchar(s, size, '"')) ? '"' : '\'';
    }
    while (size-- > 0) {
        Py_UNICODE ch = *s++;

        /* Escape quotes and backslashes */
        if ((quotes &&
	     ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') {
            *p++ = '\\';
            *p++ = (char) ch;
	    continue;
        }

#ifdef Py_UNICODE_WIDE
        /* Map 21-bit characters to '\U00xxxxxx' */
        else if (ch >= 0x10000) {
            *p++ = '\\';
            *p++ = 'U';
            *p++ = hexdigit[(ch >> 28) & 0x0000000F];
            *p++ = hexdigit[(ch >> 24) & 0x0000000F];
            *p++ = hexdigit[(ch >> 20) & 0x0000000F];
            *p++ = hexdigit[(ch >> 16) & 0x0000000F];
            *p++ = hexdigit[(ch >> 12) & 0x0000000F];
            *p++ = hexdigit[(ch >> 8) & 0x0000000F];
            *p++ = hexdigit[(ch >> 4) & 0x0000000F];
            *p++ = hexdigit[ch & 0x0000000F];
	    continue;
        }
#else
	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
	else if (ch >= 0xD800 && ch < 0xDC00) {
	    Py_UNICODE ch2;
	    Py_UCS4 ucs;

	    ch2 = *s++;
	    size--;
	    if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
		ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
		*p++ = '\\';
		*p++ = 'U';
		*p++ = hexdigit[(ucs >> 28) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 24) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 20) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 16) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 12) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 8) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 4) & 0x0000000F];
		*p++ = hexdigit[ucs & 0x0000000F];
		continue;
	    }
	    /* Fall through: isolated surrogates are copied as-is */
	    s--;
	    size++;
	}
#endif

        /* Map 16-bit characters to '\uxxxx' */
        if (ch >= 256) {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigit[(ch >> 12) & 0x000F];
            *p++ = hexdigit[(ch >> 8) & 0x000F];
            *p++ = hexdigit[(ch >> 4) & 0x000F];
            *p++ = hexdigit[ch & 0x000F];
        }

        /* Map special whitespace to '\t', \n', '\r' */
        else if (ch == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
        else if (ch == '\n') {
            *p++ = '\\';
            *p++ = 'n';
        }
        else if (ch == '\r') {
            *p++ = '\\';
            *p++ = 'r';
        }

        /* Map non-printable US ASCII to '\xhh' */
        else if (ch < ' ' || ch >= 0x7F) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = hexdigit[(ch >> 4) & 0x000F];
            *p++ = hexdigit[ch & 0x000F];
        }

        /* Copy everything else as-is */
        else
            *p++ = (char) ch;
    }
    if (quotes)
        *p++ = PyString_AS_STRING(repr)[1];

    *p = '\0';
    _PyString_Resize(&repr, p - PyString_AS_STRING(repr));
    return repr;
}

PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
					Py_ssize_t size)
{
    return unicodeescape_string(s, size, 0);
}

PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
					 PyUnicode_GET_SIZE(unicode));
}

/* --- Raw Unicode Escape Codec ------------------------------------------- */

PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
					   Py_ssize_t size,
					   const char *errors)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    const char *bs;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
       length after conversion to the true value. (But decoding error
       handler might have to resize the string) */
    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    end = s + size;
    while (s < end) {
	unsigned char c;
	Py_UCS4 x;
	int i;
        int count;

	/* Non-escape characters are interpreted as Unicode ordinals */
	if (*s != '\\') {
	    *p++ = (unsigned char)*s++;
	    continue;
	}
	startinpos = s-starts;

	/* \u-escapes are only interpreted iff the number of leading
	   backslashes if odd */
	bs = s;
	for (;s < end;) {
	    if (*s != '\\')
		break;
	    *p++ = (unsigned char)*s++;
	}
	if (((s - bs) & 1) == 0 ||
	    s >= end ||
	    (*s != 'u' && *s != 'U')) {
	    continue;
	}
	p--;
        count = *s=='u' ? 4 : 8;
	s++;

	/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
	outpos = p-PyUnicode_AS_UNICODE(v);
	for (x = 0, i = 0; i < count; ++i, ++s) {
	    c = (unsigned char)*s;
	    if (!isxdigit(c)) {
		endinpos = s-starts;
		if (unicode_decode_call_errorhandler(
		    errors, &errorHandler,
		    "rawunicodeescape", "truncated \\uXXXX",
		    starts, size, &startinpos, &endinpos, &exc, &s,
		    (PyObject **)&v, &outpos, &p))
		    goto onError;
		goto nextByte;
	    }
	    x = (x<<4) & ~0xF;
	    if (c >= '0' && c <= '9')
		x += c - '0';
	    else if (c >= 'a' && c <= 'f')
		x += 10 + c - 'a';
	    else
		x += 10 + c - 'A';
	}
        if (x <= 0xffff)
                /* UCS-2 character */
                *p++ = (Py_UNICODE) x;
        else if (x <= 0x10ffff) {
                /* UCS-4 character. Either store directly, or as
                   surrogate pair. */
#ifdef Py_UNICODE_WIDE
                *p++ = (Py_UNIC0DE) x;
#else
                x -= 0x10000L;
                *p++ = 0xD800 + (Py_UNICODE) (x >> 10);
                *p++ = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
#endif
        } else {
            endinpos = s-starts;
            outpos = p-PyUnicode_AS_UNICODE(v);
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "rawunicodeescape", "\\Uxxxxxxxx out of range",
		    starts, size, &startinpos, &endinpos, &exc, &s,
		    (PyObject **)&v, &outpos, &p))
		    goto onError;
        }
	nextByte:
	;
    }
    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
	goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
					   Py_ssize_t size)
{
    PyObject *repr;
    char *p;
    char *q;

    static const char *hexdigit = "0123456789abcdef";

#ifdef Py_UNICODE_WIDE
    repr = PyString_FromStringAndSize(NULL, 10 * size);
#else
    repr = PyString_FromStringAndSize(NULL, 6 * size);
#endif
    if (repr == NULL)
        return NULL;
    if (size == 0)
	return repr;

    p = q = PyString_AS_STRING(repr);
    while (size-- > 0) {
        Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
	/* Map 32-bit characters to '\Uxxxxxxxx' */
	if (ch >= 0x10000) {
            *p++ = '\\';
            *p++ = 'U';
            *p++ = hexdigit[(ch >> 28) & 0xf];
            *p++ = hexdigit[(ch >> 24) & 0xf];
            *p++ = hexdigit[(ch >> 20) & 0xf];
            *p++ = hexdigit[(ch >> 16) & 0xf];
            *p++ = hexdigit[(ch >> 12) & 0xf];
            *p++ = hexdigit[(ch >> 8) & 0xf];
            *p++ = hexdigit[(ch >> 4) & 0xf];
            *p++ = hexdigit[ch & 15];
        }
        else
#else
	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
	if (ch >= 0xD800 && ch < 0xDC00) {
	    Py_UNICODE ch2;
	    Py_UCS4 ucs;

	    ch2 = *s++;
	    size--;
	    if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
		ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
		*p++ = '\\';
		*p++ = 'U';
		*p++ = hexdigit[(ucs >> 28) & 0xf];
		*p++ = hexdigit[(ucs >> 24) & 0xf];
		*p++ = hexdigit[(ucs >> 20) & 0xf];
		*p++ = hexdigit[(ucs >> 16) & 0xf];
		*p++ = hexdigit[(ucs >> 12) & 0xf];
		*p++ = hexdigit[(ucs >> 8) & 0xf];
		*p++ = hexdigit[(ucs >> 4) & 0xf];
		*p++ = hexdigit[ucs & 0xf];
		continue;
	    }
	    /* Fall through: isolated surrogates are copied as-is */
	    s--;
	    size++;
	}
#endif
	/* Map 16-bit characters to '\uxxxx' */
	if (ch >= 256) {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigit[(ch >> 12) & 0xf];
            *p++ = hexdigit[(ch >> 8) & 0xf];
            *p++ = hexdigit[(ch >> 4) & 0xf];
            *p++ = hexdigit[ch & 15];
        }
	/* Copy everything else as-is */
	else
            *p++ = (char) ch;
    }
    *p = '\0';
    _PyString_Resize(&repr, p - q);
    return repr;
}

PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
					    PyUnicode_GET_SIZE(unicode));
}

/* --- Unicode Internal Codec ------------------------------------------- */

PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
					   Py_ssize_t size,
					   const char *errors)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    const char *end;
    const char *reason;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

#ifdef Py_UNICODE_WIDE
    Py_UNICODE unimax = PyUnicode_GetMax();
#endif

    /* XXX overflow detection missing */
    v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
    if (v == NULL)
	goto onError;
    if (PyUnicode_GetSize((PyObject *)v) == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    end = s + size;

    while (s < end) {
        memcpy(p, s, sizeof(Py_UNICODE));
        /* We have to sanity check the raw data, otherwise doom looms for
           some malformed UCS-4 data. */
        if (
            #ifdef Py_UNICODE_WIDE
            *p > unimax || *p < 0 ||
            #endif
            end-s < Py_UNICODE_SIZE
            )
            {
            startinpos = s - starts;
            if (end-s < Py_UNICODE_SIZE) {
                endinpos = end-starts;
                reason = "truncated input";
            }
            else {
                endinpos = s - starts + Py_UNICODE_SIZE;
                reason = "illegal code point (> 0x10FFFF)";
            }
            outpos = p - PyUnicode_AS_UNICODE(v);
            if (unicode_decode_call_errorhandler(
                    errors, &errorHandler,
                    "unicode_internal", reason,
                    starts, size, &startinpos, &endinpos, &exc, &s,
                    (PyObject **)&v, &outpos, &p)) {
                goto onError;
            }
        }
        else {
            p++;
            s += Py_UNICODE_SIZE;
        }
    }

    if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
        goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

/* --- Latin-1 Codec ------------------------------------------------------ */

PyObject *PyUnicode_DecodeLatin1(const char *s,
				 Py_ssize_t size,
				 const char *errors)
{
    PyUnicodeObject *v;
    Py_UNICODE *p;

    /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
    if (size == 1) {
	Py_UNICODE r = *(unsigned char*)s;
	return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    while (size-- > 0)
	*p++ = (unsigned char)*s++;
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    return NULL;
}

/* create or adjust a UnicodeEncodeError */
static void make_encode_exception(PyObject **exceptionObject,
    const char *encoding,
    const Py_UNICODE *unicode, Py_ssize_t size,
    Py_ssize_t startpos, Py_ssize_t endpos,
    const char *reason)
{
    if (*exceptionObject == NULL) {
	*exceptionObject = PyUnicodeEncodeError_Create(
	    encoding, unicode, size, startpos, endpos, reason);
    }
    else {
	if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
	    goto onError;
	if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
	    goto onError;
	if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
	    goto onError;
	return;
	onError:
	Py_DECREF(*exceptionObject);
	*exceptionObject = NULL;
    }
}

/* raises a UnicodeEncodeError */
static void raise_encode_exception(PyObject **exceptionObject,
    const char *encoding,
    const Py_UNICODE *unicode, Py_ssize_t size,
    Py_ssize_t startpos, Py_ssize_t endpos,
    const char *reason)
{
    make_encode_exception(exceptionObject,
	encoding, unicode, size, startpos, endpos, reason);
    if (*exceptionObject != NULL)
	PyCodec_StrictErrors(*exceptionObject);
}

/* error handling callback helper:
   build arguments, call the callback and check the arguments,
   put the result into newpos and return the replacement string, which
   has to be freed by the caller */
static PyObject *unicode_encode_call_errorhandler(const char *errors,
    PyObject **errorHandler,
    const char *encoding, const char *reason,
    const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
    Py_ssize_t startpos, Py_ssize_t endpos,
    Py_ssize_t *newpos)
{
    static char *argparse = "O!n;encoding error handler must return (unicode, int) tuple";

    PyObject *restuple;
    PyObject *resunicode;

    if (*errorHandler == NULL) {
	*errorHandler = PyCodec_LookupError(errors);
        if (*errorHandler == NULL)
	    return NULL;
    }

    make_encode_exception(exceptionObject,
	encoding, unicode, size, startpos, endpos, reason);
    if (*exceptionObject == NULL)
	return NULL;

    restuple = PyObject_CallFunctionObjArgs(
	*errorHandler, *exceptionObject, NULL);
    if (restuple == NULL)
	return NULL;
    if (!PyTuple_Check(restuple)) {
	PyErr_Format(PyExc_TypeError, &argparse[4]);
	Py_DECREF(restuple);
	return NULL;
    }
    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
	&resunicode, newpos)) {
	Py_DECREF(restuple);
	return NULL;
    }
    if (*newpos<0)
	*newpos = size+*newpos;
    if (*newpos<0 || *newpos>size) {
	PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
	Py_DECREF(restuple);
	return NULL;
    }
    Py_INCREF(resunicode);
    Py_DECREF(restuple);
    return resunicode;
}

static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
				 Py_ssize_t size,
				 const char *errors,
				 int limit)
{
    /* output object */
    PyObject *res;
    /* pointers to the beginning and end+1 of input */
    const Py_UNICODE *startp = p;
    const Py_UNICODE *endp = p + size;
    /* pointer to the beginning of the unencodable characters */
    /* const Py_UNICODE *badp = NULL; */
    /* pointer into the output */
    char *str;
    /* current output position */
    Py_ssize_t respos = 0;
    Py_ssize_t ressize;
    const char *encoding = (limit == 256) ? "latin-1" : "ascii";
    const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    /* the following variable is used for caching string comparisons
     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
    int known_errorHandler = -1;

    /* allocate enough for a simple encoding without
       replacements, if we need more, we'll resize */
    res = PyString_FromStringAndSize(NULL, size);
    if (res == NULL)
        goto onError;
    if (size == 0)
	return res;
    str = PyString_AS_STRING(res);
    ressize = size;

    while (p<endp) {
	Py_UNICODE c = *p;

	/* can we encode this? */
	if (c<limit) {
	    /* no overflow check, because we know that the space is enough */
	    *str++ = (char)c;
	    ++p;
	}
	else {
	    Py_ssize_t unicodepos = p-startp;
	    Py_ssize_t requiredsize;
	    PyObject *repunicode;
	    Py_ssize_t repsize;
	    Py_ssize_t newpos;
	    Py_ssize_t respos;
	    Py_UNICODE *uni2;
	    /* startpos for collecting unencodable chars */
	    const Py_UNICODE *collstart = p;
	    const Py_UNICODE *collend = p;
	    /* find all unecodable characters */
	    while ((collend < endp) && ((*collend)>=limit))
		++collend;
	    /* cache callback name lookup (if not done yet, i.e. it's the first error) */
	    if (known_errorHandler==-1) {
		if ((errors==NULL) || (!strcmp(errors, "strict")))
		    known_errorHandler = 1;
		else if (!strcmp(errors, "replace"))
		    known_errorHandler = 2;
		else if (!strcmp(errors, "ignore"))
		    known_errorHandler = 3;
		else if (!strcmp(errors, "xmlcharrefreplace"))
		    known_errorHandler = 4;
		else
		    known_errorHandler = 0;
	    }
	    switch (known_errorHandler) {
		case 1: /* strict */
		    raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
		    goto onError;
		case 2: /* replace */
		    while (collstart++<collend)
			*str++ = '?'; /* fall through */
		case 3: /* ignore */
		    p = collend;
		    break;
		case 4: /* xmlcharrefreplace */
		    respos = str-PyString_AS_STRING(res);
		    /* determine replacement size (temporarily (mis)uses p) */
		    for (p = collstart, repsize = 0; p < collend; ++p) {
			if (*p<10)
			    repsize += 2+1+1;
			else if (*p<100)
			    repsize += 2+2+1;
			else if (*p<1000)
			    repsize += 2+3+1;
			else if (*p<10000)
			    repsize += 2+4+1;
#ifndef Py_UNICODE_WIDE
			else
			    repsize += 2+5+1;
#else
			else if (*p<100000)
			    repsize += 2+5+1;
			else if (*p<1000000)
			    repsize += 2+6+1;
			else
			    repsize += 2+7+1;
#endif
		    }
		    requiredsize = respos+repsize+(endp-collend);
		    if (requiredsize > ressize) {
			if (requiredsize<2*ressize)
			    requiredsize = 2*ressize;
			if (_PyString_Resize(&res, requiredsize))
			    goto onError;
			str = PyString_AS_STRING(res) + respos;
			ressize = requiredsize;
		    }
		    /* generate replacement (temporarily (mis)uses p) */
		    for (p = collstart; p < collend; ++p) {
			str += sprintf(str, "&#%d;", (int)*p);
		    }
		    p = collend;
		    break;
		default:
		    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
			encoding, reason, startp, size, &exc,
			collstart-startp, collend-startp, &newpos);
		    if (repunicode == NULL)
			goto onError;
		    /* need more space? (at least enough for what we
		       have+the replacement+the rest of the string, so
		       we won't have to check space for encodable characters) */
		    respos = str-PyString_AS_STRING(res);
		    repsize = PyUnicode_GET_SIZE(repunicode);
		    requiredsize = respos+repsize+(endp-collend);
		    if (requiredsize > ressize) {
			if (requiredsize<2*ressize)
			    requiredsize = 2*ressize;
			if (_PyString_Resize(&res, requiredsize)) {
			    Py_DECREF(repunicode);
			    goto onError;
			}
			str = PyString_AS_STRING(res) + respos;
			ressize = requiredsize;
		    }
		    /* check if there is anything unencodable in the replacement
		       and copy it to the output */
		    for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
			c = *uni2;
			if (c >= limit) {
			    raise_encode_exception(&exc, encoding, startp, size,
				unicodepos, unicodepos+1, reason);
			    Py_DECREF(repunicode);
			    goto onError;
			}
			*str = (char)c;
		    }
		    p = startp + newpos;
		    Py_DECREF(repunicode);
	    }
	}
    }
    /* Resize if we allocated to much */
    respos = str-PyString_AS_STRING(res);
    if (respos<ressize)
       /* If this falls res will be NULL */
	_PyString_Resize(&res, respos);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return res;

    onError:
    Py_XDECREF(res);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
				 Py_ssize_t size,
				 const char *errors)
{
    return unicode_encode_ucs1(p, size, errors, 256);
}

PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
				  PyUnicode_GET_SIZE(unicode),
				  NULL);
}

/* --- 7-bit ASCII Codec -------------------------------------------------- */

PyObject *PyUnicode_DecodeASCII(const char *s,
				Py_ssize_t size,
				const char *errors)
{
    const char *starts = s;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;

    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
    if (size == 1 && *(unsigned char*)s < 128) {
	Py_UNICODE r = *(unsigned char*)s;
	return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    e = s + size;
    while (s < e) {
	register unsigned char c = (unsigned char)*s;
	if (c < 128) {
	    *p++ = c;
	    ++s;
	}
	else {
	    startinpos = s-starts;
	    endinpos = startinpos + 1;
	    outpos = p - (Py_UNICODE *)PyUnicode_AS_UNICODE(v);
	    if (unicode_decode_call_errorhandler(
		 errors, &errorHandler,
		 "ascii", "ordinal not in range(128)",
		 starts, size, &startinpos, &endinpos, &exc, &s,
		 (PyObject **)&v, &outpos, &p))
		goto onError;
	}
    }
    if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
	if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
	    goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return NULL;
}

PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
				Py_ssize_t size,
				const char *errors)
{
    return unicode_encode_ucs1(p, size, errors, 128);
}

PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
				 PyUnicode_GET_SIZE(unicode),
				 NULL);
}

#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)

/* --- MBCS codecs for Windows -------------------------------------------- */

#if SIZEOF_INT < SIZEOF_SSIZE_T
#define NEED_RETRY
#endif

/* XXX This code is limited to "true" double-byte encodings, as
   a) it assumes an incomplete character consists of a single byte, and
   b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte
      encodings, see IsDBCSLeadByteEx documentation. */

static int is_dbcs_lead_byte(const char *s, int offset)
{
    const char *curr = s + offset;

    if (IsDBCSLeadByte(*curr)) {
	const char *prev = CharPrev(s, curr);
	return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2);
    }
    return 0;
}

/*
 * Decode MBCS string into unicode object. If 'final' is set, converts
 * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise.
 */
static int decode_mbcs(PyUnicodeObject **v,
			const char *s, /* MBCS string */
			int size, /* sizeof MBCS string */
			int final)
{
    Py_UNICODE *p;
    Py_ssize_t n = 0;
    int usize = 0;

    assert(size >= 0);

    /* Skip trailing lead-byte unless 'final' is set */
    if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1))
	--size;

    /* First get the size of the result */
    if (size > 0) {
	usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0);
	if (usize == 0) {
	    PyErr_SetFromWindowsErrWithFilename(0, NULL);
	    return -1;
	}
    }

    if (*v == NULL) {
	/* Create unicode object */
	*v = _PyUnicode_New(usize);
	if (*v == NULL)
	    return -1;
    }
    else {
	/* Extend unicode object */
	n = PyUnicode_GET_SIZE(*v);
	if (_PyUnicode_Resize(v, n + usize) < 0)
	    return -1;
    }

    /* Do the conversion */
    if (size > 0) {
	p = PyUnicode_AS_UNICODE(*v) + n;
	if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) {
	    PyErr_SetFromWindowsErrWithFilename(0, NULL);
	    return -1;
	}
    }

    return size;
}

PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
					Py_ssize_t size,
					const char *errors,
					Py_ssize_t *consumed)
{
    PyUnicodeObject *v = NULL;
    int done;

    if (consumed)
	*consumed = 0;

#ifdef NEED_RETRY
  retry:
    if (size > INT_MAX)
	done = decode_mbcs(&v, s, INT_MAX, 0);
    else
#endif
	done = decode_mbcs(&v, s, (int)size, !consumed);

    if (done < 0) {
        Py_XDECREF(v);
	return NULL;
    }

    if (consumed)
	*consumed += done;

#ifdef NEED_RETRY
    if (size > INT_MAX) {
	s += done;
	size -= done;
	goto retry;
    }
#endif

    return (PyObject *)v;
}

PyObject *PyUnicode_DecodeMBCS(const char *s,
				Py_ssize_t size,
				const char *errors)
{
    return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL);
}

/*
 * Convert unicode into string object (MBCS).
 * Returns 0 if succeed, -1 otherwise.
 */
static int encode_mbcs(PyObject **repr,
			const Py_UNICODE *p, /* unicode */
			int size) /* size of unicode */
{
    int mbcssize = 0;
    Py_ssize_t n = 0;

    assert(size >= 0);

    /* First get the size of the result */
    if (size > 0) {
	mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL);
	if (mbcssize == 0) {
	    PyErr_SetFromWindowsErrWithFilename(0, NULL);
	    return -1;
	}
    }

    if (*repr == NULL) {
	/* Create string object */
	*repr = PyString_FromStringAndSize(NULL, mbcssize);
	if (*repr == NULL)
	    return -1;
    }
    else {
	/* Extend string object */
	n = PyString_Size(*repr);
	if (_PyString_Resize(repr, n + mbcssize) < 0)
	    return -1;
    }

    /* Do the conversion */
    if (size > 0) {
	char *s = PyString_AS_STRING(*repr) + n;
	if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
	    PyErr_SetFromWindowsErrWithFilename(0, NULL);
	    return -1;
	}
    }

    return 0;
}

PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
				Py_ssize_t size,
				const char *errors)
{
    PyObject *repr = NULL;
    int ret;

#ifdef NEED_RETRY
 retry:
    if (size > INT_MAX)
	ret = encode_mbcs(&repr, p, INT_MAX);
    else
#endif
	ret = encode_mbcs(&repr, p, (int)size);

    if (ret < 0) {
	Py_XDECREF(repr);
	return NULL;
    }

#ifdef NEED_RETRY
    if (size > INT_MAX) {
	p += INT_MAX;
	size -= INT_MAX;
	goto retry;
    }
#endif

    return repr;
}

PyObject *PyUnicode_AsMBCSString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
				PyUnicode_GET_SIZE(unicode),
				NULL);
}

#undef NEED_RETRY

#endif /* MS_WINDOWS */

/* --- Character Mapping Codec -------------------------------------------- */

PyObject *PyUnicode_DecodeCharmap(const char *s,
				  Py_ssize_t size,
				  PyObject *mapping,
				  const char *errors)
{
    const char *starts = s;
    Py_ssize_t startinpos;
    Py_ssize_t endinpos;
    Py_ssize_t outpos;
    const char *e;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    Py_ssize_t extrachars = 0;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    Py_UNICODE *mapstring = NULL;
    Py_ssize_t maplen = 0;

    /* Default to Latin-1 */
    if (mapping == NULL)
	return PyUnicode_DecodeLatin1(s, size, errors);

    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    e = s + size;
    if (PyUnicode_CheckExact(mapping)) {
	mapstring = PyUnicode_AS_UNICODE(mapping);
	maplen = PyUnicode_GET_SIZE(mapping);
	while (s < e) {
	    unsigned char ch = *s;
	    Py_UNICODE x = 0xfffe; /* illegal value */

	    if (ch < maplen)
		x = mapstring[ch];

	    if (x == 0xfffe) {
		/* undefined mapping */
		outpos = p-PyUnicode_AS_UNICODE(v);
		startinpos = s-starts;
		endinpos = startinpos+1;
		if (unicode_decode_call_errorhandler(
		     errors, &errorHandler,
		     "charmap", "character maps to <undefined>",
		     starts, size, &startinpos, &endinpos, &exc, &s,
		     (PyObject **)&v, &outpos, &p)) {
		    goto onError;
		}
		continue;
	    }
	    *p++ = x;
	    ++s;
	}
    }
    else {
	while (s < e) {
	    unsigned char ch = *s;
	    PyObject *w, *x;

	    /* Get mapping (char ordinal -> integer, Unicode char or None) */
	    w = PyInt_FromLong((long)ch);
	    if (w == NULL)
		goto onError;
	    x = PyObject_GetItem(mapping, w);
	    Py_DECREF(w);
	    if (x == NULL) {
		if (PyErr_ExceptionMatches(PyExc_LookupError)) {
		    /* No mapping found means: mapping is undefined. */
		    PyErr_Clear();
		    x = Py_None;
		    Py_INCREF(x);
		} else
		    goto onError;
	    }
    
	    /* Apply mapping */
	    if (PyInt_Check(x)) {
		long value = PyInt_AS_LONG(x);
		if (value < 0 || value > 65535) {
		    PyErr_SetString(PyExc_TypeError,
				    "character mapping must be in range(65536)");
		    Py_DECREF(x);
		    goto onError;
		}
		*p++ = (Py_UNICODE)value;
	    }
	    else if (x == Py_None) {
		/* undefined mapping */
		outpos = p-PyUnicode_AS_UNICODE(v);
		startinpos = s-starts;
		endinpos = startinpos+1;
		if (unicode_decode_call_errorhandler(
		     errors, &errorHandler,
		     "charmap", "character maps to <undefined>",
		     starts, size, &startinpos, &endinpos, &exc, &s,
		     (PyObject **)&v, &outpos, &p)) {
		    Py_DECREF(x);
		    goto onError;
		}
		Py_DECREF(x);
		continue;
	    }
	    else if (PyUnicode_Check(x)) {
		Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
    
		if (targetsize == 1)
		    /* 1-1 mapping */
		    *p++ = *PyUnicode_AS_UNICODE(x);
    
		else if (targetsize > 1) {
		    /* 1-n mapping */
		    if (targetsize > extrachars) {
			/* resize first */
			Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
			Py_ssize_t needed = (targetsize - extrachars) + \
				     (targetsize << 2);
			extrachars += needed;
			/* XXX overflow detection missing */
			if (_PyUnicode_Resize(&v,
					     PyUnicode_GET_SIZE(v) + needed) < 0) {
			    Py_DECREF(x);
			    goto onError;
			}
			p = PyUnicode_AS_UNICODE(v) + oldpos;
		    }
		    Py_UNICODE_COPY(p,
				    PyUnicode_AS_UNICODE(x),
				    targetsize);
		    p += targetsize;
		    extrachars -= targetsize;
		}
		/* 1-0 mapping: skip the character */
	    }
	    else {
		/* wrong return value */
		PyErr_SetString(PyExc_TypeError,
		      "character mapping must return integer, None or unicode");
		Py_DECREF(x);
		goto onError;
	    }
	    Py_DECREF(x);
	    ++s;
	}
    }
    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
	if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
	    goto onError;
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return (PyObject *)v;

 onError:
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    Py_XDECREF(v);
    return NULL;
}

/* Charmap encoding: the lookup table */

struct encoding_map{
  PyObject_HEAD
  unsigned char level1[32];
  int count2, count3;
  unsigned char level23[1];
};

static PyObject*
encoding_map_size(PyObject *obj, PyObject* args)
{
    struct encoding_map *map = (struct encoding_map*)obj;
    return PyInt_FromLong(sizeof(*map) - 1 + 16*map->count2 + 
                          128*map->count3);
}

static PyMethodDef encoding_map_methods[] = {
	{"size", encoding_map_size, METH_NOARGS, 
         PyDoc_STR("Return the size (in bytes) of this object") },
        { 0 }
};

static void
encoding_map_dealloc(PyObject* o)
{
	PyObject_FREE(o);
}

static PyTypeObject EncodingMapType = {
	PyVarObject_HEAD_INIT(NULL, 0)
        "EncodingMap",          /*tp_name*/
        sizeof(struct encoding_map),   /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        /* methods */
        encoding_map_dealloc,   /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        0,                      /*tp_getattro*/
        0,                      /*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT,     /*tp_flags*/
        0,                      /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        encoding_map_methods,   /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        0,                      /*tp_init*/
        0,                      /*tp_alloc*/
        0,                      /*tp_new*/
        0,                      /*tp_free*/
        0,                      /*tp_is_gc*/
};

PyObject*
PyUnicode_BuildEncodingMap(PyObject* string)
{
    Py_UNICODE *decode;
    PyObject *result;
    struct encoding_map *mresult;
    int i;
    int need_dict = 0;
    unsigned char level1[32];
    unsigned char level2[512];
    unsigned char *mlevel1, *mlevel2, *mlevel3;
    int count2 = 0, count3 = 0;

    if (!PyUnicode_Check(string) || PyUnicode_GetSize(string) != 256) {
        PyErr_BadArgument();
        return NULL;
    }
    decode = PyUnicode_AS_UNICODE(string);
    memset(level1, 0xFF, sizeof level1);
    memset(level2, 0xFF, sizeof level2);

    /* If there isn't a one-to-one mapping of NULL to \0,
       or if there are non-BMP characters, we need to use
       a mapping dictionary. */
    if (decode[0] != 0)
        need_dict = 1;
    for (i = 1; i < 256; i++) {
        int l1, l2;
        if (decode[i] == 0
            #ifdef Py_UNICODE_WIDE
            || decode[i] > 0xFFFF
            #endif
        ) {
            need_dict = 1;
            break;
        }
        if (decode[i] == 0xFFFE)
            /* unmapped character */
            continue;
        l1 = decode[i] >> 11;
        l2 = decode[i] >> 7;
        if (level1[l1] == 0xFF)
            level1[l1] = count2++;
        if (level2[l2] == 0xFF)
            level2[l2] = count3++; 
    }

    if (count2 >= 0xFF || count3 >= 0xFF)
        need_dict = 1;

    if (need_dict) {
        PyObject *result = PyDict_New();
        PyObject *key, *value;
        if (!result)
            return NULL;
        for (i = 0; i < 256; i++) {
            key = value = NULL;
            key = PyInt_FromLong(decode[i]);
            value = PyInt_FromLong(i);
            if (!key || !value)
                goto failed1;
            if (PyDict_SetItem(result, key, value) == -1)
                goto failed1;
            Py_DECREF(key);
            Py_DECREF(value);
        }
        return result;
      failed1:
        Py_XDECREF(key);
        Py_XDECREF(value);
        Py_DECREF(result);
        return NULL;
    }

    /* Create a three-level trie */
    result = PyObject_MALLOC(sizeof(struct encoding_map) +
                             16*count2 + 128*count3 - 1);
    if (!result)
        return PyErr_NoMemory();
    PyObject_Init(result, &EncodingMapType);
    mresult = (struct encoding_map*)result;
    mresult->count2 = count2;
    mresult->count3 = count3;
    mlevel1 = mresult->level1;
    mlevel2 = mresult->level23;
    mlevel3 = mresult->level23 + 16*count2;
    memcpy(mlevel1, level1, 32);
    memset(mlevel2, 0xFF, 16*count2);
    memset(mlevel3, 0, 128*count3);
    count3 = 0;
    for (i = 1; i < 256; i++) {
        int o1, o2, o3, i2, i3;
        if (decode[i] == 0xFFFE)
            /* unmapped character */
            continue;
        o1 = decode[i]>>11;
        o2 = (decode[i]>>7) & 0xF;
        i2 = 16*mlevel1[o1] + o2;
        if (mlevel2[i2] == 0xFF)
            mlevel2[i2] = count3++;
        o3 = decode[i] & 0x7F;
        i3 = 128*mlevel2[i2] + o3;
        mlevel3[i3] = i;
    }
    return result;
}

static int
encoding_map_lookup(Py_UNICODE c, PyObject *mapping)
{
    struct encoding_map *map = (struct encoding_map*)mapping;
    int l1 = c>>11;
    int l2 = (c>>7) & 0xF;
    int l3 = c & 0x7F;
    int i;

#ifdef Py_UNICODE_WIDE
    if (c > 0xFFFF) {
	return -1;
    }
#endif
    if (c == 0)
        return 0;
    /* level 1*/
    i = map->level1[l1];
    if (i == 0xFF) {
        return -1;
    }
    /* level 2*/
    i = map->level23[16*i+l2];
    if (i == 0xFF) {
        return -1;
    }
    /* level 3 */
    i = map->level23[16*map->count2 + 128*i + l3];
    if (i == 0) {
        return -1;
    }
    return i;
}

/* Lookup the character ch in the mapping. If the character
   can't be found, Py_None is returned (or NULL, if another
   error occurred). */
static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
{
    PyObject *w = PyInt_FromLong((long)c);
    PyObject *x;

    if (w == NULL)
	 return NULL;
    x = PyObject_GetItem(mapping, w);
    Py_DECREF(w);
    if (x == NULL) {
	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
	    /* No mapping found means: mapping is undefined. */
	    PyErr_Clear();
	    x = Py_None;
	    Py_INCREF(x);
	    return x;
	} else
	    return NULL;
    }
    else if (x == Py_None)
	return x;
    else if (PyInt_Check(x)) {
	long value = PyInt_AS_LONG(x);
	if (value < 0 || value > 255) {
	    PyErr_SetString(PyExc_TypeError,
			     "character mapping must be in range(256)");
	    Py_DECREF(x);
	    return NULL;
	}
	return x;
    }
    else if (PyString_Check(x))
	return x;
    else {
	/* wrong return value */
	PyErr_SetString(PyExc_TypeError,
	      "character mapping must return integer, None or str");
	Py_DECREF(x);
	return NULL;
    }
}

static int
charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
{
	Py_ssize_t outsize = PyString_GET_SIZE(*outobj);
	/* exponentially overallocate to minimize reallocations */
	if (requiredsize < 2*outsize)
	    requiredsize = 2*outsize;
	if (_PyString_Resize(outobj, requiredsize)) {
	    return 0;
	}
	return 1;
}

typedef enum charmapencode_result { 
  enc_SUCCESS, enc_FAILED, enc_EXCEPTION 
}charmapencode_result;
/* lookup the character, put the result in the output string and adjust
   various state variables. Reallocate the output string if not enough
   space is available. Return a new reference to the object that
   was put in the output buffer, or Py_None, if the mapping was undefined
   (in which case no character was written) or NULL, if a
   reallocation error occurred. The caller must decref the result */
static
charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
    PyObject **outobj, Py_ssize_t *outpos)
{
    PyObject *rep;
    char *outstart;
    Py_ssize_t outsize = PyString_GET_SIZE(*outobj);

    if (Py_TYPE(mapping) == &EncodingMapType) {
        int res = encoding_map_lookup(c, mapping);
	Py_ssize_t requiredsize = *outpos+1;
        if (res == -1)
            return enc_FAILED;
	if (outsize<requiredsize) 
	    if (!charmapencode_resize(outobj, outpos, requiredsize))
		return enc_EXCEPTION;
        outstart = PyString_AS_STRING(*outobj);
	outstart[(*outpos)++] = (char)res;
	return enc_SUCCESS;
    }

    rep = charmapencode_lookup(c, mapping);
    if (rep==NULL)
	return enc_EXCEPTION;
    else if (rep==Py_None) {
	Py_DECREF(rep);
	return enc_FAILED;
    } else {
	if (PyInt_Check(rep)) {
	    Py_ssize_t requiredsize = *outpos+1;
	    if (outsize<requiredsize)
		if (!charmapencode_resize(outobj, outpos, requiredsize)) {
		    Py_DECREF(rep);
		    return enc_EXCEPTION;
		}
            outstart = PyString_AS_STRING(*outobj);
	    outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep);
	}
	else {
	    const char *repchars = PyString_AS_STRING(rep);
	    Py_ssize_t repsize = PyString_GET_SIZE(rep);
	    Py_ssize_t requiredsize = *outpos+repsize;
	    if (outsize<requiredsize)
		if (!charmapencode_resize(outobj, outpos, requiredsize)) {
		    Py_DECREF(rep);
		    return enc_EXCEPTION;
		}
            outstart = PyString_AS_STRING(*outobj);
	    memcpy(outstart + *outpos, repchars, repsize);
	    *outpos += repsize;
	}
    }
    Py_DECREF(rep);
    return enc_SUCCESS;
}

/* handle an error in PyUnicode_EncodeCharmap
   Return 0 on success, -1 on error */
static
int charmap_encoding_error(
    const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
    PyObject **exceptionObject,
    int *known_errorHandler, PyObject **errorHandler, const char *errors,
    PyObject **res, Py_ssize_t *respos)
{
    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
    Py_ssize_t repsize;
    Py_ssize_t newpos;
    Py_UNICODE *uni2;
    /* startpos for collecting unencodable chars */
    Py_ssize_t collstartpos = *inpos;
    Py_ssize_t collendpos = *inpos+1;
    Py_ssize_t collpos;
    char *encoding = "charmap";
    char *reason = "character maps to <undefined>";
    charmapencode_result x;

    /* find all unencodable characters */
    while (collendpos < size) {
        PyObject *rep;
        if (Py_TYPE(mapping) == &EncodingMapType) {
	    int res = encoding_map_lookup(p[collendpos], mapping);
	    if (res != -1)
		break;
	    ++collendpos;
	    continue;
	}
            
	rep = charmapencode_lookup(p[collendpos], mapping);
	if (rep==NULL)
	    return -1;
	else if (rep!=Py_None) {
	    Py_DECREF(rep);
	    break;
	}
	Py_DECREF(rep);
	++collendpos;
    }
    /* cache callback name lookup
     * (if not done yet, i.e. it's the first error) */
    if (*known_errorHandler==-1) {
	if ((errors==NULL) || (!strcmp(errors, "strict")))
	    *known_errorHandler = 1;
	else if (!strcmp(errors, "replace"))
	    *known_errorHandler = 2;
	else if (!strcmp(errors, "ignore"))
	    *known_errorHandler = 3;
	else if (!strcmp(errors, "xmlcharrefreplace"))
	    *known_errorHandler = 4;
	else
	    *known_errorHandler = 0;
    }
    switch (*known_errorHandler) {
	case 1: /* strict */
	    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
	    return -1;
	case 2: /* replace */
	    for (collpos = collstartpos; collpos<collendpos; ++collpos) {
		x = charmapencode_output('?', mapping, res, respos);
		if (x==enc_EXCEPTION) {
		    return -1;
		}
		else if (x==enc_FAILED) {
		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
		    return -1;
		}
	    }
	    /* fall through */
	case 3: /* ignore */
	    *inpos = collendpos;
	    break;
	case 4: /* xmlcharrefreplace */
	    /* generate replacement (temporarily (mis)uses p) */
	    for (collpos = collstartpos; collpos < collendpos; ++collpos) {
		char buffer[2+29+1+1];
		char *cp;
		sprintf(buffer, "&#%d;", (int)p[collpos]);
		for (cp = buffer; *cp; ++cp) {
		    x = charmapencode_output(*cp, mapping, res, respos);
		    if (x==enc_EXCEPTION)
			return -1;
		    else if (x==enc_FAILED) {
			raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
			return -1;
		    }
		}
	    }
	    *inpos = collendpos;
	    break;
	default:
	    repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
		encoding, reason, p, size, exceptionObject,
		collstartpos, collendpos, &newpos);
	    if (repunicode == NULL)
		return -1;
	    /* generate replacement  */
	    repsize = PyUnicode_GET_SIZE(repunicode);
	    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
		x = charmapencode_output(*uni2, mapping, res, respos);
		if (x==enc_EXCEPTION) {
		    return -1;
		}
		else if (x==enc_FAILED) {
		    Py_DECREF(repunicode);
		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
		    return -1;
		}
	    }
	    *inpos = newpos;
	    Py_DECREF(repunicode);
    }
    return 0;
}

PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
				  Py_ssize_t size,
				  PyObject *mapping,
				  const char *errors)
{
    /* output object */
    PyObject *res = NULL;
    /* current input position */
    Py_ssize_t inpos = 0;
    /* current output position */
    Py_ssize_t respos = 0;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    /* the following variable is used for caching string comparisons
     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
     * 3=ignore, 4=xmlcharrefreplace */
    int known_errorHandler = -1;

    /* Default to Latin-1 */
    if (mapping == NULL)
	return PyUnicode_EncodeLatin1(p, size, errors);

    /* allocate enough for a simple encoding without
       replacements, if we need more, we'll resize */
    res = PyString_FromStringAndSize(NULL, size);
    if (res == NULL)
        goto onError;
    if (size == 0)
	return res;

    while (inpos<size) {
	/* try to encode it */
	charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
	if (x==enc_EXCEPTION) /* error */
	    goto onError;
	if (x==enc_FAILED) { /* unencodable character */
	    if (charmap_encoding_error(p, size, &inpos, mapping,
		&exc,
		&known_errorHandler, &errorHandler, errors,
		&res, &respos)) {
		goto onError;
	    }
	}
	else
	    /* done with this character => adjust input position */
	    ++inpos;
    }

    /* Resize if we allocated to much */
    if (respos<PyString_GET_SIZE(res)) {
	if (_PyString_Resize(&res, respos))
	    goto onError;
    }
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return res;

    onError:
    Py_XDECREF(res);
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return NULL;
}

PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
				    PyObject *mapping)
{
    if (!PyUnicode_Check(unicode) || mapping == NULL) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
				   PyUnicode_GET_SIZE(unicode),
				   mapping,
				   NULL);
}

/* create or adjust a UnicodeTranslateError */
static void make_translate_exception(PyObject **exceptionObject,
    const Py_UNICODE *unicode, Py_ssize_t size,
    Py_ssize_t startpos, Py_ssize_t endpos,
    const char *reason)
{
    if (*exceptionObject == NULL) {
    	*exceptionObject = PyUnicodeTranslateError_Create(
	    unicode, size, startpos, endpos, reason);
    }
    else {
	if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
	    goto onError;
	if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
	    goto onError;
	if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
	    goto onError;
	return;
	onError:
	Py_DECREF(*exceptionObject);
	*exceptionObject = NULL;
    }
}

/* raises a UnicodeTranslateError */
static void raise_translate_exception(PyObject **exceptionObject,
    const Py_UNICODE *unicode, Py_ssize_t size,
    Py_ssize_t startpos, Py_ssize_t endpos,
    const char *reason)
{
    make_translate_exception(exceptionObject,
	unicode, size, startpos, endpos, reason);
    if (*exceptionObject != NULL)
	PyCodec_StrictErrors(*exceptionObject);
}

/* error handling callback helper:
   build arguments, call the callback and check the arguments,
   put the result into newpos and return the replacement string, which
   has to be freed by the caller */
static PyObject *unicode_translate_call_errorhandler(const char *errors,
    PyObject **errorHandler,
    const char *reason,
    const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
    Py_ssize_t startpos, Py_ssize_t endpos,
    Py_ssize_t *newpos)
{
    static char *argparse = "O!n;translating error handler must return (unicode, int) tuple";

    Py_ssize_t i_newpos;
    PyObject *restuple;
    PyObject *resunicode;

    if (*errorHandler == NULL) {
	*errorHandler = PyCodec_LookupError(errors);
        if (*errorHandler == NULL)
	    return NULL;
    }

    make_translate_exception(exceptionObject,
	unicode, size, startpos, endpos, reason);
    if (*exceptionObject == NULL)
	return NULL;

    restuple = PyObject_CallFunctionObjArgs(
	*errorHandler, *exceptionObject, NULL);
    if (restuple == NULL)
	return NULL;
    if (!PyTuple_Check(restuple)) {
	PyErr_Format(PyExc_TypeError, &argparse[4]);
	Py_DECREF(restuple);
	return NULL;
    }
    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
	&resunicode, &i_newpos)) {
	Py_DECREF(restuple);
	return NULL;
    }
    if (i_newpos<0)
	*newpos = size+i_newpos;
    else
        *newpos = i_newpos;
    if (*newpos<0 || *newpos>size) {
	PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
	Py_DECREF(restuple);
	return NULL;
    }
    Py_INCREF(resunicode);
    Py_DECREF(restuple);
    return resunicode;
}

/* Lookup the character ch in the mapping and put the result in result,
   which must be decrefed by the caller.
   Return 0 on success, -1 on error */
static
int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
{
    PyObject *w = PyInt_FromLong((long)c);
    PyObject *x;

    if (w == NULL)
	 return -1;
    x = PyObject_GetItem(mapping, w);
    Py_DECREF(w);
    if (x == NULL) {
	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
	    /* No mapping found means: use 1:1 mapping. */
	    PyErr_Clear();
	    *result = NULL;
	    return 0;
	} else
	    return -1;
    }
    else if (x == Py_None) {
	*result = x;
	return 0;
    }
    else if (PyInt_Check(x)) {
	long value = PyInt_AS_LONG(x);
	long max = PyUnicode_GetMax();
	if (value < 0 || value > max) {
	    PyErr_Format(PyExc_TypeError,
			     "character mapping must be in range(0x%lx)", max+1);
	    Py_DECREF(x);
	    return -1;
	}
	*result = x;
	return 0;
    }
    else if (PyUnicode_Check(x)) {
	*result = x;
	return 0;
    }
    else {
	/* wrong return value */
	PyErr_SetString(PyExc_TypeError,
	      "character mapping must return integer, None or unicode");
	Py_DECREF(x);
	return -1;
    }
}
/* ensure that *outobj is at least requiredsize characters long,
if not reallocate and adjust various state variables.
Return 0 on success, -1 on error */
static
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
    Py_ssize_t requiredsize)
{
    Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj);
    if (requiredsize > oldsize) {
	/* remember old output position */
	Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
	/* exponentially overallocate to minimize reallocations */
	if (requiredsize < 2 * oldsize)
	    requiredsize = 2 * oldsize;
	if (_PyUnicode_Resize(outobj, requiredsize) < 0)
	    return -1;
	*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
    }
    return 0;
}
/* lookup the character, put the result in the output string and adjust
   various state variables. Return a new reference to the object that
   was put in the output buffer in *result, or Py_None, if the mapping was
   undefined (in which case no character was written).
   The called must decref result.
   Return 0 on success, -1 on error. */
static
int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
    Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
    PyObject **res)
{
    if (charmaptranslate_lookup(*curinp, mapping, res))
	return -1;
    if (*res==NULL) {
	/* not found => default to 1:1 mapping */
	*(*outp)++ = *curinp;
    }
    else if (*res==Py_None)
	;
    else if (PyInt_Check(*res)) {
	/* no overflow check, because we know that the space is enough */
	*(*outp)++ = (Py_UNICODE)PyInt_AS_LONG(*res);
    }
    else if (PyUnicode_Check(*res)) {
	Py_ssize_t repsize = PyUnicode_GET_SIZE(*res);
	if (repsize==1) {
	    /* no overflow check, because we know that the space is enough */
	    *(*outp)++ = *PyUnicode_AS_UNICODE(*res);
	}
	else if (repsize!=0) {
	    /* more than one character */
	    Py_ssize_t requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
		(insize - (curinp-startinp)) +
		repsize - 1;
	    if (charmaptranslate_makespace(outobj, outp, requiredsize))
		return -1;
	    memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
	    *outp += repsize;
	}
    }
    else
	return -1;
    return 0;
}

PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
				     Py_ssize_t size,
				     PyObject *mapping,
				     const char *errors)
{
    /* output object */
    PyObject *res = NULL;
    /* pointers to the beginning and end+1 of input */
    const Py_UNICODE *startp = p;
    const Py_UNICODE *endp = p + size;
    /* pointer into the output */
    Py_UNICODE *str;
    /* current output position */
    Py_ssize_t respos = 0;
    char *reason = "character maps to <undefined>";
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    /* the following variable is used for caching string comparisons
     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
     * 3=ignore, 4=xmlcharrefreplace */
    int known_errorHandler = -1;

    if (mapping == NULL) {
	PyErr_BadArgument();
	return NULL;
    }

    /* allocate enough for a simple 1:1 translation without
       replacements, if we need more, we'll resize */
    res = PyUnicode_FromUnicode(NULL, size);
    if (res == NULL)
	goto onError;
    if (size == 0)
	return res;
    str = PyUnicode_AS_UNICODE(res);

    while (p<endp) {
	/* try to encode it */
	PyObject *x = NULL;
	if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
	    Py_XDECREF(x);
	    goto onError;
	}
	Py_XDECREF(x);
	if (x!=Py_None) /* it worked => adjust input pointer */
	    ++p;
	else { /* untranslatable character */
	    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
	    Py_ssize_t repsize;
	    Py_ssize_t newpos;
	    Py_UNICODE *uni2;
	    /* startpos for collecting untranslatable chars */
	    const Py_UNICODE *collstart = p;
	    const Py_UNICODE *collend = p+1;
	    const Py_UNICODE *coll;

	    /* find all untranslatable characters */
	    while (collend < endp) {
		if (charmaptranslate_lookup(*collend, mapping, &x))
		    goto onError;
		Py_XDECREF(x);
		if (x!=Py_None)
		    break;
		++collend;
	    }
	    /* cache callback name lookup
	     * (if not done yet, i.e. it's the first error) */
	    if (known_errorHandler==-1) {
		if ((errors==NULL) || (!strcmp(errors, "strict")))
		    known_errorHandler = 1;
		else if (!strcmp(errors, "replace"))
		    known_errorHandler = 2;
		else if (!strcmp(errors, "ignore"))
		    known_errorHandler = 3;
		else if (!strcmp(errors, "xmlcharrefreplace"))
		    known_errorHandler = 4;
		else
		    known_errorHandler = 0;
	    }
	    switch (known_errorHandler) {
		case 1: /* strict */
		    raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
		    goto onError;
		case 2: /* replace */
		    /* No need to check for space, this is a 1:1 replacement */
		    for (coll = collstart; coll<collend; ++coll)
			*str++ = '?';
		    /* fall through */
		case 3: /* ignore */
		    p = collend;
		    break;
		case 4: /* xmlcharrefreplace */
		    /* generate replacement (temporarily (mis)uses p) */
		    for (p = collstart; p < collend; ++p) {
			char buffer[2+29+1+1];
			char *cp;
			sprintf(buffer, "&#%d;", (int)*p);
			if (charmaptranslate_makespace(&res, &str,
			    (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
			    goto onError;
			for (cp = buffer; *cp; ++cp)
			    *str++ = *cp;
		    }
		    p = collend;
		    break;
		default:
		    repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
			reason, startp, size, &exc,
			collstart-startp, collend-startp, &newpos);
		    if (repunicode == NULL)
			goto onError;
		    /* generate replacement  */
		    repsize = PyUnicode_GET_SIZE(repunicode);
		    if (charmaptranslate_makespace(&res, &str,
			(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
			Py_DECREF(repunicode);
			goto onError;
		    }
		    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
			*str++ = *uni2;
		    p = startp + newpos;
		    Py_DECREF(repunicode);
	    }
	}
    }
    /* Resize if we allocated to much */
    respos = str-PyUnicode_AS_UNICODE(res);
    if (respos<PyUnicode_GET_SIZE(res)) {
	if (_PyUnicode_Resize(&res, respos) < 0)
	    goto onError;
    }
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return res;

    onError:
    Py_XDECREF(res);
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return NULL;
}

PyObject *PyUnicode_Translate(PyObject *str,
			      PyObject *mapping,
			      const char *errors)
{
    PyObject *result;

    str = PyUnicode_FromObject(str);
    if (str == NULL)
	goto onError;
    result = PyUnicode_TranslateCharmap(PyUnicode_AS_UNICODE(str),
					PyUnicode_GET_SIZE(str),
					mapping,
					errors);
    Py_DECREF(str);
    return result;

 onError:
    Py_XDECREF(str);
    return NULL;
}

/* --- Decimal Encoder ---------------------------------------------------- */

int PyUnicode_EncodeDecimal(Py_UNICODE *s,
			    Py_ssize_t length,
			    char *output,
			    const char *errors)
{
    Py_UNICODE *p, *end;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    const char *encoding = "decimal";
    const char *reason = "invalid decimal Unicode string";
    /* the following variable is used for caching string comparisons
     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
    int known_errorHandler = -1;

    if (output == NULL) {
	PyErr_BadArgument();
	return -1;
    }

    p = s;
    end = s + length;
    while (p < end) {
	register Py_UNICODE ch = *p;
	int decimal;
	PyObject *repunicode;
	Py_ssize_t repsize;
	Py_ssize_t newpos;
	Py_UNICODE *uni2;
	Py_UNICODE *collstart;
	Py_UNICODE *collend;

	if (Py_UNICODE_ISSPACE(ch)) {
	    *output++ = ' ';
	    ++p;
	    continue;
	}
	decimal = Py_UNICODE_TODECIMAL(ch);
	if (decimal >= 0) {
	    *output++ = '0' + decimal;
	    ++p;
	    continue;
	}
	if (0 < ch && ch < 256) {
	    *output++ = (char)ch;
	    ++p;
	    continue;
	}
	/* All other characters are considered unencodable */
	collstart = p;
	collend = p+1;
	while (collend < end) {
	    if ((0 < *collend && *collend < 256) ||
	        !Py_UNICODE_ISSPACE(*collend) ||
	        Py_UNICODE_TODECIMAL(*collend))
		break;
	}
	/* cache callback name lookup
	 * (if not done yet, i.e. it's the first error) */
	if (known_errorHandler==-1) {
	    if ((errors==NULL) || (!strcmp(errors, "strict")))
		known_errorHandler = 1;
	    else if (!strcmp(errors, "replace"))
		known_errorHandler = 2;
	    else if (!strcmp(errors, "ignore"))
		known_errorHandler = 3;
	    else if (!strcmp(errors, "xmlcharrefreplace"))
		known_errorHandler = 4;
	    else
		known_errorHandler = 0;
	}
	switch (known_errorHandler) {
	    case 1: /* strict */
		raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
		goto onError;
	    case 2: /* replace */
		for (p = collstart; p < collend; ++p)
		    *output++ = '?';
		/* fall through */
	    case 3: /* ignore */
		p = collend;
		break;
	    case 4: /* xmlcharrefreplace */
		/* generate replacement (temporarily (mis)uses p) */
		for (p = collstart; p < collend; ++p)
		    output += sprintf(output, "&#%d;", (int)*p);
		p = collend;
		break;
	    default:
		repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
		    encoding, reason, s, length, &exc,
		    collstart-s, collend-s, &newpos);
		if (repunicode == NULL)
		    goto onError;
		/* generate replacement  */
		repsize = PyUnicode_GET_SIZE(repunicode);
		for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
		    Py_UNICODE ch = *uni2;
		    if (Py_UNICODE_ISSPACE(ch))
			*output++ = ' ';
		    else {
			decimal = Py_UNICODE_TODECIMAL(ch);
			if (decimal >= 0)
			    *output++ = '0' + decimal;
			else if (0 < ch && ch < 256)
			    *output++ = (char)ch;
			else {
			    Py_DECREF(repunicode);
			    raise_encode_exception(&exc, encoding,
				s, length, collstart-s, collend-s, reason);
			    goto onError;
			}
		    }
		}
		p = s + newpos;
		Py_DECREF(repunicode);
	}
    }
    /* 0-terminate the output string */
    *output++ = '\0';
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return 0;

 onError:
    Py_XDECREF(exc);
    Py_XDECREF(errorHandler);
    return -1;
}

/* --- Helpers ------------------------------------------------------------ */

#include "stringlib/unicodedefs.h"

#define FROM_UNICODE

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"

/* helper macro to fixup start/end slice values */
#define FIX_START_END(obj)                      \
    if (start < 0)                              \
        start += (obj)->length;                 \
    if (start < 0)                              \
        start = 0;                              \
    if (end > (obj)->length)                    \
        end = (obj)->length;                    \
    if (end < 0)                                \
        end += (obj)->length;                   \
    if (end < 0)                                \
        end = 0;

Py_ssize_t PyUnicode_Count(PyObject *str,
                           PyObject *substr,
                           Py_ssize_t start,
                           Py_ssize_t end)
{
    Py_ssize_t result;
    PyUnicodeObject* str_obj;
    PyUnicodeObject* sub_obj;

    str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
    if (!str_obj)
	return -1;