summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_codecencodings_tw.py
blob: 96245b74ed3463884f86dc41c8e8701407b3c32d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python3
#
# test_codecencodings_tw.py
#   Codec encoding tests for ROC encodings.
#

from test import support
from test import multibytecodec_support
import unittest

class Test_Big5(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test case for the ``big5`` encoding.

    This class defines no test methods of its own; it only supplies data
    attributes (codec name, reference test string, table of malformed-input
    cases).  NOTE(review): the test logic is presumably inherited from
    ``multibytecodec_support.TestBase``, which is not visible in this file --
    confirm there how each attribute is consumed.
    """
    # Name of the codec under test.
    encoding = 'big5'
    # Reference data for the codec, obtained from the support module's helper.
    # NOTE(review): the exact shape of load_teststring()'s return value is
    # defined in multibytecodec_support -- confirm there.
    tstring = multibytecodec_support.load_teststring('big5')
    # Each row is (input bytes, error handler name, expected text).
    # NOTE(review): None as the expected text appears to mean "decoding must
    # fail" (both such rows use the "strict" handler) -- confirm in TestBase.
    #
    # What the expected values below encode:
    #   * with "replace", every \x80 byte and the trailing lone \xc8 byte
    #     each become one U+FFFD, while the pair \xc1\xc4 becomes U+8B10;
    #   * with "ignore", the \x80 bytes are dropped and only "abc" plus
    #     U+8B10 remain.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict",  None),
        (b"abc\xc8", "strict",  None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
    )

def test_main():
    """Pass this module's name to ``support.run_unittest``.

    Kept as a named function so that both the ``__main__`` guard at the
    bottom of the file and any external caller can trigger the run.
    """
    module_name = __name__
    support.run_unittest(module_name)

# Run the tests only when this file is executed directly as a script;
# importing the module must not start a test run.
if __name__ == "__main__":
    test_main()
pan> | | | | | | | | | | | | | | | | * No need to double-check that strings are ready: test already done by PyUnicode_FromObject() * Remove useless kind variable (use kind1 instead) * | | Minor change: fix character in do_strip() for the ASCII caseVictor Stinner2013-04-141-2/+2 | | | * | | Cleanup PyUnicode_Append()Victor Stinner2013-04-141-18/+14 | | | | | | | | | | | | | | | | | | | | | * Check also that right is a Unicode object * call directly resize_compact() instead of unicode_resize() for a more explicit error handling, and to avoid testing some properties twice (ex: unicode_modifiable()) * | | PyUnicode_Join(): move use_memcpy test out of the loop to cleanup and ↵Victor Stinner2013-04-141-20/+28 | | | | | | | | | | | | optimize the code * | | Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is ↵Victor Stinner2013-04-141-69/+78 | | | | | | | | | | | | escaped * | | Optimize ascii(str): don't encode/decode repr if repr is already ASCIIVictor Stinner2013-04-142-1/+4 | | | * | | stringlib: remove unused STRINGLIB_RESIZE macroVictor Stinner2013-04-147-7/+0 | | | * | | Add _PyUnicodeWriter_WriteCharInline()Victor Stinner2013-04-141-71/+35 | | | * | | Issue #16061: Speed up str.replace() for replacing 1-character strings.Serhiy Storchaka2013-04-132-26/+91 | | | * | | Issue #17715: Merge fix from 3.3.Mark Dickinson2013-04-131-0/+2 |\ \ \ | |/ / | * | Issue #17715: Add missing NULL Check to PyNumber_Long.Mark Dickinson2013-04-131-0/+2 | | | * | | Issue #17643: Add __callback__ attribute to weakref.ref.Mark Dickinson2013-04-131-1/+6 | | | * | | Issue #16447: Merge fix from 3.3.Mark Dickinson2013-04-131-1/+4 |\ \ \ | |/ / | * | Issue #16447: Fix potential segfault when setting __name__ on a class.Mark Dickinson2013-04-131-1/+4 | | | * | | Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead ofVictor Stinner2013-04-111-0/+10 | | | | | | | | | | | | | | | | | | the legacy Py_UNICODE API. 
Add also a new _PyUnicodeWriter_WriteChar() function. * | | Fix supernumerary 's' in sys._debugmallocstats() output.Antoine Pitrou2013-04-111-1/+1 |\ \ \ | |/ / | * | Fix supernumerary 's' in sys._debugmallocstats() output.Antoine Pitrou2013-04-111-1/+1 | | | * | | merge 3.3 (#17669)Benjamin Peterson2013-04-101-1/+1 |\ \ \ | |/ / | * | don't run frame if it has no stack (closes #17669)Benjamin Peterson2013-04-101-1/+1 | | | * | | Issue #17615: On Windows (VS2010), Performances of wmemcmp() to compare UnicodeVictor Stinner2013-04-091-9/+0 | | | | | | | | | | | | | | | | | | | | | | | | strings are not convincing. For UCS2 (16-bit wchar_t type), use a dummy loop instead of wmemcmp(). The dummy loop is as fast, or a little bit faster. wchar_t is only 16-bit long on Windows. wmemcmp() is still used for 32-bit wchar_t. * | | replace(): only call PyUnicode_DATA(u) onceVictor Stinner2013-04-091-3/+4 | | | * | | Write super-fast version of str.strip(), str.lstrip() and str.rstrip() for ↵Victor Stinner2013-04-091-19/+45 | | | | | | | | | | | | pure ASCII * | | Don't calls macros in PyUnicode_WRITE() parametersVictor Stinner2013-04-091-2/+10 | | | | | | | | | | | | PyUnicode_WRITE() expands some parameters twice or more. * | | Fix do_strip(): don't call PyUnicode_READ() in Py_UNICODE_ISSPACE() to not callVictor Stinner2013-04-091-3/+10 | | | | | | | | | | | | it twice * | | Fix _PyUnicode_XStrip()Victor Stinner2013-04-091-10/+18 | | | | | | | | | | | | | | | | | | Inline the BLOOM_MEMBER() to only call PyUnicode_READ() only once (per loop iteration). Store also the length of the seperator in a variable to avoid calls to PyUnicode_GET_LENGTH(). * | | Optimize PyUnicode_DecodeCharmap()Victor Stinner2013-04-091-7/+9 | | | | | | | | | | | | | | | Avoid expensive PyUnicode_READ() and PyUnicode_WRITE(), manipulate pointers instead. 
* | | Optimize make_bloom_mask(), used by str.strip(), str.lstrip() and str.rstrip()Victor Stinner2013-04-091-5/+27 | | | | | | | | | | | | | | | Write specialized functions per Unicode kind to avoid the expensive PyUnicode_READ() macro. * | | Use PyUnicode_READ() instead of PyUnicode_READ_CHAR()Victor Stinner2013-04-091-6/+22 | | | | | | | | | | | | | | | "PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it calls PyUnicode_KIND() and might call it twice." according to its documentation. * | | Add fast-path in PyUnicode_DecodeCharmap() for pure 8 bit encodings:Victor Stinner2013-04-091-1/+26 | | | | | | | | | | | | cp037, cp500 and iso8859_1 codecs * | | Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possibleVictor Stinner2013-04-081-0/+22 | | | | | | | | | | | | | | | wmemcmp() is twice faster than a dummy loop (342 usec vs 744 usec) on Fedora 18/x86_64, GCC 4.7.2. * | | Issue #17615: Expand expensive PyUnicode_READ() macro in unicode_compare():Victor Stinner2013-04-081-17/+77 | | | | | | | | | | | | write specialized functions for each combination of Unicode kinds. * | | Close #13126: "Simplify" FASTSEARCH() code to help the compiler to emit moreVictor Stinner2013-04-071-3/+5 | | | | | | | | | | | | | | | | | | | | | efficient machine code. Patch written by Antoine Pitrou. Without this change, str.find() was 10% slower than str.rfind() in the worst case. * | | Revert a premature patch for issue #14010 (changeset 846bd418aee5).Serhiy Storchaka2013-04-061-5/+2 |\ \ \ | |/ / | * | Revert a premature patch for issue #14010 (changeset aaaf36026511).Serhiy Storchaka2013-04-061-5/+2 | | | * | | Issue #14010: Fix a crash when iterating or deleting deeply nested filtersSerhiy Storchaka2013-04-061-2/+5 |\ \ \ | |/ / | | | | | | (builting and in itertools module, i.e. map(), itertools.chain(), etc). 
| * | Issue #14010: Fix a crash when iterating or deleting deeply nested filtersSerhiy Storchaka2013-04-061-2/+5 | | | | | | | | | | | | (builting and in itertools module, i.e. map(), itertools.chain(), etc). * | | Issue #17469: Fix _Py_GetAllocatedBlocks() and sys.getallocatedblocks() when ↵Antoine Pitrou2013-04-051-3/+5 | | | | | | | | | | | | running on valgrind. * | | fix unused variableVictor Stinner2013-04-031-1/+0 | | | * | | Close #16757: Avoid calling the expensive _PyUnicode_FindMaxChar() functionVictor Stinner2013-04-031-7/+10 | | | | | | | | | | | | when possible * | | Add _PyUnicodeWriter_WriteSubstring() functionVictor Stinner2013-04-022-21/+45 | | | | | | | | | | | | | | | | | | | | | | | | | | | Write a function to enable more optimizations: * If the substring is the whole string and overallocation is disabled, just keep a reference to the string, don't copy characters * Avoid a call to the expensive _PyUnicode_FindMaxChar() function when possible * | | merge 3.3 (#17610)Benjamin Peterson2013-04-011-112/+99 |\ \ \ | |/ / | * | list slotdefs in offset order rather than sorting them (closes #17610)Benjamin Peterson2013-04-01ss-git/tcl.git/stats/library/tzdata/Europe/Brussels?h=dgp_dup_encoding_fix'>statsplain -rw-r--r--Bucharest7706logstatsplain -rw-r--r--Budapest7975logstatsplain -rw-r--r--Busingen178logstatsplain -rw-r--r--Chisinau7824logstatsplain -rw-r--r--Copenhagen7458logstatsplain -rw-r--r--Dublin9476logstatsplain -rw-r--r--Gibraltar9181logstatsplain -rw-r--r--Guernsey178logstatsplain -rw-r--r--Helsinki7120logstatsplain -rw-r--r--Isle_of_Man181logstatsplain -rw-r--r--Istanbul8793logstatsplain -rw-r--r--Jersey176logstatsplain -rw-r--r--Kaliningrad2397logstatsplain -rw-r--r--Kiev7202logstatsplain -rw-r--r--Lisbon9471logstatsplain -rw-r--r--Ljubljana185logstatsplain -rw-r--r--London9839logstatsplain -rw-r--r--Luxembourg8826logstatsplain -rw-r--r--Madrid8282logstatsplain -rw-r--r--Malta8425logstatsplain -rw-r--r--Mariehamn185logstatsplain 
-rw-r--r--Minsk2101logstatsplain