diff options
-rw-r--r-- | Lib/test/test_multibytecodec_support.py | 36 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_hk.c | 93 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_jp.c | 10 | ||||
-rw-r--r-- | Python/import.c | 12 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | configure.in | 1 |
6 files changed, 95 insertions, 59 deletions
diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 7735976..ef63b69 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -58,11 +58,16 @@ class TestBase: result = func(source, scheme)[0] if func is self.decode: self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertTrue(type(result) is bytes, type(result)) - self.assertEqual(result, expected, - '%a.decode(%r)=%a != %a' - % (source, self.encoding, result, expected)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertRaises(UnicodeError, func, source, scheme) @@ -279,6 +284,7 @@ class TestBase_Mapping(unittest.TestCase): pass_enctest = [] pass_dectest = [] supmaps = [] + codectests = [] def __init__(self, *args, **kw): unittest.TestCase.__init__(self, *args, **kw) @@ -348,6 +354,30 @@ class TestBase_Mapping(unittest.TestCase): if (csetch, unich) not in self.pass_dectest: self.assertEqual(str(csetch, self.encoding), unich) + def test_errorhandle(self): + for source, scheme, expected in self.codectests: + if isinstance(source, bytes): + func = source.decode + else: + func = source.encode + if expected: + if isinstance(source, bytes): + result = func(self.encoding, scheme) + self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + result = func(self.encoding, scheme) + self.assertTrue(type(result) is bytes, type(result)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + self.assertRaises(UnicodeError, func, self.encoding, scheme) + def load_teststring(name): dir = 
os.path.join(os.path.dirname(__file__), 'cjkencodings') with open(os.path.join(dir, name + '.txt'), 'rb') as f: diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index aaf103d..558a42f 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -115,55 +115,56 @@ DECODER(big5hkscs) REQUIRE_INBUF(2) - if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) - goto hkscsdec; + if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + continue; + } + } + + TRYMAP_DEC(big5hkscs, decoded, c, IN2) + { + int s = BH2S(c, IN2); + const unsigned char *hintbase; + + assert(0x87 <= c && c <= 0xfe); + assert(0x40 <= IN2 && IN2 <= 0xfe); + + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + hintbase = big5hkscs_phint_0; + s -= BH2S(0x87, 0x40); + } + else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ + hintbase = big5hkscs_phint_12130; + s -= BH2S(0xc6, 0xa1); + } + else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ + hintbase = big5hkscs_phint_21924; + s -= BH2S(0xf9, 0xd6); + } + else + return MBERR_INTERNAL; - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + if (hintbase[s >> 3] & (1 << (s & 7))) { + WRITEUCS4(decoded | 0x20000) + NEXT_IN(2) + } + else { + OUT1(decoded) + NEXT(2, 1) + } + continue; } - else -hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { - int s = BH2S(c, IN2); - const unsigned char *hintbase; - - assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); - - if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { - hintbase = big5hkscs_phint_0; - s -= BH2S(0x87, 0x40); - } - else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_12130; - s -= BH2S(0xc6, 0xa1); - } - else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21924; - s -= BH2S(0xf9, 0xd6); - } - else - return MBERR_INTERNAL; - - if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - 
NEXT_IN(2) - } - else { - OUT1(decoded) - NEXT(2, 1) - } - } - else { - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; - } - - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 2; } + + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 901d3be..a05e01b 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -371,11 +371,11 @@ DECODER(euc_jp) REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } if (c == 0x8e) { /* JIS X 0201 half-width katakana */ diff --git a/Python/import.c b/Python/import.c index 1f28d22..aaf649f 100644 --- a/Python/import.c +++ b/Python/import.c @@ -110,8 +110,12 @@ typedef unsigned short mode_t; TAG and PYC_TAG_UNICODE must change for each major Python release. The magic number will take care of any bytecode changes that occur during development. 
*/ +#define QUOTE(arg) #arg +#define STRIFY(name) QUOTE(name) +#define MAJOR STRIFY(PY_MAJOR_VERSION) +#define MINOR STRIFY(PY_MINOR_VERSION) #define MAGIC (3180 | ((long)'\r'<<16) | ((long)'\n'<<24)) -#define TAG "cpython-32" +#define TAG "cpython-" MAJOR MINOR; #define CACHEDIR "__pycache__" static const Py_UNICODE CACHEDIR_UNICODE[] = { '_', '_', 'p', 'y', 'c', 'a', 'c', 'h', 'e', '_', '_', '\0'}; @@ -119,7 +123,11 @@ static const Py_UNICODE CACHEDIR_UNICODE[] = { static long pyc_magic = MAGIC; static const char *pyc_tag = TAG; static const Py_UNICODE PYC_TAG_UNICODE[] = { - 'c', 'p', 'y', 't', 'h', 'o', 'n', '-', '3', '2', '\0'}; + 'c', 'p', 'y', 't', 'h', 'o', 'n', '-', PY_MAJOR_VERSION + 48, PY_MINOR_VERSION + 48, '\0'}; +#undef QUOTE +#undef STRIFY +#undef MAJOR +#undef MINOR /* See _PyImport_FixupExtensionObject() below */ static PyObject *extensions = NULL; diff --git a/configure b/configure --- a/configure +++ b/configure @@ -1,5 +1,4 @@ #! /bin/sh -# From configure.in HGVERSION. # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.65 for python 3.3. # @@ -2746,7 +2745,6 @@ else fi - ac_config_headers="$ac_config_headers pyconfig.h" diff --git a/configure.in b/configure.in index 71f488d..274c68d 100644 --- a/configure.in +++ b/configure.in @@ -30,7 +30,6 @@ else HGBRANCH="" fi -AC_REVISION(HGVERSION) AC_CONFIG_SRCDIR([Include/object.h]) AC_CONFIG_HEADER(pyconfig.h) |