diff options
author | Walter Dörwald <walter@livinglogic.de> | 2003-01-30 19:55:28 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2003-01-30 19:55:28 (GMT) |
commit | b4ff1113caaf849a2c8b3b2b17f1f2418cf6057c (patch) | |
tree | 4d033bb4ca7518a69dda1e75d78912bd901f8818 | |
parent | 2b93c4c70820d0314e8e297c2817d9e03d73df62 (diff) | |
download | cpython-b4ff1113caaf849a2c8b3b2b17f1f2418cf6057c.zip cpython-b4ff1113caaf849a2c8b3b2b17f1f2418cf6057c.tar.gz cpython-b4ff1113caaf849a2c8b3b2b17f1f2418cf6057c.tar.bz2 |
Check whether the choosen encoding requires byte swapping
for this iconv() implementation in the init function.
For encoding: use a byteswapped version of the input if
neccessary.
For decoding: byteswap every piece returned by iconv()
if neccessary (but not those pieces returned from the
callback)
Comment out test_sane() in the test script, because
whether this works depends on whether byte swapping
is neccessary or not (an on Py_UNICODE_SIZE)
-rw-r--r-- | Lib/test/test_iconv_codecs.py | 24 | ||||
-rw-r--r-- | Modules/_iconv_codec.c | 80 |
2 files changed, 93 insertions, 11 deletions
diff --git a/Lib/test/test_iconv_codecs.py b/Lib/test/test_iconv_codecs.py index f64ef9b..9d27faa 100644 --- a/Lib/test/test_iconv_codecs.py +++ b/Lib/test/test_iconv_codecs.py @@ -7,19 +7,23 @@ from StringIO import StringIO class IconvCodecTest(unittest.TestCase): if sys.byteorder == 'big': - spam = '\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m' + spam = '\x00s\x00p\x00a\x00m' * 2 else: - spam = 's\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00' + spam = 's\x00p\x00a\x00m\x00' * 2 def test_sane(self): - self.encoder, self.decoder, self.reader, self.writer = \ - codecs.lookup(_iconv_codec.internal_encoding) - self.assertEqual(self.decoder(self.spam), (u'spamspam', 16)) - self.assertEqual(self.encoder(u'spamspam'), (self.spam, 8)) - self.assertEqual(self.reader(StringIO(self.spam)).read(), u'spamspam') - f = StringIO() - self.writer(f).write(u'spamspam') - self.assertEqual(f.getvalue(), self.spam) + # FIXME: Commented out, because it's not clear whether + # the internal encoding choosen requires byte swapping + # for this iconv() implementation. + if False: + self.encoder, self.decoder, self.reader, self.writer = \ + codecs.lookup(_iconv_codec.internal_encoding) + self.assertEqual(self.decoder(self.spam), (u'spamspam', 16)) + self.assertEqual(self.encoder(u'spamspam'), (self.spam, 8)) + self.assertEqual(self.reader(StringIO(self.spam)).read(), u'spamspam') + f = StringIO() + self.writer(f).write(u'spamspam') + self.assertEqual(f.getvalue(), self.spam) def test_basic_errors(self): self.encoder, self.decoder, self.reader, self.writer = \ diff --git a/Modules/_iconv_codec.c b/Modules/_iconv_codec.c index ccf63be..4248127 100644 --- a/Modules/_iconv_codec.c +++ b/Modules/_iconv_codec.c @@ -42,6 +42,10 @@ PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object"); staticforward PyTypeObject iconvcodec_Type; +/* does the choosen internal encoding require + * byteswapping to get native endianness? + * 0=no, 1=yes, -1=unknown */ +static int byteswap = -1; #define ERROR_STRICT (PyObject *)(1) #define ERROR_IGNORE (PyObject *)(2) @@ -88,6 +92,8 @@ iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) size_t inplen, inplen_total, outlen, outlen_total, estep; PyObject *outputobj = NULL, *errorcb = NULL, *exceptionobj = NULL; + Py_UNICODE *swappedinput; + int swapi; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode", kwlist, &input, &inputlen, &errors)) @@ -121,6 +127,24 @@ iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) out = PyString_AS_STRING(outputobj) + (out - out_top); \ out_top = PyString_AS_STRING(outputobj); \ } + if (byteswap) { + swappedinput = PyMem_Malloc(inplen); + if (swappedinput == NULL) + return NULL; + for (swapi = 0; swapi<inputlen; ++swapi) + { + Py_UNICODE c = input[swapi]; +#if Py_UNICODE_SIZE == 2 + c = ((char *)&c)[0]<<8 | ((char *)&c)[1]; +#else + c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 | + ((char *)&c)[2]<<8 | ((char *)&c)[3]; +#endif + swappedinput[swapi] = c; + } + inp = inp_top = (char *)swappedinput; + } + while (inplen > 0) { if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == -1) { char reason[128]; @@ -253,6 +277,8 @@ errorexit_cbpad: Py_XDECREF(retobj); rettup = PyTuple_New(2); if (rettup == NULL) { Py_DECREF(outputobj); + if (byteswap) + PyMem_Free(swappedinput); return NULL; } PyTuple_SET_ITEM(rettup, 0, outputobj); @@ -266,6 +292,8 @@ errorexit: Py_DECREF(errorcb); } Py_XDECREF(exceptionobj); + if (byteswap) + PyMem_Free(swappedinput); return NULL; } @@ -319,7 +347,27 @@ iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \ } while (inplen > 0) { - if (iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen) == -1) { + char *oldout = out; + char res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen); + + if (byteswap) { + while (oldout < out) + { + char c0 = oldout[0]; +#if Py_UNICODE_SIZE == 2 + oldout[0] = oldout[1]; + oldout[1] = c0; +#else + char c1 = oldout[1]; + oldout[0] = oldout[3]; + oldout[1] = oldout[2]; + oldout[2] = c1; + oldout[3] = c0; +#endif + oldout += sizeof(Py_UNICODE); + } + } + if (res == -1) { char reason[128], *reasonpos = (char *)reason; int errpos; @@ -602,6 +650,36 @@ init_iconv_codec(void) { PyObject *m; + char in = 1; + char *inptr = ∈ + int insize = 1; + Py_UNICODE out = 0; + char *outptr = (char *)&out; + int outsize = sizeof(out); + int res; + + iconv_t hdl = iconv_open(UNICODE_ENCODING, "ASCII"); + + if (hdl == (iconv_t)-1) + Py_FatalError("can't initialize the _iconv_codec module: iconv_open() failed"); + + res = iconv(hdl, &inptr, &insize, &outptr, &outsize); + if (res == -1) + Py_FatalError("can't initialize the _iconv_codec module: iconv() failed"); + + /* Check whether conv() returned native endianess or not for the choosen encoding */ + if (out == 0x1) + byteswap = 0; +#if Py_UNICODE_SIZE == 2 + else if (out == 0x0100) +#else + else if (out == 0x01000000) +#endif + byteswap = 1; + else + Py_FatalError("can't initialize the _iconv_codec module: mixed endianess"); + iconv_close(hdl); + m = Py_InitModule("_iconv_codec", _iconv_codec_methods); PyModule_AddStringConstant(m, "__version__", (char*)__version__); |