diff options
-rw-r--r-- | Include/unicodeobject.h | 7 | ||||
-rw-r--r-- | Lib/encodings/utf_7.py | 23 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 14 | ||||
-rw-r--r-- | Modules/_codecsmodule.c | 19 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 21 |
5 files changed, 61 insertions, 23 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 3e9dcee..a774d52 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( const char *errors /* error handling */ ); +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py index d78d192..8e0567f 100644 --- a/Lib/encodings/utf_7.py +++ b/Lib/encodings/utf_7.py @@ -6,34 +6,31 @@ import codecs ### Codec APIs -class Codec(codecs.Codec): +encode = codecs.utf_7_encode - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.utf_7_encode - decode = codecs.utf_7_decode +def decode(input, errors='strict'): + return codecs.utf_7_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.utf_7_encode(input, self.errors)[0] class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, input, errors, final): - return codecs.utf_7_decode(input, self.errors) + _buffer_decode = codecs.utf_7_decode -class StreamWriter(Codec,codecs.StreamWriter): - pass +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_7_encode -class StreamReader(Codec,codecs.StreamReader): - pass +class StreamReader(codecs.StreamReader): + decode = codecs.utf_7_decode ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-7', - encode=Codec.encode, - decode=Codec.decode, + encode=encode, + decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index bfb417c..cee819c 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase): self.assertEqual(d.decode("", True), u"") self.assertEqual(d.buffer, "") - # Check whether the rest method works properly + # Check whether the reset method works properly d.reset() result = u"" for (c, partialresult) in zip(input.encode(self.encoding), partialresults): @@ -491,7 +491,17 @@ class UTF8Test(ReadTest): class UTF7Test(ReadTest): encoding = "utf-7" - # No test_partial() yet, because UTF-7 doesn't support it. + def test_partial(self): + self.check_partial( + u"a+-b", + [ + u"a", + u"a", + u"a+", + u"a+-", + u"a+-b", + ] + ) class UTF16ExTest(unittest.TestCase): diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 0716f3a..d4eb0d5 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -230,18 +230,25 @@ unicode_internal_decode(PyObject *self, static PyObject * utf_7_decode(PyObject *self, - PyObject *args) + PyObject *args) { const char *data; Py_ssize_t size; const char *errors = NULL; + int final = 0; + Py_ssize_t consumed; + PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode", - &data, &size, &errors)) - return NULL; + if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode", + &data, &size, &errors, &final)) + return NULL; + consumed = size; - return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors), - size); + decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors, + final ? NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); } static PyObject * diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7043d5f..18b861b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -944,6 +944,14 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, Py_ssize_t size, const char *errors) { + return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); +} + +PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; @@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, unicode = _PyUnicode_New(size); if (!unicode) return NULL; - if (size == 0) + if (size == 0) { + if (consumed) + *consumed = 0; return (PyObject *)unicode; + } p = unicode->str; e = s + size; @@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, goto onError; } - if (inShift) { + if (inShift && !consumed) { outpos = p-PyUnicode_AS_UNICODE(unicode); endinpos = size; if (unicode_decode_call_errorhandler( @@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, if (s < e) goto restart; } + if (consumed) { + if(inShift) + *consumed = startinpos; + else + *consumed = s-starts; + } if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0) goto onError; |