summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2007-11-20 23:31:27 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2007-11-20 23:31:27 (GMT)
commit: 5087980c1e7e6733983245cd0f209d8770f9686e (patch)
tree: 21cb48e04503c87abd856791ba2a249a40ff1433
parent: 8c4592a77ae6b71a4bab8d40bbdcea72a6378cb4 (diff)
downloadcpython-5087980c1e7e6733983245cd0f209d8770f9686e.zip
cpython-5087980c1e7e6733983245cd0f209d8770f9686e.tar.gz
cpython-5087980c1e7e6733983245cd0f209d8770f9686e.tar.bz2
The incremental decoder for utf-7 must preserve its state between calls.
Solves issue1460. Might not be a backport candidate: a new API function was added, and some code may rely on details in utf_7.py.
-rw-r--r-- Include/unicodeobject.h 7
-rw-r--r-- Lib/encodings/utf_7.py 23
-rw-r--r-- Lib/test/test_codecs.py 14
-rw-r--r-- Modules/_codecsmodule.c 19
-rw-r--r-- Objects/unicodeobject.c 21
5 files changed, 61 insertions, 23 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 3e9dcee..a774d52 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
const char *errors /* error handling */
);
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
+ const char *string, /* UTF-7 encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed /* bytes consumed */
+ );
+
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py
index d78d192..8e0567f 100644
--- a/Lib/encodings/utf_7.py
+++ b/Lib/encodings/utf_7.py
@@ -6,34 +6,31 @@ import codecs
### Codec APIs
-class Codec(codecs.Codec):
+encode = codecs.utf_7_encode
- # Note: Binding these as C functions will result in the class not
- # converting them to methods. This is intended.
- encode = codecs.utf_7_encode
- decode = codecs.utf_7_decode
+def decode(input, errors='strict'):
+ return codecs.utf_7_decode(input, errors, True)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.utf_7_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
- def _buffer_decode(self, input, errors, final):
- return codecs.utf_7_decode(input, self.errors)
+ _buffer_decode = codecs.utf_7_decode
-class StreamWriter(Codec,codecs.StreamWriter):
- pass
+class StreamWriter(codecs.StreamWriter):
+ encode = codecs.utf_7_encode
-class StreamReader(Codec,codecs.StreamReader):
- pass
+class StreamReader(codecs.StreamReader):
+ decode = codecs.utf_7_decode
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='utf-7',
- encode=Codec.encode,
- decode=Codec.decode,
+ encode=encode,
+ decode=decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index bfb417c..cee819c 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase):
self.assertEqual(d.decode("", True), u"")
self.assertEqual(d.buffer, "")
- # Check whether the rest method works properly
+ # Check whether the reset method works properly
d.reset()
result = u""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
@@ -491,7 +491,17 @@ class UTF8Test(ReadTest):
class UTF7Test(ReadTest):
encoding = "utf-7"
- # No test_partial() yet, because UTF-7 doesn't support it.
+ def test_partial(self):
+ self.check_partial(
+ u"a+-b",
+ [
+ u"a",
+ u"a",
+ u"a+",
+ u"a+-",
+ u"a+-b",
+ ]
+ )
class UTF16ExTest(unittest.TestCase):
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 0716f3a..d4eb0d5 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -230,18 +230,25 @@ unicode_internal_decode(PyObject *self,
static PyObject *
utf_7_decode(PyObject *self,
- PyObject *args)
+ PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
+ int final = 0;
+ Py_ssize_t consumed;
+ PyObject *decoded = NULL;
- if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
- &data, &size, &errors))
- return NULL;
+ if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
+ &data, &size, &errors, &final))
+ return NULL;
+ consumed = size;
- return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
- size);
+ decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
+ final ? NULL : &consumed);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
}
static PyObject *
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7043d5f..18b861b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -944,6 +944,14 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
Py_ssize_t size,
const char *errors)
{
+ return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
+}
+
+PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+{
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
@@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
unicode = _PyUnicode_New(size);
if (!unicode)
return NULL;
- if (size == 0)
+ if (size == 0) {
+ if (consumed)
+ *consumed = 0;
return (PyObject *)unicode;
+ }
p = unicode->str;
e = s + size;
@@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
goto onError;
}
- if (inShift) {
+ if (inShift && !consumed) {
outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = size;
if (unicode_decode_call_errorhandler(
@@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
if (s < e)
goto restart;
}
+ if (consumed) {
+ if(inShift)
+ *consumed = startinpos;
+ else
+ *consumed = s-starts;
+ }
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
goto onError;