From 5d14c2b8f87b23a9257c97b5f98dd097ca289c78 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 20 Nov 2007 23:38:09 +0000 Subject: Merged revisions 59056-59076 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r59064 | christian.heimes | 2007-11-20 02:48:48 +0100 (Tue, 20 Nov 2007) | 1 line Fixed bug #1470 ........ r59066 | martin.v.loewis | 2007-11-20 03:46:02 +0100 (Tue, 20 Nov 2007) | 2 lines Patch #1468: Package Lib/test/*.pem. ........ r59068 | christian.heimes | 2007-11-20 04:21:02 +0100 (Tue, 20 Nov 2007) | 1 line Another fix for test_shutil. Martin pointed out that it breaks some build bots ........ r59073 | nick.coghlan | 2007-11-20 15:55:57 +0100 (Tue, 20 Nov 2007) | 1 line Backport some main.c cleanup from the py3k branch ........ r59076 | amaury.forgeotdarc | 2007-11-21 00:31:27 +0100 (Wed, 21 Nov 2007) | 6 lines The incremental decoder for utf-7 must preserve its state between calls. Solves issue1460. Might not be a backport candidate: a new API function was added, and some code may rely on details in utf-7.py. ........ --- Include/unicodeobject.h | 7 +++++++ Lib/encodings/utf_7.py | 23 ++++++++++------------- Lib/test/test_codecs.py | 12 +++++++++++- Modules/_codecsmodule.c | 19 +++++++++++++------ Objects/unicodeobject.c | 21 +++++++++++++++++++-- Tools/msi/msi.py | 1 + 6 files changed, 61 insertions(+), 22 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 203dcef..f3c37fe 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -751,6 +751,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( const char *errors /* error handling */ ); +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( const Py_UNICODE *data, /* Unicode char buffer */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */ diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py index d78d192..8e0567f 100644 --- a/Lib/encodings/utf_7.py +++ b/Lib/encodings/utf_7.py @@ -6,34 +6,31 @@ import codecs ### Codec APIs -class Codec(codecs.Codec): +encode = codecs.utf_7_encode - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.utf_7_encode - decode = codecs.utf_7_decode +def decode(input, errors='strict'): + return codecs.utf_7_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.utf_7_encode(input, self.errors)[0] class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, input, errors, final): - return codecs.utf_7_decode(input, self.errors) + _buffer_decode = codecs.utf_7_decode -class StreamWriter(Codec,codecs.StreamWriter): - pass +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_7_encode -class StreamReader(Codec,codecs.StreamReader): - pass +class StreamReader(codecs.StreamReader): + decode = codecs.utf_7_decode ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-7', - encode=Codec.encode, - decode=Codec.decode, + encode=encode, + decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 413a5aa..a828edf 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -544,7 +544,17 @@ class UTF8Test(ReadTest): class UTF7Test(ReadTest): encoding = "utf-7" - # No test_partial() yet, because UTF-7 doesn't support it. + def test_partial(self): + self.check_partial( + "a+-b", + [ + "a", + "a", + "a+", + "a+-", + "a+-b", + ] + ) class UTF16ExTest(unittest.TestCase): diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index caee3fd..caaac58 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -250,18 +250,25 @@ unicode_internal_decode(PyObject *self, static PyObject * utf_7_decode(PyObject *self, - PyObject *args) + PyObject *args) { const char *data; Py_ssize_t size; const char *errors = NULL; + int final = 0; + Py_ssize_t consumed; + PyObject *decoded = NULL; - if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode", - &data, &size, &errors)) - return NULL; + if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode", + &data, &size, &errors, &final)) + return NULL; + consumed = size; - return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors), - size); + decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors, + final ? NULL : &consumed); + if (decoded == NULL) + return NULL; + return codec_tuple(decoded, consumed); } static PyObject * diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 426dc07..65c2980 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1519,6 +1519,14 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, Py_ssize_t size, const char *errors) { + return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); +} + +PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ const char *starts = s; Py_ssize_t startinpos; Py_ssize_t endinpos; @@ -1537,8 +1545,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, unicode = _PyUnicode_New(size); if (!unicode) return NULL; - if (size == 0) + if (size == 0) { + if (consumed) + *consumed = 0; return (PyObject *)unicode; + } p = unicode->str; e = s + size; @@ -1624,7 +1635,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, goto onError; } - if (inShift) { + if (inShift && !consumed) { outpos = p-PyUnicode_AS_UNICODE(unicode); endinpos = size; if (unicode_decode_call_errorhandler( @@ -1636,6 +1647,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, if (s < e) goto restart; } + if (consumed) { + if(inShift) + *consumed = startinpos; + else + *consumed = s-starts; + } if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0) goto onError; diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py index dffd02e..45f5a2e 100644 --- a/Tools/msi/msi.py +++ b/Tools/msi/msi.py @@ -974,6 +974,7 @@ def add_files(db): lib.add_file("check_soundcard.vbs") lib.add_file("empty.vbs") lib.glob("*.uue") + lib.glob("*.pem") lib.add_file("readme.txt", src="README") if dir=='decimaltestdata': lib.glob("*.decTest") -- cgit v0.12