summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2011-10-22 19:26:01 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2011-10-22 19:26:01 (GMT)
commit: 5a77fe92bd26db11684e4922707d80f654be31dc (patch)
tree: 47fcd8c286d773367b084741c88df5a8accbfa8e
parent: f678e822406a5063b294036f5521c2294a6e20bd (diff)
download: cpython-5a77fe92bd26db11684e4922707d80f654be31dc.zip
cpython-5a77fe92bd26db11684e4922707d80f654be31dc.tar.gz
cpython-5a77fe92bd26db11684e4922707d80f654be31dc.tar.bz2
Issue #1548891: The cStringIO.StringIO() constructor now encodes unicode
arguments with the system default encoding just like the write() method does, instead of converting it to a raw buffer.
-rw-r--r-- Doc/library/stringio.rst | 5
-rw-r--r-- Lib/test/test_StringIO.py | 21
-rw-r--r-- Misc/NEWS | 4
-rw-r--r-- Modules/cStringIO.c | 6
4 files changed, 31 insertions, 5 deletions
diff --git a/Doc/library/stringio.rst b/Doc/library/stringio.rst
index 0177da4..612ddcd 100644
--- a/Doc/library/stringio.rst
+++ b/Doc/library/stringio.rst
@@ -82,10 +82,7 @@ instead.
those cases.
Unlike the :mod:`StringIO` module, this module is not able to accept Unicode
- strings that cannot be encoded as plain ASCII strings. Calling
- :func:`StringIO` with a Unicode string parameter populates the object with
- the buffer representation of the Unicode string instead of encoding the
- string.
+ strings that cannot be encoded as plain ASCII strings.
Another difference from the :mod:`StringIO` module is that calling
:func:`StringIO` with a string parameter creates a read-only object. Unlike an
diff --git a/Lib/test/test_StringIO.py b/Lib/test/test_StringIO.py
index 1459e61..bf0c733 100644
--- a/Lib/test/test_StringIO.py
+++ b/Lib/test/test_StringIO.py
@@ -134,6 +134,27 @@ class TestcStringIO(TestGenericStringIO):
f = self.MODULE.StringIO(a)
self.assertEqual(f.getvalue(), '\x00\x01\x02')
+ def test_unicode(self):
+
+ if not test_support.have_unicode: return
+
+ # The cStringIO module converts Unicode strings to character
+ # strings when writing them to cStringIO objects.
+ # Check that this works.
+
+ f = self.MODULE.StringIO()
+ f.write(u'abcde')
+ s = f.getvalue()
+ self.assertEqual(s, 'abcde')
+ self.assertEqual(type(s), str)
+
+ f = self.MODULE.StringIO(u'abcde')
+ s = f.getvalue()
+ self.assertEqual(s, 'abcde')
+ self.assertEqual(type(s), str)
+
+ self.assertRaises(UnicodeEncodeError, self.MODULE.StringIO, u'\xf4')
+
import sys
if sys.platform.startswith('java'):
diff --git a/Misc/NEWS b/Misc/NEWS
index dd2874b..b4f9be2 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,10 @@ Core and Builtins
Library
-------
+- Issue #1548891: The cStringIO.StringIO() constructor now encodes unicode
+ arguments with the system default encoding just like the write() method
+ does, instead of converting it to a raw buffer.
+
- Issue #9168: now smtpd is able to bind privileged port.
- Issue #12529: fix cgi.parse_header issue on strings with double-quotes and
diff --git a/Modules/cStringIO.c b/Modules/cStringIO.c
index be9ad50..89f1dd6 100644
--- a/Modules/cStringIO.c
+++ b/Modules/cStringIO.c
@@ -661,7 +661,11 @@ newIobject(PyObject *s) {
char *buf;
Py_ssize_t size;
- if (PyObject_AsReadBuffer(s, (const void **)&buf, &size)) {
+ if (PyUnicode_Check(s)) {
+ if (PyObject_AsCharBuffer(s, (const char **)&buf, &size) != 0)
+ return NULL;
+ }
+ else if (PyObject_AsReadBuffer(s, (const void **)&buf, &size)) {
PyErr_Format(PyExc_TypeError, "expected read buffer, %.200s found",
s->ob_type->tp_name);
return NULL;