diff options
author | Guido van Rossum <guido@python.org> | 2007-12-03 22:54:21 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-12-03 22:54:21 (GMT) |
commit | e7fc50f2d03a6b62e4b4201c89b2c0185c90f697 (patch) | |
tree | 836034eb187c29177ffaabb74b39ba16ed71ddd1 | |
parent | c6fe37bab927bd00e0f2fed8a431adb7d2b6d303 (diff) | |
download | cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.zip cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.tar.gz cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.tar.bz2 |
Add an errors parameter to open() and TextIOWrapper() to specify error handling.
-rw-r--r-- | Include/fileobject.h | 2 |
-rw-r--r-- | Lib/io.py | 36 |
-rw-r--r-- | Lib/test/test_io.py | 40 |
-rw-r--r-- | Objects/complexobject.c | 1 |
-rw-r--r-- | Objects/fileobject.c | 7 |
-rw-r--r-- | Python/import.c | 2 |
-rw-r--r-- | Python/pythonrun.c | 6 |
7 files changed, 77 insertions, 17 deletions
diff --git a/Include/fileobject.h b/Include/fileobject.h index 0f40089..00ec9be 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -9,7 +9,7 @@ extern "C" { #define PY_STDIOTEXTMODE "b" PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *, - int); + char *, int); PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); @@ -49,8 +49,8 @@ class BlockingIOError(IOError): self.characters_written = characters_written -def open(file, mode="r", buffering=None, encoding=None, newline=None, - closefd=True): +def open(file, mode="r", buffering=None, encoding=None, errors=None, + newline=None, closefd=True): r"""Replacement for the built-in open function. Args: @@ -61,6 +61,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, can be: 0 = unbuffered, 1 = line buffered, larger = fully buffered. encoding: optional string giving the text encoding. + errors: optional string giving the encoding error handling. newline: optional newlines specifier; must be None, '', '\n', '\r' or '\r\n'; all other values are illegal. It controls the handling of line endings. 
It works as follows: @@ -99,7 +100,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, 'U': universal newline mode (for backwards compatibility) Constraints: - - encoding must not be given when a binary mode is given + - encoding or errors must not be given when a binary mode is given - buffering must not be zero when a text mode is given Returns: @@ -115,6 +116,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, raise TypeError("invalid buffering: %r" % buffering) if encoding is not None and not isinstance(encoding, str): raise TypeError("invalid encoding: %r" % encoding) + if errors is not None and not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) modes = set(mode) if modes - set("arwb+tU") or len(mode) > len(modes): raise ValueError("invalid mode: %r" % mode) @@ -136,6 +139,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, raise ValueError("must have exactly one of read/write/append mode") if binary and encoding is not None: raise ValueError("binary mode doesn't take an encoding argument") + if binary and errors is not None: + raise ValueError("binary mode doesn't take an errors argument") if binary and newline is not None: raise ValueError("binary mode doesn't take a newline argument") raw = FileIO(file, @@ -177,7 +182,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None, buffer.name = file buffer.mode = mode return buffer - text = TextIOWrapper(buffer, encoding, newline) + text = TextIOWrapper(buffer, encoding, errors, newline) text.name = file text.mode = mode return text @@ -1128,7 +1133,7 @@ class TextIOWrapper(TextIOBase): _CHUNK_SIZE = 128 - def __init__(self, buffer, encoding=None, newline=None): + def __init__(self, buffer, encoding=None, errors=None, newline=None): if newline not in (None, "", "\n", "\r", "\r\n"): raise ValueError("illegal newline value: %r" % (newline,)) if encoding is None: @@ -1148,8 +1153,15 @@ class 
TextIOWrapper(TextIOBase): if not isinstance(encoding, str): raise ValueError("invalid encoding: %r" % encoding) + if errors is None: + errors = "strict" + else: + if not isinstance(errors, str): + raise ValueError("invalid errors: %r" % errors) + self.buffer = buffer self._encoding = encoding + self._errors = errors self._readuniversal = not newline self._readtranslate = newline is None self._readnl = newline @@ -1164,6 +1176,10 @@ class TextIOWrapper(TextIOBase): def encoding(self): return self._encoding + @property + def errors(self): + return self._errors + # A word about _snapshot. This attribute is either None, or a # tuple (decoder_state, readahead, pending) where decoder_state is # the second (integer) item of the decoder state, readahead is the @@ -1206,7 +1222,7 @@ class TextIOWrapper(TextIOBase): if haslf and self._writetranslate and self._writenl != "\n": s = s.replace("\n", self._writenl) # XXX What if we were just reading? - b = s.encode(self._encoding) + b = s.encode(self._encoding, self._errors) self.buffer.write(b) if haslf and self.isatty(): self.flush() @@ -1220,7 +1236,7 @@ class TextIOWrapper(TextIOBase): if make_decoder is None: raise IOError("Can't find an incremental decoder for encoding %s" % self._encoding) - decoder = make_decoder() # XXX: errors + decoder = make_decoder(self._errors) if self._readuniversal: decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) self._decoder = decoder @@ -1447,9 +1463,11 @@ class StringIO(TextIOWrapper): # XXX This is really slow, but fully functional - def __init__(self, initial_value="", encoding="utf-8", newline="\n"): + def __init__(self, initial_value="", encoding="utf-8", + errors="strict", newline="\n"): super(StringIO, self).__init__(BytesIO(), encoding=encoding, + errors=errors, newline=newline) if initial_value: if not isinstance(initial_value, str): @@ -1459,4 +1477,4 @@ class StringIO(TextIOWrapper): def getvalue(self): self.flush() - return 
self.buffer.getvalue().decode(self._encoding) + return self.buffer.getvalue().decode(self._encoding, self._errors) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 7ca3fbb..36aaf14 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -496,6 +496,46 @@ class TextIOWrapperTest(unittest.TestCase): def tearDown(self): test_support.unlink(test_support.TESTFN) + def testEncodingErrorsReading(self): + # (1) default + b = io.BytesIO(b"abc\n\xff\n") + t = io.TextIOWrapper(b, encoding="ascii") + self.assertRaises(UnicodeError, t.read) + # (2) explicit strict + b = io.BytesIO(b"abc\n\xff\n") + t = io.TextIOWrapper(b, encoding="ascii", errors="strict") + self.assertRaises(UnicodeError, t.read) + # (3) ignore + b = io.BytesIO(b"abc\n\xff\n") + t = io.TextIOWrapper(b, encoding="ascii", errors="ignore") + self.assertEquals(t.read(), "abc\n\n") + # (4) replace + b = io.BytesIO(b"abc\n\xff\n") + t = io.TextIOWrapper(b, encoding="ascii", errors="replace") + self.assertEquals(t.read(), "abc\n\ufffd\n") + + def testEncodingErrorsWriting(self): + # (1) default + b = io.BytesIO() + t = io.TextIOWrapper(b, encoding="ascii") + self.assertRaises(UnicodeError, t.write, "\xff") + # (2) explicit strict + b = io.BytesIO() + t = io.TextIOWrapper(b, encoding="ascii", errors="strict") + self.assertRaises(UnicodeError, t.write, "\xff") + # (3) ignore + b = io.BytesIO() + t = io.TextIOWrapper(b, encoding="ascii", errors="ignore") + t.write("abc\xffdef\n") + t.flush() + self.assertEquals(b.getvalue(), b"abcdef\n") + # (4) replace + b = io.BytesIO() + t = io.TextIOWrapper(b, encoding="ascii", errors="replace") + t.write("abc\xffdef\n") + t.flush() + self.assertEquals(b.getvalue(), b"abc?def\n") + def testNewlinesInput(self): testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 458d0ba..de4641c 100644 --- a/Objects/complexobject.c +++ 
b/Objects/complexobject.c @@ -915,6 +915,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } cr.real = PyFloat_AsDouble(tmp); + cr.imag = 0.0; /* Shut up compiler warning */ Py_DECREF(tmp); } if (i == NULL) { diff --git a/Objects/fileobject.c b/Objects/fileobject.c index f740977..9b3ff3e 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -27,15 +27,16 @@ extern "C" { PyObject * PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, - char *newline, int closefd) + char *errors, char *newline, int closefd) { PyObject *io, *stream, *nameobj = NULL; io = PyImport_ImportModule("io"); if (io == NULL) return NULL; - stream = PyObject_CallMethod(io, "open", "isissi", fd, mode, - buffering, encoding, newline, closefd); + stream = PyObject_CallMethod(io, "open", "isisssi", fd, mode, + buffering, encoding, errors, + newline, closefd); Py_DECREF(io); if (stream == NULL) return NULL; diff --git a/Python/import.c b/Python/import.c index 221c2dd..b7a9752 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2602,7 +2602,7 @@ call_find_module(char *name, PyObject *path) (char*)PyUnicode_GetDefaultEncoding(); } fob = PyFile_FromFd(fd, pathname, fdp->mode, -1, - (char*)encoding, NULL, 1); + (char*)encoding, NULL, NULL, 1); if (fob == NULL) { close(fd); PyMem_FREE(found_encoding); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index f46b90e..14fe783 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -770,7 +770,7 @@ initstdio(void) #endif } else { - if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL, + if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL, NULL, "\n", 0))) { goto error; } @@ -790,7 +790,7 @@ initstdio(void) #endif } else { - if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL, + if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL, NULL, "\n", 0))) { goto error; } @@ -811,7 +811,7 @@ initstdio(void) #endif } else { - if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL, 
+ if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL, NULL, "\n", 0))) { goto error; }