diff options
author | Guido van Rossum <guido@python.org> | 2007-12-03 22:54:21 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-12-03 22:54:21 (GMT) |
commit | e7fc50f2d03a6b62e4b4201c89b2c0185c90f697 (patch) | |
tree | 836034eb187c29177ffaabb74b39ba16ed71ddd1 /Lib | |
parent | c6fe37bab927bd00e0f2fed8a431adb7d2b6d303 (diff) | |
download | cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.zip cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.tar.gz cpython-e7fc50f2d03a6b62e4b4201c89b2c0185c90f697.tar.bz2 |
Add an errors parameter to open() and TextIOWrapper() to specify error handling.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/io.py | 36 | ||||
-rw-r--r-- | Lib/test/test_io.py | 40 |
2 files changed, 67 insertions, 9 deletions
```diff
@@ -49,8 +49,8 @@ class BlockingIOError(IOError):
         self.characters_written = characters_written
 
 
-def open(file, mode="r", buffering=None, encoding=None, newline=None,
-         closefd=True):
+def open(file, mode="r", buffering=None, encoding=None, errors=None,
+         newline=None, closefd=True):
     r"""Replacement for the built-in open function.
 
     Args:
@@ -61,6 +61,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
                  can be: 0 = unbuffered, 1 = line buffered,
                  larger = fully buffered.
       encoding: optional string giving the text encoding.
+      errors: optional string giving the encoding error handling.
       newline: optional newlines specifier; must be None, '', '\n', '\r'
                or '\r\n'; all other values are illegal. It controls the
                handling of line endings. It works as follows:
@@ -99,7 +100,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
       'U': universal newline mode (for backwards compatibility)
 
     Constraints:
-      - encoding must not be given when a binary mode is given
+      - encoding or errors must not be given when a binary mode is given
       - buffering must not be zero when a text mode is given
 
     Returns:
@@ -115,6 +116,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
         raise TypeError("invalid buffering: %r" % buffering)
     if encoding is not None and not isinstance(encoding, str):
         raise TypeError("invalid encoding: %r" % encoding)
+    if errors is not None and not isinstance(errors, str):
+        raise TypeError("invalid errors: %r" % errors)
     modes = set(mode)
     if modes - set("arwb+tU") or len(mode) > len(modes):
         raise ValueError("invalid mode: %r" % mode)
@@ -136,6 +139,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
         raise ValueError("must have exactly one of read/write/append mode")
     if binary and encoding is not None:
         raise ValueError("binary mode doesn't take an encoding argument")
+    if binary and errors is not None:
+        raise ValueError("binary mode doesn't take an errors argument")
     if binary and newline is not None:
         raise ValueError("binary mode doesn't take a newline argument")
     raw = FileIO(file,
@@ -177,7 +182,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
         buffer.name = file
         buffer.mode = mode
         return buffer
-    text = TextIOWrapper(buffer, encoding, newline)
+    text = TextIOWrapper(buffer, encoding, errors, newline)
     text.name = file
     text.mode = mode
     return text
@@ -1128,7 +1133,7 @@ class TextIOWrapper(TextIOBase):
 
     _CHUNK_SIZE = 128
 
-    def __init__(self, buffer, encoding=None, newline=None):
+    def __init__(self, buffer, encoding=None, errors=None, newline=None):
         if newline not in (None, "", "\n", "\r", "\r\n"):
             raise ValueError("illegal newline value: %r" % (newline,))
         if encoding is None:
@@ -1148,8 +1153,15 @@ class TextIOWrapper(TextIOBase):
         if not isinstance(encoding, str):
             raise ValueError("invalid encoding: %r" % encoding)
 
+        if errors is None:
+            errors = "strict"
+        else:
+            if not isinstance(errors, str):
+                raise ValueError("invalid errors: %r" % errors)
+
         self.buffer = buffer
         self._encoding = encoding
+        self._errors = errors
         self._readuniversal = not newline
         self._readtranslate = newline is None
         self._readnl = newline
@@ -1164,6 +1176,10 @@ class TextIOWrapper(TextIOBase):
     def encoding(self):
         return self._encoding
 
+    @property
+    def errors(self):
+        return self._errors
+
     # A word about _snapshot. This attribute is either None, or a
     # tuple (decoder_state, readahead, pending) where decoder_state is
     # the second (integer) item of the decoder state, readahead is the
@@ -1206,7 +1222,7 @@ class TextIOWrapper(TextIOBase):
         if haslf and self._writetranslate and self._writenl != "\n":
             s = s.replace("\n", self._writenl)
         # XXX What if we were just reading?
-        b = s.encode(self._encoding)
+        b = s.encode(self._encoding, self._errors)
         self.buffer.write(b)
         if haslf and self.isatty():
             self.flush()
@@ -1220,7 +1236,7 @@ class TextIOWrapper(TextIOBase):
         if make_decoder is None:
             raise IOError("Can't find an incremental decoder for encoding %s" %
                           self._encoding)
-        decoder = make_decoder() # XXX: errors
+        decoder = make_decoder(self._errors)
         if self._readuniversal:
             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
         self._decoder = decoder
@@ -1447,9 +1463,11 @@ class StringIO(TextIOWrapper):
 
     # XXX This is really slow, but fully functional
 
-    def __init__(self, initial_value="", encoding="utf-8", newline="\n"):
+    def __init__(self, initial_value="", encoding="utf-8",
+                 errors="strict", newline="\n"):
         super(StringIO, self).__init__(BytesIO(),
                                        encoding=encoding,
+                                       errors=errors,
                                        newline=newline)
         if initial_value:
             if not isinstance(initial_value, str):
@@ -1459,4 +1477,4 @@ class StringIO(TextIOWrapper):
 
     def getvalue(self):
         self.flush()
-        return self.buffer.getvalue().decode(self._encoding)
+        return self.buffer.getvalue().decode(self._encoding, self._errors)
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 7ca3fbb..36aaf14 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -496,6 +496,46 @@ class TextIOWrapperTest(unittest.TestCase):
     def tearDown(self):
         test_support.unlink(test_support.TESTFN)
 
+    def testEncodingErrorsReading(self):
+        # (1) default
+        b = io.BytesIO(b"abc\n\xff\n")
+        t = io.TextIOWrapper(b, encoding="ascii")
+        self.assertRaises(UnicodeError, t.read)
+        # (2) explicit strict
+        b = io.BytesIO(b"abc\n\xff\n")
+        t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
+        self.assertRaises(UnicodeError, t.read)
+        # (3) ignore
+        b = io.BytesIO(b"abc\n\xff\n")
+        t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
+        self.assertEquals(t.read(), "abc\n\n")
+        # (4) replace
+        b = io.BytesIO(b"abc\n\xff\n")
+        t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
+        self.assertEquals(t.read(), "abc\n\ufffd\n")
+
+    def testEncodingErrorsWriting(self):
+        # (1) default
+        b = io.BytesIO()
+        t = io.TextIOWrapper(b, encoding="ascii")
+        self.assertRaises(UnicodeError, t.write, "\xff")
+        # (2) explicit strict
+        b = io.BytesIO()
+        t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
+        self.assertRaises(UnicodeError, t.write, "\xff")
+        # (3) ignore
+        b = io.BytesIO()
+        t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
+        t.write("abc\xffdef\n")
+        t.flush()
+        self.assertEquals(b.getvalue(), b"abcdef\n")
+        # (4) replace
+        b = io.BytesIO()
+        t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
+        t.write("abc\xffdef\n")
+        t.flush()
+        self.assertEquals(b.getvalue(), b"abc?def\n")
+
     def testNewlinesInput(self):
         testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
         normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
```