diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-05-22 17:01:13 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-05-22 17:01:13 (GMT) |
commit | b64d0eba50eeae7fe7ce3b969a8cc80c4578d805 (patch) | |
tree | 0cc759d972ab6789a4dccb359048d79e47b214cc | |
parent | 37b8200608c8c78b45d41b032aed4f9d9e5c8151 (diff) | |
download | cpython-b64d0eba50eeae7fe7ce3b969a8cc80c4578d805.zip cpython-b64d0eba50eeae7fe7ce3b969a8cc80c4578d805.tar.gz cpython-b64d0eba50eeae7fe7ce3b969a8cc80c4578d805.tar.bz2 |
Merged revisions 81474 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
................
r81474 | victor.stinner | 2010-05-22 18:59:09 +0200 (sam., 22 mai 2010) | 20 lines
Merged revisions 81471-81472 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r81471 | victor.stinner | 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) | 7 lines
Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32
* Fix seek() method of codecs.open(), don't write the BOM twice after seek(0)
* Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes
* test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by
Solaris or Windows, but does it really exist? I found it in the issue.
........
r81472 | victor.stinner | 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) | 4 lines
Fix my last commit (r81471) about codecs
Remember: don't touch the code just before a commit
........
................
-rw-r--r-- | Lib/codecs.py | 13 | ||||
-rw-r--r-- | Lib/encodings/utf_16.py | 20 | ||||
-rw-r--r-- | Lib/encodings/utf_32.py | 20 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 40 | ||||
-rw-r--r-- | Misc/NEWS | 5 |
5 files changed, 77 insertions, 21 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index 9490602..f6c2448 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -374,6 +374,11 @@ class StreamWriter(Codec): """ pass + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + if whence == 0 and offset == 0: + self.reset() + def __getattr__(self, name, getattr=getattr): @@ -606,8 +611,8 @@ class StreamReader(Codec): Resets the codec buffers used for keeping state. """ - self.reset() self.stream.seek(offset, whence) + self.reset() def __next__(self): @@ -700,8 +705,10 @@ class StreamReaderWriter: self.writer.reset() def seek(self, offset, whence=0): - self.reader.seek(offset, whence) - self.writer.seek(offset, whence) + self.stream.seek(offset, whence) + self.reader.reset() + if whence == 0 and offset == 0: + self.writer.reset() def __getattr__(self, name, getattr=getattr): diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py index 5500c06..809bc9a 100644 --- a/Lib/encodings/utf_16.py +++ b/Lib/encodings/utf_16.py @@ -103,17 +103,23 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): class StreamWriter(codecs.StreamWriter): def __init__(self, stream, errors='strict'): - self.bom_written = False codecs.StreamWriter.__init__(self, stream, errors) + self.encoder = None + + def reset(self): + codecs.StreamWriter.reset(self) + self.encoder = None def encode(self, input, errors='strict'): - self.bom_written = True - result = codecs.utf_16_encode(input, errors) - if sys.byteorder == 'little': - self.encode = codecs.utf_16_le_encode + if self.encoder is None: + result = codecs.utf_16_encode(input, errors) + if sys.byteorder == 'little': + self.encoder = codecs.utf_16_le_encode + else: + self.encoder = codecs.utf_16_be_encode + return result else: - self.encode = codecs.utf_16_be_encode - return result + return self.encoder(input, errors) class StreamReader(codecs.StreamReader): diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py index f0b7709..c052928 100644 --- 
a/Lib/encodings/utf_32.py +++ b/Lib/encodings/utf_32.py @@ -98,17 +98,23 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): class StreamWriter(codecs.StreamWriter): def __init__(self, stream, errors='strict'): - self.bom_written = False + self.encoder = None codecs.StreamWriter.__init__(self, stream, errors) + def reset(self): + codecs.StreamWriter.reset(self) + self.encoder = None + def encode(self, input, errors='strict'): - self.bom_written = True - result = codecs.utf_32_encode(input, errors) - if sys.byteorder == 'little': - self.encode = codecs.utf_32_le_encode + if self.encoder is None: + result = codecs.utf_32_encode(input, errors) + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + return result else: - self.encode = codecs.utf_32_be_encode - return result + return self.encoder(input, errors) class StreamReader(codecs.StreamReader): diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 1316572..5d6b545 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1602,8 +1602,8 @@ class BomTest(unittest.TestCase): "utf-32-le", "utf-32-be") for encoding in tests: - with codecs.open('foo', 'w+', encoding=encoding) as f: - # Check if the BOM is written only once + # Check if the BOM is written only once + with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.write(data) f.seek(0) @@ -1611,6 +1611,42 @@ class BomTest(unittest.TestCase): f.seek(0) self.assertEquals(f.read(), data * 2) + # Check that the BOM is written after a seek(0) + with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f: + f.write(data[0]) + self.assertNotEquals(f.tell(), 0) + f.seek(0) + f.write(data) + f.seek(0) + self.assertEquals(f.read(), data) + + # (StreamWriter) Check that the BOM is written after a seek(0) + with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f: + f.writer.write(data[0]) + self.assertNotEquals(f.writer.tell(), 0) + 
f.writer.seek(0) + f.writer.write(data) + f.seek(0) + self.assertEquals(f.read(), data) + + # Check that the BOM is not written after a seek() at a position + # different than the start + with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f: + f.write(data) + f.seek(f.tell()) + f.write(data) + f.seek(0) + self.assertEquals(f.read(), data * 2) + + # (StreamWriter) Check that the BOM is not written after a seek() + # at a position different than the start + with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f: + f.writer.write(data) + f.writer.seek(f.writer.tell()) + f.writer.write(data) + f.seek(0) + self.assertEquals(f.read(), data * 2) + def test_main(): support.run_unittest( @@ -54,8 +54,9 @@ C-API Library ------- -- Issue #6268: Fix seek() method of codecs.open(), don't read the BOM twice - after seek(0) +- Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM + twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and + StreamWriter classes. - Issue #8782: Add a trailing newline in linecache.updatecache to the last line of files without one. |