diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-12-14 16:36:46 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-12-14 16:36:46 (GMT) |
commit | 180a336f1afdcef332189e6bcee314576cadc2bf (patch) | |
tree | 645f10b148e39a1a9a602d426dbe9846a48c62f7 /Lib/io.py | |
parent | ff94552763d5ceb33dd646a534b4d1b56e6162cb (diff) | |
download | cpython-180a336f1afdcef332189e6bcee314576cadc2bf.zip cpython-180a336f1afdcef332189e6bcee314576cadc2bf.tar.gz cpython-180a336f1afdcef332189e6bcee314576cadc2bf.tar.bz2 |
Issue #4574: reading a UTF-16-encoded text file crashes if \r falls on a 64-char boundary.
Diffstat (limited to 'Lib/io.py')
-rw-r--r-- | Lib/io.py | 27 |
1 file changed, 12 insertions, 15 deletions
```diff
@@ -1282,25 +1282,23 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
     """
     def __init__(self, decoder, translate, errors='strict'):
         codecs.IncrementalDecoder.__init__(self, errors=errors)
-        self.buffer = b''
         self.translate = translate
         self.decoder = decoder
         self.seennl = 0
+        self.pendingcr = False
 
     def decode(self, input, final=False):
         # decode input (with the eventual \r from a previous pass)
-        if self.buffer:
-            input = self.buffer + input
-
         output = self.decoder.decode(input, final=final)
+        if self.pendingcr and (output or final):
+            output = "\r" + output
+            self.pendingcr = False
 
         # retain last \r even when not translating data:
         # then readline() is sure to get \r\n in one pass
         if output.endswith("\r") and not final:
             output = output[:-1]
-            self.buffer = b'\r'
-        else:
-            self.buffer = b''
+            self.pendingcr = True
 
         # Record which newlines are read
         crlf = output.count('\r\n')
@@ -1319,20 +1317,19 @@ class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
 
     def getstate(self):
         buf, flag = self.decoder.getstate()
-        return buf + self.buffer, flag
+        flag <<= 1
+        if self.pendingcr:
+            flag |= 1
+        return buf, flag
 
     def setstate(self, state):
         buf, flag = state
-        if buf.endswith(b'\r'):
-            self.buffer = b'\r'
-            buf = buf[:-1]
-        else:
-            self.buffer = b''
-        self.decoder.setstate((buf, flag))
+        self.pendingcr = bool(flag & 1)
+        self.decoder.setstate((buf, flag >> 1))
 
     def reset(self):
         self.seennl = 0
-        self.buffer = b''
+        self.pendingcr = False
         self.decoder.reset()
 
     _LF = 1
```