summary refs log tree commit diff stats
path: root/Lib/codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r-- Lib/codecs.py 105
1 files changed, 54 insertions, 51 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index f4cd60a..b150d64 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -7,13 +7,13 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
-import __builtin__, sys
+import builtins, sys
### Registry and builtin stateless codec functions
try:
from _codecs import *
-except ImportError, why:
+except ImportError as why:
raise SystemError('Failed to load the builtin codecs: %s' % why)
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
@@ -33,19 +33,19 @@ __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
#
# UTF-8
-BOM_UTF8 = '\xef\xbb\xbf'
+BOM_UTF8 = b'\xef\xbb\xbf'
# UTF-16, little endian
-BOM_LE = BOM_UTF16_LE = '\xff\xfe'
+BOM_LE = BOM_UTF16_LE = b'\xff\xfe'
# UTF-16, big endian
-BOM_BE = BOM_UTF16_BE = '\xfe\xff'
+BOM_BE = BOM_UTF16_BE = b'\xfe\xff'
# UTF-32, little endian
-BOM_UTF32_LE = '\xff\xfe\x00\x00'
+BOM_UTF32_LE = b'\xff\xfe\x00\x00'
# UTF-32, big endian
-BOM_UTF32_BE = '\x00\x00\xfe\xff'
+BOM_UTF32_BE = b'\x00\x00\xfe\xff'
if sys.byteorder == 'little':
@@ -87,7 +87,9 @@ class CodecInfo(tuple):
return self
def __repr__(self):
- return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
+ return "<%s.%s object for encoding %s at 0x%x>" % \
+ (self.__class__.__module__, self.__class__.__name__,
+ self.name, id(self))
class Codec:
@@ -155,9 +157,9 @@ class Codec:
class IncrementalEncoder(object):
"""
- An IncrementalEncoder encodes an input in multiple steps. The input can be
- passed piece by piece to the encode() method. The IncrementalEncoder remembers
- the state of the Encoding process between calls to encode().
+ An IncrementalEncoder encodes an input in multiple steps. The input can
+ be passed piece by piece to the encode() method. The IncrementalEncoder
+ remembers the state of the encoding process between calls to encode().
"""
def __init__(self, errors='strict'):
"""
@@ -201,7 +203,8 @@ class BufferedIncrementalEncoder(IncrementalEncoder):
"""
def __init__(self, errors='strict'):
IncrementalEncoder.__init__(self, errors)
- self.buffer = "" # unencoded input that is kept between calls to encode()
+ # unencoded input that is kept between calls to encode()
+ self.buffer = ""
def _buffer_encode(self, input, errors, final):
# Overwrite this method in subclasses: It must encode input
@@ -228,13 +231,13 @@ class BufferedIncrementalEncoder(IncrementalEncoder):
class IncrementalDecoder(object):
"""
- An IncrementalDecoder decodes an input in multiple steps. The input can be
- passed piece by piece to the decode() method. The IncrementalDecoder
+ An IncrementalDecoder decodes an input in multiple steps. The input can
+ be passed piece by piece to the decode() method. The IncrementalDecoder
remembers the state of the decoding process between calls to decode().
"""
def __init__(self, errors='strict'):
"""
- Creates a IncrementalDecoder instance.
+ Create an IncrementalDecoder instance.
The IncrementalDecoder may use different error handling schemes by
providing the errors keyword argument. See the module docstring
@@ -244,13 +247,13 @@ class IncrementalDecoder(object):
def decode(self, input, final=False):
"""
- Decodes input and returns the resulting object.
+ Decode input and return the resulting object.
"""
raise NotImplementedError
def reset(self):
"""
- Resets the decoder to the initial state.
+ Reset the decoder to the initial state.
"""
def getstate(self):
@@ -278,12 +281,13 @@ class IncrementalDecoder(object):
class BufferedIncrementalDecoder(IncrementalDecoder):
"""
This subclass of IncrementalDecoder can be used as the baseclass for an
- incremental decoder if the decoder must be able to handle incomplete byte
- sequences.
+ incremental decoder if the decoder must be able to handle incomplete
+ byte sequences.
"""
def __init__(self, errors='strict'):
IncrementalDecoder.__init__(self, errors)
- self.buffer = "" # undecoded input that is kept between calls to decode()
+ # undecoded input that is kept between calls to decode()
+ self.buffer = b""
def _buffer_decode(self, input, errors, final):
# Overwrite this method in subclasses: It must decode input
@@ -300,7 +304,7 @@ class BufferedIncrementalDecoder(IncrementalDecoder):
def reset(self):
IncrementalDecoder.reset(self)
- self.buffer = ""
+ self.buffer = b""
def getstate(self):
# additional state info is always 0
@@ -392,6 +396,8 @@ class StreamWriter(Codec):
class StreamReader(Codec):
+ charbuffertype = str
+
def __init__(self, stream, errors='strict'):
""" Creates a StreamReader instance.
@@ -412,10 +418,9 @@ class StreamReader(Codec):
"""
self.stream = stream
self.errors = errors
- self.bytebuffer = ""
- # For str->str decoding this will stay a str
- # For str->unicode decoding the first read will promote it to unicode
- self.charbuffer = ""
+ self.bytebuffer = b""
+ self._empty_charbuffer = self.charbuffertype()
+ self.charbuffer = self._empty_charbuffer
self.linebuffer = None
def decode(self, input, errors='strict'):
@@ -451,7 +456,7 @@ class StreamReader(Codec):
"""
# If we have lines cached, first merge them back into characters
if self.linebuffer:
- self.charbuffer = "".join(self.linebuffer)
+ self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
self.linebuffer = None
# read until we get the required number of characters (if available)
@@ -475,9 +480,10 @@ class StreamReader(Codec):
data = self.bytebuffer + newdata
try:
newchars, decodedbytes = self.decode(data, self.errors)
- except UnicodeDecodeError, exc:
+ except UnicodeDecodeError as exc:
if firstline:
- newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
+ newchars, decodedbytes = \
+ self.decode(data[:exc.start], self.errors)
lines = newchars.splitlines(True)
if len(lines)<=1:
raise
@@ -493,7 +499,7 @@ class StreamReader(Codec):
if chars < 0:
# Return everything we've got
result = self.charbuffer
- self.charbuffer = ""
+ self.charbuffer = self._empty_charbuffer
else:
# Return the first chars characters
result = self.charbuffer[:chars]
@@ -524,7 +530,7 @@ class StreamReader(Codec):
return line
readsize = size or 72
- line = ""
+ line = self._empty_charbuffer
# If size is given, we call read() only once
while True:
data = self.read(readsize, firstline=True)
@@ -532,7 +538,8 @@ class StreamReader(Codec):
# If we're at a "\r" read one extra character (which might
# be a "\n") to get a proper line ending. If the stream is
# temporarily exhausted we return the wrong line ending.
- if data.endswith("\r"):
+ if (isinstance(data, str) and data.endswith("\r")) or \
+ (isinstance(data, bytes) and data.endswith(b"\r")):
data += self.read(size=1, chars=1)
line += data
@@ -558,7 +565,8 @@ class StreamReader(Codec):
line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end
# Put the rest back together and keep it until the next call
- self.charbuffer = "".join(lines[1:]) + self.charbuffer
+ self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
+ self.charbuffer
if keepends:
line = line0withend
else:
@@ -569,7 +577,7 @@ class StreamReader(Codec):
if line and not keepends:
line = line.splitlines(False)[0]
break
- if readsize<8000:
+ if readsize < 8000:
readsize *= 2
return line
@@ -597,8 +605,8 @@ class StreamReader(Codec):
from decoding errors.
"""
- self.bytebuffer = ""
- self.charbuffer = u""
+ self.bytebuffer = b""
+ self.charbuffer = self._empty_charbuffer
self.linebuffer = None
def seek(self, offset, whence=0):
@@ -609,7 +617,7 @@ class StreamReader(Codec):
self.stream.seek(offset, whence)
self.reset()
- def next(self):
+ def __next__(self):
""" Return the next decoded line from the input stream."""
line = self.readline()
@@ -678,10 +686,10 @@ class StreamReaderWriter:
return self.reader.readlines(sizehint)
- def next(self):
+ def __next__(self):
""" Return the next decoded line from the input stream."""
- return self.reader.next()
+ return next(self.reader)
def __iter__(self):
return self
@@ -797,10 +805,10 @@ class StreamRecoder:
data, bytesencoded = self.encode(data, self.errors)
return data.splitlines(1)
- def next(self):
+ def __next__(self):
""" Return the next decoded line from the input stream."""
- data = self.reader.next()
+ data = next(self.reader)
data, bytesencoded = self.encode(data, self.errors)
return data
@@ -869,16 +877,11 @@ def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
parameter.
"""
- if encoding is not None:
- if 'U' in mode:
- # No automatic conversion of '\n' is done on reading and writing
- mode = mode.strip().replace('U', '')
- if mode[:1] not in set('rwa'):
- mode = 'r' + mode
- if 'b' not in mode:
- # Force opening of the file in binary mode
- mode = mode + 'b'
- file = __builtin__.open(filename, mode, buffering)
+ if encoding is not None and \
+ 'b' not in mode:
+ # Force opening of the file in binary mode
+ mode = mode + 'b'
+ file = builtins.open(filename, mode, buffering)
if encoding is None:
return file
info = lookup(encoding)
@@ -1025,7 +1028,7 @@ def iterdecode(iterator, encoding, errors='strict', **kwargs):
output = decoder.decode(input)
if output:
yield output
- output = decoder.decode("", True)
+ output = decoder.decode(b"", True)
if output:
yield output