diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pyio.py | 1831 | ||||
-rw-r--r-- | Lib/importlib/__init__.py | 4 | ||||
-rw-r--r-- | Lib/importlib/_bootstrap.py | 4 | ||||
-rw-r--r-- | Lib/io.py | 2118 | ||||
-rw-r--r-- | Lib/test/test_bufio.py | 21 | ||||
-rw-r--r-- | Lib/test/test_descr.py | 4 | ||||
-rw-r--r-- | Lib/test/test_file.py | 47 | ||||
-rw-r--r-- | Lib/test/test_fileio.py | 54 | ||||
-rw-r--r-- | Lib/test/test_io.py | 1518 | ||||
-rw-r--r-- | Lib/test/test_largefile.py | 35 | ||||
-rw-r--r-- | Lib/test/test_memoryio.py | 189 | ||||
-rw-r--r-- | Lib/test/test_univnewlines.py | 36 | ||||
-rw-r--r-- | Lib/test/test_uu.py | 2 |
13 files changed, 3280 insertions, 2583 deletions
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    errno and strerror are forwarded to IOError unchanged.
    characters_written must be an int (default 0) and is stored on the
    instance as the attribute of the same name.

    Raises TypeError if characters_written is not an int.
    """

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
In text mode, if encoding is not specified the + encoding used is platform dependent. (For reading and writing raw + bytes use binary mode and leave encoding unspecified.) The available + modes are: + + ========= =============================================================== + Character Meaning + --------- --------------------------------------------------------------- + 'r' open for reading (default) + 'w' open for writing, truncating the file first + 'a' open for writing, appending to the end of the file if it exists + 'b' binary mode + 't' text mode (default) + '+' open a disk file for updating (reading and writing) + 'U' universal newline mode (for backwards compatibility; unneeded + for new code) + ========= =============================================================== + + The default mode is 'rt' (open for reading text). For binary random + access, the mode 'w+b' opens and truncates the file to 0 bytes, while + 'r+b' opens the file without truncation. + + Python distinguishes between files opened in binary and text modes, + even when the underlying operating system doesn't. Files opened in + binary mode (appending 'b' to the mode argument) return contents as + bytes objects without any decoding. In text mode (the default, or when + 't' is appended to the mode argument), the contents of the file are + returned as strings, the bytes having been first decoded using a + platform-dependent encoding or using the specified encoding if given. + + buffering is an optional integer used to set the buffering policy. By + default full buffering is on. Pass 0 to switch buffering off (only + allowed in binary mode), 1 to set line buffering, and an integer > 1 + for full buffering. + + encoding is the name of the encoding used to decode or encode the + file. This should only be used in text mode. The default encoding is + platform dependent, but any encoding supported by Python can be + passed. See the codecs module for the list of supported encodings. 
+ + errors is an optional string that specifies how encoding errors are to + be handled---this argument should not be used in binary mode. Pass + 'strict' to raise a ValueError exception if there is an encoding error + (the default of None has the same effect), or pass 'ignore' to ignore + errors. (Note that ignoring encoding errors can lead to data loss.) + See the documentation for codecs.register for a list of the permitted + encoding error strings. + + newline controls how universal newlines works (it only applies to text + mode). It can be None, '', '\n', '\r', and '\r\n'. It works as + follows: + + * On input, if newline is None, universal newlines mode is + enabled. Lines in the input can end in '\n', '\r', or '\r\n', and + these are translated into '\n' before being returned to the + caller. If it is '', universal newline mode is enabled, but line + endings are returned to the caller untranslated. If it has any of + the other legal values, input lines are only terminated by the given + string, and the line ending is returned to the caller untranslated. + + * On output, if newline is None, any '\n' characters written are + translated to the system default line separator, os.linesep. If + newline is '', no translation takes place. If newline is any of the + other legal values, any '\n' characters written are translated to + the given string. + + If closefd is False, the underlying file descriptor will be kept open + when the file is closed. This does not work when a file name is given + and must be True in that case. + + open() returns a file object whose type depends on the mode, and + through which the standard file operations such as reading and writing + are performed. When open() is used to open a file in a text mode ('w', + 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. 
When used to open + a file in a binary mode, the returned class varies: in read binary + mode, it returns a BufferedReader; in write binary and append binary + modes, it returns a BufferedWriter, and in read/write mode, it returns + a BufferedRandom. + + It is also possible to use a string or bytearray as a file for both + reading and writing. For strings StringIO can be used like a file + opened in a text mode, and for bytes a BytesIO can be used like a file + opened in a binary mode. + """ + if not isinstance(file, (str, bytes, int)): + raise TypeError("invalid file: %r" % file) + if not isinstance(mode, str): + raise TypeError("invalid mode: %r" % mode) + if buffering is not None and not isinstance(buffering, int): + raise TypeError("invalid buffering: %r" % buffering) + if encoding is not None and not isinstance(encoding, str): + raise TypeError("invalid encoding: %r" % encoding) + if errors is not None and not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) + modes = set(mode) + if modes - set("arwb+tU") or len(mode) > len(modes): + raise ValueError("invalid mode: %r" % mode) + reading = "r" in modes + writing = "w" in modes + appending = "a" in modes + updating = "+" in modes + text = "t" in modes + binary = "b" in modes + if "U" in modes: + if writing or appending: + raise ValueError("can't use U and writing mode at once") + reading = True + if text and binary: + raise ValueError("can't have text and binary mode at once") + if reading + writing + appending > 1: + raise ValueError("can't have read/write/append mode at once") + if not (reading or writing or appending): + raise ValueError("must have exactly one of read/write/append mode") + if binary and encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + if binary and errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + if binary and newline is not None: + raise ValueError("binary mode doesn't take a newline 
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        name is the method name to mention in the message.  This method
        never returns; it always raises UnsupportedOperation.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        # A zero-byte seek relative to the current position (whence=1).
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it by
    # accident; exposed read-only through the `closed` property.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable.

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable.

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable.

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed.

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None or a negative limit means "no limit".

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if limit is None:
            limit = -1
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the total
                    # limit, so that a second or later chunk cannot push
                    # the result past `limit` bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iterating yields lines.  Raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read once the total size (in bytes/characters) of all
        lines so far reaches or exceeds hint.  A hint of None, zero or a
        negative number reads every remaining line.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if the stream
        is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
def read(self, n: int = -1) -> bytes:
    """Read and return up to n bytes.

    If n is None or negative, the call is delegated to readall().
    Otherwise a buffer of n bytes is filled through readinto() and
    trimmed to the number of bytes actually read.

    Returns an empty bytes object on EOF, or None if the object is
    set not to block and has no data to read.
    """
    if n is None:
        n = -1
    if n < 0:
        return self.readall()
    b = bytearray(n.__index__())
    n = self.readinto(b)
    if n is None:
        # readinto() reports "would block" as None.  Propagate it instead
        # of letting `del b[None:]` turn it into b"", which callers would
        # mistake for EOF.
        return None
    del b[n:]
    return bytes(b)
def readinto(self, b: bytearray) -> int:
    """Read up to len(b) bytes into b.

    Like read(), this may issue multiple reads to the underlying raw
    stream, unless the latter is 'interactive'.

    Returns the number of bytes read (0 for EOF).

    Raises BlockingIOError if the underlying raw stream has no
    data at the moment.
    """
    # XXX This ought to work with anything that supports the buffer API
    chunk = self.read(len(b))
    count = len(chunk)
    try:
        b[:count] = chunk
    except TypeError as err:
        # array.array rejects a bytes object in slice assignment; retry
        # with the data wrapped in an array.  Any other target type
        # gets the original TypeError back.
        import array
        if isinstance(b, array.array):
            b[:count] = array.array('b', chunk)
        else:
            raise err
    return count
+ """ + + def __init__(self, raw): + self.raw = raw + + ### Positioning ### + + def seek(self, pos, whence=0): + new_position = self.raw.seek(pos, whence) + if new_position < 0: + raise IOError("seek() returned an invalid position") + return new_position + + def tell(self): + pos = self.raw.tell() + if pos < 0: + raise IOError("tell() returned an invalid position") + return pos + + def truncate(self, pos=None): + # Flush the stream. We're mixing buffered I/O with lower-level I/O, + # and a flush may be necessary to synch both views of the current + # file state. + self.flush() + + if pos is None: + pos = self.tell() + # XXX: Should seek() be used, instead of passing the position + # XXX directly to truncate? + return self.raw.truncate(pos) + + ### Flush and close ### + + def flush(self): + self.raw.flush() + + def close(self): + if not self.closed: + try: + self.flush() + except IOError: + pass # If flush() fails, just give up + self.raw.close() + + ### Inquiries ### + + def seekable(self): + return self.raw.seekable() + + def readable(self): + return self.raw.readable() + + def writable(self): + return self.raw.writable() + + @property + def closed(self): + return self.raw.closed + + @property + def name(self): + return self.raw.name + + @property + def mode(self): + return self.raw.mode + + ### Lower-level APIs ### + + def fileno(self): + return self.raw.fileno() + + def isatty(self): + return self.raw.isatty() + + +class BytesIO(BufferedIOBase): + + """Buffered I/O implementation using an in-memory bytes buffer.""" + + def __init__(self, initial_bytes=None): + buf = bytearray() + if initial_bytes is not None: + buf += initial_bytes + self._buffer = buf + self._pos = 0 + + def getvalue(self): + """Return the bytes value (contents) of the buffer + """ + if self.closed: + raise ValueError("getvalue on closed file") + return bytes(self._buffer) + + def read(self, n=None): + if self.closed: + raise ValueError("read from closed file") + if n is None: + n = -1 + if n < 
def write(self, b):
    """Write b into the in-memory buffer at the current position.

    Existing bytes in the written range are overwritten.  If the
    position lies beyond the end of the buffer, the gap is filled with
    null bytes first.  The position advances by len(b).

    Returns the number of bytes written.  Raises ValueError if the
    file is closed and TypeError if b is a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if isinstance(b, str):
        raise TypeError("can't write str to binary stream")
    size = len(b)
    if size == 0:
        return 0
    start = self._pos
    gap = start - len(self._buffer)
    if gap > 0:
        # Inserts null bytes between the current end of the file
        # and the new write position.
        self._buffer += b'\x00' * gap
    end = start + size
    self._buffer[start:end] = b
    self._pos += size
    return size
def _read_unlocked(self, n=None):
    """Read up to n bytes; the caller is expected to hold the read lock.

    With n None or -1, return everything still buffered plus whatever
    self.raw.read() yields until it returns b"" or None.  Otherwise
    return at most n bytes, serving them from the read buffer when
    possible and refilling from the raw stream in chunks of
    max(self.buffer_size, n) bytes when not.  Surplus bytes read from
    the raw stream are kept in the read buffer.

    If no data at all could be produced, the last empty value obtained
    from the raw stream (b"" or None) is returned unchanged.
    """
    nodata_val = b""
    empty_values = (b"", None)
    buf = self._read_buf
    pos = self._read_pos

    # Special case for when the number of bytes to read is unspecified.
    if n is None or n == -1:
        self._reset_read_buf()
        chunks = [buf[pos:]]  # Strip the consumed bytes.
        while True:
            # Read until EOF or until read() would block.
            chunk = self.raw.read()
            if chunk in empty_values:
                nodata_val = chunk
                break
            chunks.append(chunk)
        return b"".join(chunks) or nodata_val

    # The number of bytes to read is specified, return at most n bytes.
    avail = len(buf) - pos  # Length of the available buffered data.
    if n <= avail:
        # Fast path: the data to read is fully buffered.
        self._read_pos += n
        return buf[pos:pos+n]
    # Slow path: read from the stream until enough bytes are read,
    # or until an EOF occurs or until read() would block.
    chunks = [buf[pos:]]
    wanted = max(self.buffer_size, n)
    while avail < n:
        chunk = self.raw.read(wanted)
        if chunk in empty_values:
            nodata_val = chunk
            break
        avail += len(chunk)
        chunks.append(chunk)
    # n is more than avail only when an EOF occurred or when
    # read() would have blocked.
    n = min(n, avail)
    out = b"".join(chunks)
    self._read_buf = out[n:]  # Save the extra data in the buffer.
    self._read_pos = 0
    return out[:n] if out else nodata_val
def seek(self, pos, whence=0):
    """Move the stream position and discard the read buffer.

    whence must be 0, 1 or 2, otherwise ValueError is raised.  For a
    relative seek (whence == 1) pos is first reduced by the number of
    buffered-but-unread bytes, because the raw stream sits that far
    ahead of the position this reader reports.

    Returns the position reported by _BufferedIOMixin.seek().
    """
    if not (0 <= whence <= 2):
        raise ValueError("invalid whence value")
    with self._read_lock:
        if whence == 1:
            unread = len(self._read_buf) - self._read_pos
            pos -= unread
        new_pos = _BufferedIOMixin.seek(self, pos, whence)
        self._reset_read_buf()
        return new_pos
def _flush_unlocked(self):
    """Write the pending write buffer to the raw stream.

    The caller is expected to hold the write lock.  self.raw.write() is
    called repeatedly until the buffer is empty; bytes the raw stream
    accepted are removed from the buffer as they go.

    Raises ValueError if the file is closed, IOError if the raw stream
    reports an impossible byte count, and BlockingIOError (carrying the
    total number of bytes flushed by this call) if the raw stream
    cannot take more data right now.
    """
    import errno

    if self.closed:
        raise ValueError("flush of closed file")
    written = 0
    try:
        while self._write_buf:
            n = self.raw.write(self._write_buf)
            if n is None:
                # A non-blocking raw stream signals "nothing could be
                # written" with None.  Treat it as would-block rather
                # than failing on the integer comparison below.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:n]
            written += n
    except BlockingIOError as e:
        # Account for whatever the failing call still managed to write,
        # then re-raise with the running total for this flush.
        n = e.characters_written
        del self._write_buf[:n]
        written += n
        raise BlockingIOError(e.errno, e.strerror, written)
If the buffer_size is omitted it defaults to + DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer) + defaults to twice the buffer size. + """ + + # XXX The usefulness of this (compared to having two separate IO + # objects) is questionable. + + def __init__(self, reader, writer, + buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): + """Constructor. + + The arguments are two RawIO instances. + """ + reader._checkReadable() + writer._checkWritable() + self.reader = BufferedReader(reader, buffer_size) + self.writer = BufferedWriter(writer, buffer_size, max_buffer_size) + + def read(self, n=None): + if n is None: + n = -1 + return self.reader.read(n) + + def readinto(self, b): + return self.reader.readinto(b) + + def write(self, b): + return self.writer.write(b) + + def peek(self, n=0): + return self.reader.peek(n) + + def read1(self, n): + return self.reader.read1(n) + + def readable(self): + return self.reader.readable() + + def writable(self): + return self.writer.writable() + + def flush(self): + return self.writer.flush() + + def close(self): + self.writer.close() + self.reader.close() + + def isatty(self): + return self.reader.isatty() or self.writer.isatty() + + @property + def closed(self): + return self.writer.closed + + +class BufferedRandom(BufferedWriter, BufferedReader): + + """A buffered interface to random access streams. + + The constructor creates a reader and writer for a seekable stream, + raw, given in the first argument. If the buffer_size is omitted it + defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered + writer) defaults to twice the buffer size. 
+ """ + + def __init__(self, raw, + buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): + raw._checkSeekable() + BufferedReader.__init__(self, raw, buffer_size) + BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) + + def seek(self, pos, whence=0): + if not (0 <= whence <= 2): + raise ValueError("invalid whence") + self.flush() + if self._read_buf: + # Undo read ahead. + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + # First do the raw seek, then empty the read buffer, so that + # if the raw seek fails, we don't lose buffered data forever. + pos = self.raw.seek(pos, whence) + with self._read_lock: + self._reset_read_buf() + if pos < 0: + raise IOError("seek() returned invalid position") + return pos + + def tell(self): + if self._write_buf: + return BufferedWriter.tell(self) + else: + return BufferedReader.tell(self) + + def truncate(self, pos=None): + if pos is None: + pos = self.tell() + # Use seek to flush the read buffer. + self.seek(pos) + return BufferedWriter.truncate(self) + + def read(self, n=None): + if n is None: + n = -1 + self.flush() + return BufferedReader.read(self, n) + + def readinto(self, b): + self.flush() + return BufferedReader.readinto(self, b) + + def peek(self, n=0): + self.flush() + return BufferedReader.peek(self, n) + + def read1(self, n): + self.flush() + return BufferedReader.read1(self, n) + + def write(self, b): + if self._read_buf: + # Undo readahead + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + self._reset_read_buf() + return BufferedWriter.write(self, b) + + +class TextIOBase(IOBase): + + """Base class for text I/O. + + This class provides a character and line based interface to stream + I/O. There is no readinto method because Python's character strings + are immutable. There is no public constructor. + """ + + def read(self, n: int = -1) -> str: + """Read at most n characters from stream. 
+ + Read from underlying buffer until we have n characters or we hit EOF. + If n is negative or omitted, read until EOF. + """ + self._unsupported("read") + + def write(self, s: str) -> int: + """Write string s to stream.""" + self._unsupported("write") + + def truncate(self, pos: int = None) -> int: + """Truncate size to pos.""" + self._unsupported("truncate") + + def readline(self) -> str: + """Read until newline or EOF. + + Returns an empty string if EOF is hit immediately. + """ + self._unsupported("readline") + + @property + def encoding(self): + """Subclasses should override.""" + return None + + @property + def newlines(self): + """Line endings translated so far. + + Only line endings translated during reading are considered. + + Subclasses should override. + """ + return None + +io.TextIOBase.register(TextIOBase) + + +class IncrementalNewlineDecoder(codecs.IncrementalDecoder): + r"""Codec used when reading a file in universal newlines mode. It wraps + another incremental decoder, translating \r\n and \r into \n. It also + records the types of newlines encountered. When used with + translate=False, it ensures that the newline sequence is returned in + one piece. 
+ """ + def __init__(self, decoder, translate, errors='strict'): + codecs.IncrementalDecoder.__init__(self, errors=errors) + self.translate = translate + self.decoder = decoder + self.seennl = 0 + self.pendingcr = False + + def decode(self, input, final=False): + # decode input (with the eventual \r from a previous pass) + if self.decoder is None: + output = input + else: + output = self.decoder.decode(input, final=final) + if self.pendingcr and (output or final): + output = "\r" + output + self.pendingcr = False + + # retain last \r even when not translating data: + # then readline() is sure to get \r\n in one pass + if output.endswith("\r") and not final: + output = output[:-1] + self.pendingcr = True + + # Record which newlines are read + crlf = output.count('\r\n') + cr = output.count('\r') - crlf + lf = output.count('\n') - crlf + self.seennl |= (lf and self._LF) | (cr and self._CR) \ + | (crlf and self._CRLF) + + if self.translate: + if crlf: + output = output.replace("\r\n", "\n") + if cr: + output = output.replace("\r", "\n") + + return output + + def getstate(self): + if self.decoder is None: + buf = b"" + flag = 0 + else: + buf, flag = self.decoder.getstate() + flag <<= 1 + if self.pendingcr: + flag |= 1 + return buf, flag + + def setstate(self, state): + buf, flag = state + self.pendingcr = bool(flag & 1) + if self.decoder is not None: + self.decoder.setstate((buf, flag >> 1)) + + def reset(self): + self.seennl = 0 + self.pendingcr = False + if self.decoder is not None: + self.decoder.reset() + + _LF = 1 + _CR = 2 + _CRLF = 4 + + @property + def newlines(self): + return (None, + "\n", + "\r", + ("\r", "\n"), + "\r\n", + ("\n", "\r\n"), + ("\r", "\r\n"), + ("\r", "\n", "\r\n") + )[self.seennl] + + +class TextIOWrapper(TextIOBase): + + r"""Character and line based layer over a BufferedIOBase object, buffer. + + encoding gives the name of the encoding that the stream will be + decoded or encoded with. It defaults to locale.getpreferredencoding. 
+ + errors determines the strictness of encoding and decoding (see the + codecs.register) and defaults to "strict". + + newline can be None, '', '\n', '\r', or '\r\n'. It controls the + handling of line endings. If it is None, universal newlines is + enabled. With this enabled, on input, the lines endings '\n', '\r', + or '\r\n' are translated to '\n' before being returned to the + caller. Conversely, on output, '\n' is translated to the system + default line seperator, os.linesep. If newline is any other of its + legal values, that newline becomes the newline when the file is read + and it is returned untranslated. On output, '\n' is converted to the + newline. + + If line_buffering is True, a call to flush is implied when a call to + write contains a newline character. + """ + + _CHUNK_SIZE = 2048 + + def __init__(self, buffer, encoding=None, errors=None, newline=None, + line_buffering=False): + if newline is not None and not isinstance(newline, str): + raise TypeError("illegal newline type: %r" % (type(newline),)) + if newline not in (None, "", "\n", "\r", "\r\n"): + raise ValueError("illegal newline value: %r" % (newline,)) + if encoding is None: + try: + encoding = os.device_encoding(buffer.fileno()) + except (AttributeError, UnsupportedOperation): + pass + if encoding is None: + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + encoding = "ascii" + else: + encoding = locale.getpreferredencoding() + + if not isinstance(encoding, str): + raise ValueError("invalid encoding: %r" % encoding) + + if errors is None: + errors = "strict" + else: + if not isinstance(errors, str): + raise ValueError("invalid errors: %r" % errors) + + self.buffer = buffer + self._line_buffering = line_buffering + self._encoding = encoding + self._errors = errors + self._readuniversal = not newline + self._readtranslate = newline is None + self._readnl = newline + self._writetranslate = newline != '' + self._writenl = newline or os.linesep 
+ self._encoder = None + self._decoder = None + self._decoded_chars = '' # buffer for text returned from decoder + self._decoded_chars_used = 0 # offset into _decoded_chars for read() + self._snapshot = None # info for reconstructing decoder state + self._seekable = self._telling = self.buffer.seekable() + + # self._snapshot is either None, or a tuple (dec_flags, next_input) + # where dec_flags is the second (integer) item of the decoder state + # and next_input is the chunk of input bytes that comes next after the + # snapshot point. We use this to reconstruct decoder states in tell(). + + # Naming convention: + # - "bytes_..." for integer variables that count input bytes + # - "chars_..." for integer variables that count decoded characters + + @property + def encoding(self): + return self._encoding + + @property + def errors(self): + return self._errors + + @property + def line_buffering(self): + return self._line_buffering + + def seekable(self): + return self._seekable + + def readable(self): + return self.buffer.readable() + + def writable(self): + return self.buffer.writable() + + def flush(self): + self.buffer.flush() + self._telling = self._seekable + + def close(self): + try: + self.flush() + except: + pass # If flush() fails, just give up + self.buffer.close() + + @property + def closed(self): + return self.buffer.closed + + @property + def name(self): + return self.buffer.name + + def fileno(self): + return self.buffer.fileno() + + def isatty(self): + return self.buffer.isatty() + + def write(self, s: str): + if self.closed: + raise ValueError("write to closed file") + if not isinstance(s, str): + raise TypeError("can't write %s to text stream" % + s.__class__.__name__) + length = len(s) + haslf = (self._writetranslate or self._line_buffering) and "\n" in s + if haslf and self._writetranslate and self._writenl != "\n": + s = s.replace("\n", self._writenl) + encoder = self._encoder or self._get_encoder() + # XXX What if we were just reading? 
+ b = encoder.encode(s) + self.buffer.write(b) + if self._line_buffering and (haslf or "\r" in s): + self.flush() + self._snapshot = None + if self._decoder: + self._decoder.reset() + return length + + def _get_encoder(self): + make_encoder = codecs.getincrementalencoder(self._encoding) + self._encoder = make_encoder(self._errors) + return self._encoder + + def _get_decoder(self): + make_decoder = codecs.getincrementaldecoder(self._encoding) + decoder = make_decoder(self._errors) + if self._readuniversal: + decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) + self._decoder = decoder + return decoder + + # The following three methods implement an ADT for _decoded_chars. + # Text returned from the decoder is buffered here until the client + # requests it by calling our read() or readline() method. + def _set_decoded_chars(self, chars): + """Set the _decoded_chars buffer.""" + self._decoded_chars = chars + self._decoded_chars_used = 0 + + def _get_decoded_chars(self, n=None): + """Advance into the _decoded_chars buffer.""" + offset = self._decoded_chars_used + if n is None: + chars = self._decoded_chars[offset:] + else: + chars = self._decoded_chars[offset:offset + n] + self._decoded_chars_used += len(chars) + return chars + + def _rewind_decoded_chars(self, n): + """Rewind the _decoded_chars buffer.""" + if self._decoded_chars_used < n: + raise AssertionError("rewind decoded_chars out of bounds") + self._decoded_chars_used -= n + + def _read_chunk(self): + """ + Read and decode the next chunk of data from the BufferedReader. + """ + + # The return value is True unless EOF was reached. The decoded + # string is placed in self._decoded_chars (replacing its previous + # value). The entire input chunk is sent to the decoder, though + # some of it may remain buffered in the decoder, yet to be + # converted. 
+ + if self._decoder is None: + raise ValueError("no decoder") + + if self._telling: + # To prepare for tell(), we need to snapshot a point in the + # file where the decoder's input buffer is empty. + + dec_buffer, dec_flags = self._decoder.getstate() + # Given this, we know there was a valid snapshot point + # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + + # Read a chunk, decode it, and put the result in self._decoded_chars. + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + eof = not input_chunk + self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) + + if self._telling: + # At the snapshot point, len(dec_buffer) bytes before the read, + # the next input to be decoded is dec_buffer + input_chunk. + self._snapshot = (dec_flags, dec_buffer + input_chunk) + + return not eof + + def _pack_cookie(self, position, dec_flags=0, + bytes_to_feed=0, need_eof=0, chars_to_skip=0): + # The meaning of a tell() cookie is: seek to position, set the + # decoder flags to dec_flags, read bytes_to_feed bytes, feed them + # into the decoder with need_eof as the EOF flag, then skip + # chars_to_skip characters of the decoded result. For most simple + # decoders, tell() will often just give a byte offset in the file. + return (position | (dec_flags<<64) | (bytes_to_feed<<128) | + (chars_to_skip<<192) | bool(need_eof)<<256) + + def _unpack_cookie(self, bigint): + rest, position = divmod(bigint, 1<<64) + rest, dec_flags = divmod(rest, 1<<64) + rest, bytes_to_feed = divmod(rest, 1<<64) + need_eof, chars_to_skip = divmod(rest, 1<<64) + return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip + + def tell(self): + if not self._seekable: + raise IOError("underlying stream is not seekable") + if not self._telling: + raise IOError("telling position disabled by next() call") + self.flush() + position = self.buffer.tell() + decoder = self._decoder + if decoder is None or self._snapshot is None: + if self._decoded_chars: + # This should never happen. 
+ raise AssertionError("pending decoded text") + return position + + # Skip backward to the snapshot point (see _read_chunk). + dec_flags, next_input = self._snapshot + position -= len(next_input) + + # How many decoded characters have been used up since the snapshot? + chars_to_skip = self._decoded_chars_used + if chars_to_skip == 0: + # We haven't moved from the snapshot point. + return self._pack_cookie(position, dec_flags) + + # Starting from the snapshot position, we will walk the decoder + # forward until it gives us enough decoded characters. + saved_state = decoder.getstate() + try: + # Note our initial start point. + decoder.setstate((b'', dec_flags)) + start_pos = position + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + need_eof = 0 + + # Feed the decoder one byte at a time. As we go, note the + # nearest "safe start point" before the current location + # (a point where the decoder has nothing buffered, so seek() + # can safely start from there and advance to this location). + next_byte = bytearray(1) + for next_byte[0] in next_input: + bytes_fed += 1 + chars_decoded += len(decoder.decode(next_byte)) + dec_buffer, dec_flags = decoder.getstate() + if not dec_buffer and chars_decoded <= chars_to_skip: + # Decoder buffer is empty, so this is a safe start point. + start_pos += bytes_fed + chars_to_skip -= chars_decoded + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + if chars_decoded >= chars_to_skip: + break + else: + # We didn't get enough decoded data; signal EOF to get more. + chars_decoded += len(decoder.decode(b'', final=True)) + need_eof = 1 + if chars_decoded < chars_to_skip: + raise IOError("can't reconstruct logical file position") + + # The returned cookie corresponds to the last safe start point. 
+ return self._pack_cookie( + start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) + finally: + decoder.setstate(saved_state) + + def truncate(self, pos=None): + self.flush() + if pos is None: + pos = self.tell() + self.seek(pos) + return self.buffer.truncate() + + def seek(self, cookie, whence=0): + if self.closed: + raise ValueError("tell on closed file") + if not self._seekable: + raise IOError("underlying stream is not seekable") + if whence == 1: # seek relative to current position + if cookie != 0: + raise IOError("can't do nonzero cur-relative seeks") + # Seeking to the current position should attempt to + # sync the underlying buffer with the current position. + whence = 0 + cookie = self.tell() + if whence == 2: # seek relative to end of file + if cookie != 0: + raise IOError("can't do nonzero end-relative seeks") + self.flush() + position = self.buffer.seek(0, 2) + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + return position + if whence != 0: + raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % + (whence,)) + if cookie < 0: + raise ValueError("negative seek position %r" % (cookie,)) + self.flush() + + # The strategy of seek() is to go back to the safe start point + # and replay the effect of read(chars_to_skip) from there. + start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ + self._unpack_cookie(cookie) + + # Seek back to the safe start point. + self.buffer.seek(start_pos) + self._set_decoded_chars('') + self._snapshot = None + + # Restore the decoder to its state from the safe start point. + if self._decoder or dec_flags or chars_to_skip: + self._decoder = self._decoder or self._get_decoder() + self._decoder.setstate((b'', dec_flags)) + self._snapshot = (dec_flags, b'') + + if chars_to_skip: + # Just like _read_chunk, feed the decoder and save a snapshot. 
+ input_chunk = self.buffer.read(bytes_to_feed) + self._set_decoded_chars( + self._decoder.decode(input_chunk, need_eof)) + self._snapshot = (dec_flags, input_chunk) + + # Skip chars_to_skip of the decoded characters. + if len(self._decoded_chars) < chars_to_skip: + raise IOError("can't restore logical file position") + self._decoded_chars_used = chars_to_skip + + return cookie + + def read(self, n=None): + if n is None: + n = -1 + decoder = self._decoder or self._get_decoder() + if n < 0: + # Read everything. + result = (self._get_decoded_chars() + + decoder.decode(self.buffer.read(), final=True)) + self._set_decoded_chars('') + self._snapshot = None + return result + else: + # Keep reading chunks until we have n characters to return. + eof = False + result = self._get_decoded_chars(n) + while len(result) < n and not eof: + eof = not self._read_chunk() + result += self._get_decoded_chars(n - len(result)) + return result + + def __next__(self): + self._telling = False + line = self.readline() + if not line: + self._snapshot = None + self._telling = self._seekable + raise StopIteration + return line + + def readline(self, limit=None): + if self.closed: + raise ValueError("read from closed file") + if limit is None: + limit = -1 + + # Grab all the decoded text (we will rewind any extra bits later). + line = self._get_decoded_chars() + + start = 0 + # Make the decoder if it doesn't already exist. + if not self._decoder: + self._get_decoder() + + pos = endpos = None + while True: + if self._readtranslate: + # Newlines are already translated, only search for \n + pos = line.find('\n', start) + if pos >= 0: + endpos = pos + 1 + break + else: + start = len(line) + + elif self._readuniversal: + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + + # In C we'd look for these in parallel of course. 
+ nlpos = line.find("\n", start) + crpos = line.find("\r", start) + if crpos == -1: + if nlpos == -1: + # Nothing found + start = len(line) + else: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == -1: + # Found lone \r + endpos = crpos + 1 + break + elif nlpos < crpos: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == crpos + 1: + # Found \r\n + endpos = crpos + 2 + break + else: + # Found \r + endpos = crpos + 1 + break + else: + # non-universal + pos = line.find(self._readnl) + if pos >= 0: + endpos = pos + len(self._readnl) + break + + if limit >= 0 and len(line) >= limit: + endpos = limit # reached length limit + break + + # No line ending seen yet - get more data' + while self._read_chunk(): + if self._decoded_chars: + break + if self._decoded_chars: + line += self._get_decoded_chars() + else: + # end of file + self._set_decoded_chars('') + self._snapshot = None + return line + + if limit >= 0 and endpos > limit: + endpos = limit # don't exceed limit + + # Rewind _decoded_chars to just after the line ending we found. + self._rewind_decoded_chars(len(line) - endpos) + return line[:endpos] + + @property + def newlines(self): + return self._decoder.newlines if self._decoder else None + + +class StringIO(TextIOWrapper): + """Text I/O implementation using an in-memory buffer. + + The initial_value argument sets the value of object. The newline + argument is like the one of TextIOWrapper's constructor. 
+ """ + + # XXX This is really slow, but fully functional + + def __init__(self, initial_value="", newline="\n"): + super(StringIO, self).__init__(BytesIO(), + encoding="utf-8", + errors="strict", + newline=newline) + if initial_value: + if not isinstance(initial_value, str): + initial_value = str(initial_value) + self.write(initial_value) + self.seek(0) + + def getvalue(self): + self.flush() + return self.buffer.getvalue().decode(self._encoding, self._errors) diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py index 62e046e..7688ec2 100644 --- a/Lib/importlib/__init__.py +++ b/Lib/importlib/__init__.py @@ -93,12 +93,12 @@ except ImportError: except ImportError: raise ImportError('posix, nt, or os2 module required for importlib') _bootstrap._os = _os -import imp, sys, marshal, errno, _fileio +import imp, sys, marshal, errno, _io _bootstrap.imp = imp _bootstrap.sys = sys _bootstrap.marshal = marshal _bootstrap.errno = errno -_bootstrap._fileio = _fileio +_bootstrap._io = _io import _warnings _bootstrap._warnings = _warnings diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index c61fe2d..c7cdfcf 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -473,7 +473,7 @@ class PyFileLoader(PyLoader): if source_path is None: return None import tokenize - with closing(_fileio._FileIO(source_path, 'r')) as file: + with closing(_io.FileIO(source_path, 'r')) as file: encoding, lines = tokenize.detect_encoding(file.readline) # XXX Will fail when passed to compile() if the encoding is # anything other than UTF-8. 
@@ -527,7 +527,7 @@ class PyPycFileLoader(PyPycLoader, PyFileLoader): bytecode_path = self.bytecode_path(name) if not bytecode_path: bytecode_path = self._base_path + suffix_list(imp.PY_COMPILED)[0] - file = _fileio._FileIO(bytecode_path, 'w') + file = _io.FileIO(bytecode_path, 'w') try: with closing(file) as bytecode_file: bytecode_file.write(data) @@ -34,9 +34,6 @@ DEFAULT_BUFFER_SIZE """ # New I/O library conforming to PEP 3116. -# This is a prototype; hopefully eventually some of this will be -# reimplemented in C. - # XXX edge cases when switching between reading/writing # XXX need to support 1 meaning line-buffered # XXX whenever an argument is None, use the default value @@ -48,2091 +45,58 @@ DEFAULT_BUFFER_SIZE __author__ = ("Guido van Rossum <guido@python.org>, " "Mike Verdone <mike.verdone@gmail.com>, " - "Mark Russell <mark.russell@zen.co.uk>") + "Mark Russell <mark.russell@zen.co.uk>, " + "Antoine Pitrou <solipsis@pitrou.net>, " + "Amaury Forgeotdarc <amauryfa@gmail.com>") __all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO", "BytesIO", "StringIO", "BufferedIOBase", "BufferedReader", "BufferedWriter", "BufferedRWPair", "BufferedRandom", "TextIOBase", "TextIOWrapper"] -import os -import abc -import codecs -import _fileio -# Import _thread instead of threading to reduce startup cost -try: - from _thread import allocate_lock as Lock -except ImportError: - from _dummy_thread import allocate_lock as Lock +import _io +import abc # open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes - - -class BlockingIOError(IOError): - - """Exception raised when I/O would block on a non-blocking I/O stream.""" - - def __init__(self, errno, strerror, characters_written=0): - IOError.__init__(self, errno, strerror) - self.characters_written = characters_written - - -def open(file, mode="r", buffering=None, encoding=None, errors=None, - newline=None, closefd=True): - - r"""Open file and return a stream. Raise IOError upon failure. 
- - file is either a text or byte string giving the name (and the path - if the file isn't in the current working directory) of the file to - be opened or an integer file descriptor of the file to be - wrapped. (If a file descriptor is given, it is closed when the - returned I/O object is closed, unless closefd is set to False.) - - mode is an optional string that specifies the mode in which the file - is opened. It defaults to 'r' which means open for reading in text - mode. Other common values are 'w' for writing (truncating the file if - it already exists), and 'a' for appending (which on some Unix systems, - means that all writes append to the end of the file regardless of the - current seek position). In text mode, if encoding is not specified the - encoding used is platform dependent. (For reading and writing raw - bytes use binary mode and leave encoding unspecified.) The available - modes are: - - ========= =============================================================== - Character Meaning - --------- --------------------------------------------------------------- - 'r' open for reading (default) - 'w' open for writing, truncating the file first - 'a' open for writing, appending to the end of the file if it exists - 'b' binary mode - 't' text mode (default) - '+' open a disk file for updating (reading and writing) - 'U' universal newline mode (for backwards compatibility; unneeded - for new code) - ========= =============================================================== - - The default mode is 'rt' (open for reading text). For binary random - access, the mode 'w+b' opens and truncates the file to 0 bytes, while - 'r+b' opens the file without truncation. - - Python distinguishes between files opened in binary and text modes, - even when the underlying operating system doesn't. Files opened in - binary mode (appending 'b' to the mode argument) return contents as - bytes objects without any decoding. 
In text mode (the default, or when - 't' is appended to the mode argument), the contents of the file are - returned as strings, the bytes having been first decoded using a - platform-dependent encoding or using the specified encoding if given. - - buffering is an optional integer used to set the buffering policy. By - default full buffering is on. Pass 0 to switch buffering off (only - allowed in binary mode), 1 to set line buffering, and an integer > 1 - for full buffering. - - encoding is the name of the encoding used to decode or encode the - file. This should only be used in text mode. The default encoding is - platform dependent, but any encoding supported by Python can be - passed. See the codecs module for the list of supported encodings. - - errors is an optional string that specifies how encoding errors are to - be handled---this argument should not be used in binary mode. Pass - 'strict' to raise a ValueError exception if there is an encoding error - (the default of None has the same effect), or pass 'ignore' to ignore - errors. (Note that ignoring encoding errors can lead to data loss.) - See the documentation for codecs.register for a list of the permitted - encoding error strings. - - newline controls how universal newlines works (it only applies to text - mode). It can be None, '', '\n', '\r', and '\r\n'. It works as - follows: - - * On input, if newline is None, universal newlines mode is - enabled. Lines in the input can end in '\n', '\r', or '\r\n', and - these are translated into '\n' before being returned to the - caller. If it is '', universal newline mode is enabled, but line - endings are returned to the caller untranslated. If it has any of - the other legal values, input lines are only terminated by the given - string, and the line ending is returned to the caller untranslated. - - * On output, if newline is None, any '\n' characters written are - translated to the system default line separator, os.linesep. 
If - newline is '', no translation takes place. If newline is any of the - other legal values, any '\n' characters written are translated to - the given string. - - If closefd is False, the underlying file descriptor will be kept open - when the file is closed. This does not work when a file name is given - and must be True in that case. - - open() returns a file object whose type depends on the mode, and - through which the standard file operations such as reading and writing - are performed. When open() is used to open a file in a text mode ('w', - 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open - a file in a binary mode, the returned class varies: in read binary - mode, it returns a BufferedReader; in write binary and append binary - modes, it returns a BufferedWriter, and in read/write mode, it returns - a BufferedRandom. - - It is also possible to use a string or bytearray as a file for both - reading and writing. For strings StringIO can be used like a file - opened in a text mode, and for bytes a BytesIO can be used like a file - opened in a binary mode. 
- """ - if not isinstance(file, (str, bytes, int)): - raise TypeError("invalid file: %r" % file) - if not isinstance(mode, str): - raise TypeError("invalid mode: %r" % mode) - if buffering is not None and not isinstance(buffering, int): - raise TypeError("invalid buffering: %r" % buffering) - if encoding is not None and not isinstance(encoding, str): - raise TypeError("invalid encoding: %r" % encoding) - if errors is not None and not isinstance(errors, str): - raise TypeError("invalid errors: %r" % errors) - modes = set(mode) - if modes - set("arwb+tU") or len(mode) > len(modes): - raise ValueError("invalid mode: %r" % mode) - reading = "r" in modes - writing = "w" in modes - appending = "a" in modes - updating = "+" in modes - text = "t" in modes - binary = "b" in modes - if "U" in modes: - if writing or appending: - raise ValueError("can't use U and writing mode at once") - reading = True - if text and binary: - raise ValueError("can't have text and binary mode at once") - if reading + writing + appending > 1: - raise ValueError("can't have read/write/append mode at once") - if not (reading or writing or appending): - raise ValueError("must have exactly one of read/write/append mode") - if binary and encoding is not None: - raise ValueError("binary mode doesn't take an encoding argument") - if binary and errors is not None: - raise ValueError("binary mode doesn't take an errors argument") - if binary and newline is not None: - raise ValueError("binary mode doesn't take a newline argument") - raw = FileIO(file, - (reading and "r" or "") + - (writing and "w" or "") + - (appending and "a" or "") + - (updating and "+" or ""), - closefd) - if buffering is None: - buffering = -1 - line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): - buffering = -1 - line_buffering = True - if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (os.error, AttributeError): - pass - else: - if bs > 1: - 
buffering = bs - if buffering < 0: - raise ValueError("invalid buffering size") - if buffering == 0: - if binary: - return raw - raise ValueError("can't have unbuffered text I/O") - if updating: - buffer = BufferedRandom(raw, buffering) - elif writing or appending: - buffer = BufferedWriter(raw, buffering) - elif reading: - buffer = BufferedReader(raw, buffering) - else: - raise ValueError("unknown mode: %r" % mode) - if binary: - return buffer - text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) - text.mode = mode - return text - -class _DocDescriptor: - """Helper for builtins.open.__doc__ - """ - def __get__(self, obj, typ): - return ( - "open(file, mode='r', buffering=None, encoding=None, " - "errors=None, newline=None, closefd=True)\n\n" + - open.__doc__) - -class OpenWrapper: - """Wrapper for builtins.open - - Trick so that open won't become a bound method when stored - as a class variable (as dbm.dumb does). - - See initstdio() in Python/pythonrun.c. - """ - __doc__ = _DocDescriptor() - - def __new__(cls, *args, **kwargs): - return open(*args, **kwargs) - - -class UnsupportedOperation(ValueError, IOError): +DEFAULT_BUFFER_SIZE = _io.DEFAULT_BUFFER_SIZE +BlockingIOError = _io.BlockingIOError +UnsupportedOperation = _io.UnsupportedOperation +open = _io.open +OpenWrapper = _io.open + +# Declaring ABCs in C is tricky so we do it here. +# Method descriptions and default implementations are inherited from the C +# version however. +class IOBase(_io._IOBase, metaclass=abc.ABCMeta): pass +class RawIOBase(_io._RawIOBase, IOBase): + pass -class IOBase(metaclass=abc.ABCMeta): - - """The abstract base class for all I/O classes, acting on streams of - bytes. There is no public constructor. - - This class provides dummy implementations for many methods that - derived classes can override selectively; the default implementations - represent a file that cannot be read, written or seeked. 
- - Even though IOBase does not declare read, readinto, or write because - their signatures will vary, implementations and clients should - consider those methods part of the interface. Also, implementations - may raise a IOError when operations they do not support are called. - - The basic type used for binary data read from or written to a file is - bytes. bytearrays are accepted too, and in some cases (such as - readinto) needed. Text I/O classes work with str data. - - Note that calling any method (even inquiries) on a closed stream is - undefined. Implementations may raise IOError in this case. - - IOBase (and its subclasses) support the iterator protocol, meaning - that an IOBase object can be iterated over yielding the lines in a - stream. - - IOBase also supports the :keyword:`with` statement. In this example, - fp is closed after the suite of the with statment is complete: - - with open('spam.txt', 'r') as fp: - fp.write('Spam and eggs!') - """ - - ### Internal ### - - def _unsupported(self, name: str) -> IOError: - """Internal: raise an exception for unsupported operations.""" - raise UnsupportedOperation("%s.%s() not supported" % - (self.__class__.__name__, name)) - - ### Positioning ### - - def seek(self, pos: int, whence: int = 0) -> int: - """Change stream position. - - Change the stream position to byte offset offset. offset is - interpreted relative to the position indicated by whence. Values - for whence are: - - * 0 -- start of stream (the default); offset should be zero or positive - * 1 -- current stream position; offset may be negative - * 2 -- end of stream; offset is usually negative - - Return the new absolute position. - """ - self._unsupported("seek") - - def tell(self) -> int: - """Return current stream position.""" - self._checkClosed() - return self.seek(0, 1) - - def truncate(self, pos: int = None) -> int: - """Truncate file to size bytes. - - Size defaults to the current IO position as reported by tell(). Return - the new size. 
- """ - self._unsupported("truncate") - - ### Flush and close ### - - def flush(self) -> None: - """Flush write buffers, if applicable. - - This is not implemented for read-only and non-blocking streams. - """ - # XXX Should this return the number of bytes written??? - if self.__closed: - raise ValueError("I/O operation on closed file.") - - __closed = False - - def close(self) -> None: - """Flush and close the IO object. - - This method has no effect if the file is already closed. - """ - if not self.__closed: - try: - self.flush() - except IOError: - pass # If flush() fails, just give up - self.__closed = True - - def __del__(self) -> None: - """Destructor. Calls close().""" - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass - - ### Inquiries ### - - def seekable(self) -> bool: - """Return whether object supports random access. - - If False, seek(), tell() and truncate() will raise IOError. - This method may need to do a test seek(). - """ - return False - - def _checkSeekable(self, msg=None): - """Internal: raise an IOError if file is not seekable - """ - if not self.seekable(): - raise IOError("File or stream is not seekable." - if msg is None else msg) - - - def readable(self) -> bool: - """Return whether object was opened for reading. - - If False, read() will raise IOError. - """ - return False - - def _checkReadable(self, msg=None): - """Internal: raise an IOError if file is not readable - """ - if not self.readable(): - raise IOError("File or stream is not readable." - if msg is None else msg) - - def writable(self) -> bool: - """Return whether object was opened for writing. - - If False, write() and truncate() will raise IOError. 
- """ - return False - - def _checkWritable(self, msg=None): - """Internal: raise an IOError if file is not writable - """ - if not self.writable(): - raise IOError("File or stream is not writable." - if msg is None else msg) - - @property - def closed(self): - """closed: bool. True iff the file has been closed. - - For backwards compatibility, this is a property, not a predicate. - """ - return self.__closed - - def _checkClosed(self, msg=None): - """Internal: raise an ValueError if file is closed - """ - if self.closed: - raise ValueError("I/O operation on closed file." - if msg is None else msg) - - ### Context manager ### - - def __enter__(self) -> "IOBase": # That's a forward reference - """Context management protocol. Returns self.""" - self._checkClosed() - return self - - def __exit__(self, *args) -> None: - """Context management protocol. Calls close()""" - self.close() - - ### Lower-level APIs ### - - # XXX Should these be present even if unimplemented? - - def fileno(self) -> int: - """Returns underlying file descriptor if one exists. - - An IOError is raised if the IO object does not use a file descriptor. - """ - self._unsupported("fileno") - - def isatty(self) -> bool: - """Return whether this is an 'interactive' stream. - - Return False if it can't be determined. - """ - self._checkClosed() - return False - - ### Readline[s] and writelines ### - - def readline(self, limit: int = -1) -> bytes: - r"""Read and return a line from the stream. - - If limit is specified, at most limit bytes will be read. - - The line terminator is always b'\n' for binary files; for text - files, the newlines argument to open can be used to select the line - terminator(s) recognized. - """ - # For backwards compatibility, a (slowish) readline(). 
- if hasattr(self, "peek"): - def nreadahead(): - readahead = self.peek(1) - if not readahead: - return 1 - n = (readahead.find(b"\n") + 1) or len(readahead) - if limit >= 0: - n = min(n, limit) - return n - else: - def nreadahead(): - return 1 - if limit is None: - limit = -1 - res = bytearray() - while limit < 0 or len(res) < limit: - b = self.read(nreadahead()) - if not b: - break - res += b - if res.endswith(b"\n"): - break - return bytes(res) - - def __iter__(self): - self._checkClosed() - return self - - def __next__(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def readlines(self, hint=None): - """Return a list of lines from the stream. - - hint can be specified to control the number of lines read: no more - lines will be read if the total size (in bytes/characters) of all - lines so far exceeds hint. - """ - if hint is None or hint <= 0: - return list(self) - n = 0 - lines = [] - for line in self: - lines.append(line) - n += len(line) - if n >= hint: - break - return lines - - def writelines(self, lines): - self._checkClosed() - for line in lines: - self.write(line) - - -class RawIOBase(IOBase): - - """Base class for raw binary I/O.""" - - # The read() method is implemented by calling readinto(); derived - # classes that want to support read() only need to implement - # readinto() as a primitive operation. In general, readinto() can be - # more efficient than read(). - - # (It would be tempting to also provide an implementation of - # readinto() in terms of read(), in case the latter is a more suitable - # primitive operation, but that would lead to nasty recursion in case - # a subclass doesn't implement either.) - - def read(self, n: int = -1) -> bytes: - """Read and return up to n bytes. - - Returns an empty bytes object on EOF, or None if the object is - set not to block and has no data to read. 
- """ - self._checkClosed() - if n is None: - n = -1 - if n < 0: - return self.readall() - b = bytearray(n.__index__()) - n = self.readinto(b) - del b[n:] - return bytes(b) - - def readall(self): - """Read until EOF, using multiple read() call.""" - self._checkClosed() - res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break - res += data - return bytes(res) - - def readinto(self, b: bytearray) -> int: - """Read up to len(b) bytes into b. - - Returns number of bytes read (0 for EOF), or None if the object - is set not to block as has no data to read. - """ - self._unsupported("readinto") - - def write(self, b: bytes) -> int: - """Write the given buffer to the IO stream. - - Returns the number of bytes written, which may be less than len(b). - """ - self._unsupported("write") - - -class FileIO(_fileio._FileIO, RawIOBase): - - """Raw I/O implementation for OS files.""" - - # This multiply inherits from _FileIO and RawIOBase to make - # isinstance(io.FileIO(), io.RawIOBase) return True without requiring - # that _fileio._FileIO inherits from io.RawIOBase (which would be hard - # to do since _fileio.c is written in C). - - def __init__(self, name, mode="r", closefd=True): - _fileio._FileIO.__init__(self, name, mode, closefd) - self._name = name - - def close(self): - _fileio._FileIO.close(self) - RawIOBase.close(self) - - @property - def name(self): - return self._name - - -class BufferedIOBase(IOBase): - - """Base class for buffered IO objects. - - The main difference with RawIOBase is that the read() method - supports omitting the size argument, and does not have a default - implementation that defers to readinto(). - - In addition, read(), readinto() and write() may raise - BlockingIOError if the underlying raw stream is in non-blocking - mode and not ready; unlike their raw counterparts, they will never - return None. - - A typical implementation should not inherit from a RawIOBase - implementation, but wrap one. 
- """ - - def read(self, n: int = None) -> bytes: - """Read and return up to n bytes. - - If the argument is omitted, None, or negative, reads and - returns all data until EOF. - - If the argument is positive, and the underlying raw stream is - not 'interactive', multiple raw reads may be issued to satisfy - the byte count (unless EOF is reached first). But for - interactive raw streams (XXX and for pipes?), at most one raw - read will be issued, and a short result does not imply that - EOF is imminent. - - Returns an empty bytes array on EOF. - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - self._unsupported("read") - - def readinto(self, b: bytearray) -> int: - """Read up to len(b) bytes into b. - - Like read(), this may issue multiple reads to the underlying raw - stream, unless the latter is 'interactive'. - - Returns the number of bytes read (0 for EOF). - - Raises BlockingIOError if the underlying raw stream has no - data at the moment. - """ - # XXX This ought to work with anything that supports the buffer API - self._checkClosed() - data = self.read(len(b)) - n = len(data) - try: - b[:n] = data - except TypeError as err: - import array - if not isinstance(b, array.array): - raise err - b[:n] = array.array('b', data) - return n - - def write(self, b: bytes) -> int: - """Write the given buffer to the IO stream. - - Return the number of bytes written, which is never less than - len(b). - - Raises BlockingIOError if the buffer is full and the - underlying raw stream cannot accept more data at the moment. - """ - self._unsupported("write") - - -class _BufferedIOMixin(BufferedIOBase): - - """A mixin implementation of BufferedIOBase with an underlying raw stream. - - This passes most requests on to the underlying raw stream. It - does *not* provide implementations of read(), readinto() or - write(). 
- """ - - def __init__(self, raw): - self.raw = raw - - ### Positioning ### - - def seek(self, pos, whence=0): - return self.raw.seek(pos, whence) - - def tell(self): - return self.raw.tell() - - def truncate(self, pos=None): - # Flush the stream. We're mixing buffered I/O with lower-level I/O, - # and a flush may be necessary to synch both views of the current - # file state. - self.flush() - - if pos is None: - pos = self.tell() - # XXX: Should seek() be used, instead of passing the position - # XXX directly to truncate? - return self.raw.truncate(pos) - - ### Flush and close ### - - def flush(self): - self.raw.flush() - - def close(self): - if not self.closed: - try: - self.flush() - except IOError: - pass # If flush() fails, just give up - self.raw.close() - - ### Inquiries ### - - def seekable(self): - return self.raw.seekable() - - def readable(self): - return self.raw.readable() - - def writable(self): - return self.raw.writable() - - @property - def closed(self): - return self.raw.closed - - @property - def name(self): - return self.raw.name - - @property - def mode(self): - return self.raw.mode - - ### Lower-level APIs ### - - def fileno(self): - return self.raw.fileno() - - def isatty(self): - return self.raw.isatty() - - -class _BytesIO(BufferedIOBase): - - """Buffered I/O implementation using an in-memory bytes buffer.""" - - def __init__(self, initial_bytes=None): - buf = bytearray() - if initial_bytes is not None: - buf += initial_bytes - self._buffer = buf - self._pos = 0 - - def getvalue(self): - """Return the bytes value (contents) of the buffer - """ - self._checkClosed() - return bytes(self._buffer) - - def read(self, n=None): - self._checkClosed() - if n is None: - n = -1 - if n < 0: - n = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + n) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) - - def read1(self, n): - """This is the same as read. 
- """ - return self.read(n) - - def write(self, b): - self._checkClosed() - if isinstance(b, str): - raise TypeError("can't write str to binary stream") - n = len(b) - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. - padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n - return n - - def seek(self, pos, whence=0): - self._checkClosed() - try: - pos = pos.__index__() - except AttributeError as err: - raise TypeError("an integer is required") from err - if whence == 0: - if pos < 0: - raise ValueError("negative seek position %r" % (pos,)) - self._pos = pos - elif whence == 1: - self._pos = max(0, self._pos + pos) - elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) - else: - raise ValueError("invalid whence value") - return self._pos - - def tell(self): - self._checkClosed() - return self._pos - - def truncate(self, pos=None): - self._checkClosed() - if pos is None: - pos = self._pos - elif pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] - return self.seek(pos) - - def readable(self): - return True - - def writable(self): - return True - - def seekable(self): - return True - -# Use the faster implementation of BytesIO if available -try: - import _bytesio - - class BytesIO(_bytesio._BytesIO, BufferedIOBase): - __doc__ = _bytesio._BytesIO.__doc__ - -except ImportError: - BytesIO = _BytesIO - - -class BufferedReader(_BufferedIOMixin): - - """BufferedReader(raw[, buffer_size]) - - A buffer for a readable, sequential BaseRawIO object. - - The constructor creates a BufferedReader for the given readable raw - stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE - is used. - """ - - def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): - """Create a new buffered reader using the given readable raw IO object. 
- """ - raw._checkReadable() - _BufferedIOMixin.__init__(self, raw) - self.buffer_size = buffer_size - self._reset_read_buf() - self._read_lock = Lock() - - def _reset_read_buf(self): - self._read_buf = b"" - self._read_pos = 0 - - def read(self, n=None): - """Read n bytes. - - Returns exactly n bytes of data unless the underlying raw IO - stream reaches EOF or if the call would block in non-blocking - mode. If n is negative, read until EOF or until read() would - block. - """ - self._checkClosed() - with self._read_lock: - return self._read_unlocked(n) - - def _read_unlocked(self, n=None): - nodata_val = b"" - empty_values = (b"", None) - buf = self._read_buf - pos = self._read_pos - - # Special case for when the number of bytes to read is unspecified. - if n is None or n == -1: - self._reset_read_buf() - chunks = [buf[pos:]] # Strip the consumed bytes. - current_size = 0 - while True: - # Read until EOF or until read() would block. - chunk = self.raw.read() - if chunk in empty_values: - nodata_val = chunk - break - current_size += len(chunk) - chunks.append(chunk) - return b"".join(chunks) or nodata_val - - # The number of bytes to read is specified, return at most n bytes. - avail = len(buf) - pos # Length of the available buffered data. - if n <= avail: - # Fast path: the data to read is fully buffered. - self._read_pos += n - return buf[pos:pos+n] - # Slow path: read from the stream until enough bytes are read, - # or until an EOF occurs or until read() would block. - chunks = [buf[pos:]] - wanted = max(self.buffer_size, n) - while avail < n: - chunk = self.raw.read(wanted) - if chunk in empty_values: - nodata_val = chunk - break - avail += len(chunk) - chunks.append(chunk) - # n is more then avail only when an EOF occurred or when - # read() would have blocked. - n = min(n, avail) - out = b"".join(chunks) - self._read_buf = out[n:] # Save the extra data in the buffer. 
- self._read_pos = 0 - return out[:n] if out else nodata_val - - def peek(self, n=0): - """Returns buffered bytes without advancing the position. - - The argument indicates a desired minimal number of bytes; we - do at most one raw read to satisfy it. We never return more - than self.buffer_size. - """ - self._checkClosed() - with self._read_lock: - return self._peek_unlocked(n) - - def _peek_unlocked(self, n=0): - want = min(n, self.buffer_size) - have = len(self._read_buf) - self._read_pos - if have < want: - to_read = self.buffer_size - have - current = self.raw.read(to_read) - if current: - self._read_buf = self._read_buf[self._read_pos:] + current - self._read_pos = 0 - return self._read_buf[self._read_pos:] - - def read1(self, n): - """Reads up to n bytes, with at most one read() system call.""" - # Returns up to n bytes. If at least one byte is buffered, we - # only return buffered bytes. Otherwise, we do one raw read. - self._checkClosed() - if n <= 0: - return b"" - with self._read_lock: - self._peek_unlocked(1) - return self._read_unlocked( - min(n, len(self._read_buf) - self._read_pos)) - - def tell(self): - self._checkClosed() - return self.raw.tell() - len(self._read_buf) + self._read_pos - - def seek(self, pos, whence=0): - self._checkClosed() - with self._read_lock: - if whence == 1: - pos -= len(self._read_buf) - self._read_pos - pos = self.raw.seek(pos, whence) - self._reset_read_buf() - return pos - - -class BufferedWriter(_BufferedIOMixin): - - """A buffer for a writeable sequential RawIO object. - - The constructor creates a BufferedWriter for the given writeable raw - stream. If the buffer_size is not given, it defaults to - DEAFULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to - twice the buffer size. 
- """ - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - raw._checkWritable() - _BufferedIOMixin.__init__(self, raw) - self.buffer_size = buffer_size - self.max_buffer_size = (2*buffer_size - if max_buffer_size is None - else max_buffer_size) - self._write_buf = bytearray() - self._write_lock = Lock() - - def write(self, b): - self._checkClosed() - if isinstance(b, str): - raise TypeError("can't write str to binary stream") - with self._write_lock: - # XXX we can implement some more tricks to try and avoid - # partial writes - if len(self._write_buf) > self.buffer_size: - # We're full, so let's pre-flush the buffer - try: - self._flush_unlocked() - except BlockingIOError as e: - # We can't accept anything else. - # XXX Why not just let the exception pass through? - raise BlockingIOError(e.errno, e.strerror, 0) - before = len(self._write_buf) - self._write_buf.extend(b) - written = len(self._write_buf) - before - if len(self._write_buf) > self.buffer_size: - try: - self._flush_unlocked() - except BlockingIOError as e: - if len(self._write_buf) > self.max_buffer_size: - # We've hit max_buffer_size. We have to accept a - # partial write and cut back our buffer. 
- overage = len(self._write_buf) - self.max_buffer_size - self._write_buf = self._write_buf[:self.max_buffer_size] - raise BlockingIOError(e.errno, e.strerror, overage) - return written - - def truncate(self, pos=None): - self._checkClosed() - with self._write_lock: - self._flush_unlocked() - if pos is None: - pos = self.raw.tell() - return self.raw.truncate(pos) - - def flush(self): - self._checkClosed() - with self._write_lock: - self._flush_unlocked() - - def _flush_unlocked(self): - written = 0 - try: - while self._write_buf: - n = self.raw.write(self._write_buf) - del self._write_buf[:n] - written += n - except BlockingIOError as e: - n = e.characters_written - del self._write_buf[:n] - written += n - raise BlockingIOError(e.errno, e.strerror, written) - - def tell(self): - self._checkClosed() - return self.raw.tell() + len(self._write_buf) - - def seek(self, pos, whence=0): - self._checkClosed() - with self._write_lock: - self._flush_unlocked() - return self.raw.seek(pos, whence) - - -class BufferedRWPair(BufferedIOBase): - - """A buffered reader and writer object together. - - A buffered reader object and buffered writer object put together to - form a sequential IO object that can read and write. This is typically - used with a socket or two-way pipe. - - reader and writer are RawIOBase objects that are readable and - writeable respectively. If the buffer_size is omitted it defaults to - DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer) - defaults to twice the buffer size. - """ - - # XXX The usefulness of this (compared to having two separate IO - # objects) is questionable. - - def __init__(self, reader, writer, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - """Constructor. - - The arguments are two RawIO instances. 
- """ - reader._checkReadable() - writer._checkWritable() - self.reader = BufferedReader(reader, buffer_size) - self.writer = BufferedWriter(writer, buffer_size, max_buffer_size) - - def read(self, n=None): - if n is None: - n = -1 - return self.reader.read(n) - - def readinto(self, b): - return self.reader.readinto(b) - - def write(self, b): - return self.writer.write(b) - - def peek(self, n=0): - return self.reader.peek(n) - - def read1(self, n): - return self.reader.read1(n) - - def readable(self): - return self.reader.readable() - - def writable(self): - return self.writer.writable() - - def flush(self): - return self.writer.flush() - - def close(self): - self.writer.close() - self.reader.close() - - def isatty(self): - return self.reader.isatty() or self.writer.isatty() - - @property - def closed(self): - return self.writer.closed - - -class BufferedRandom(BufferedWriter, BufferedReader): - - """A buffered interface to random access streams. - - The constructor creates a reader and writer for a seekable stream, - raw, given in the first argument. If the buffer_size is omitted it - defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered - writer) defaults to twice the buffer size. - """ - - def __init__(self, raw, - buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None): - raw._checkSeekable() - BufferedReader.__init__(self, raw, buffer_size) - BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size) - - def seek(self, pos, whence=0): - self.flush() - # First do the raw seek, then empty the read buffer, so that - # if the raw seek fails, we don't lose buffered data forever. - pos = self.raw.seek(pos, whence) - with self._read_lock: - self._reset_read_buf() - return pos - - def tell(self): - self._checkClosed() - if self._write_buf: - return self.raw.tell() + len(self._write_buf) - else: - return BufferedReader.tell(self) - - def truncate(self, pos=None): - if pos is None: - pos = self.tell() - # Use seek to flush the read buffer. 
- self.seek(pos) - return BufferedWriter.truncate(self) - - def read(self, n=None): - if n is None: - n = -1 - self.flush() - return BufferedReader.read(self, n) - - def readinto(self, b): - self.flush() - return BufferedReader.readinto(self, b) - - def peek(self, n=0): - self.flush() - return BufferedReader.peek(self, n) - - def read1(self, n): - self.flush() - return BufferedReader.read1(self, n) - - def write(self, b): - self._checkClosed() - if self._read_buf: - # Undo readahead - with self._read_lock: - self.raw.seek(self._read_pos - len(self._read_buf), 1) - self._reset_read_buf() - return BufferedWriter.write(self, b) - - -class TextIOBase(IOBase): - - """Base class for text I/O. - - This class provides a character and line based interface to stream - I/O. There is no readinto method because Python's character strings - are immutable. There is no public constructor. - """ - - def read(self, n: int = -1) -> str: - """Read at most n characters from stream. - - Read from underlying buffer until we have n characters or we hit EOF. - If n is negative or omitted, read until EOF. - """ - self._unsupported("read") - - def write(self, s: str) -> int: - """Write string s to stream.""" - self._unsupported("write") - - def truncate(self, pos: int = None) -> int: - """Truncate size to pos.""" - self._unsupported("truncate") - - def readline(self) -> str: - """Read until newline or EOF. - - Returns an empty string if EOF is hit immediately. - """ - self._unsupported("readline") - - @property - def encoding(self): - """Subclasses should override.""" - return None - - @property - def newlines(self): - """Line endings translated so far. - - Only line endings translated during reading are considered. - - Subclasses should override. - """ - return None - - -class IncrementalNewlineDecoder(codecs.IncrementalDecoder): - r"""Codec used when reading a file in universal newlines mode. It wraps - another incremental decoder, translating \r\n and \r into \n. 
It also - records the types of newlines encountered. When used with - translate=False, it ensures that the newline sequence is returned in - one piece. - """ - def __init__(self, decoder, translate, errors='strict'): - codecs.IncrementalDecoder.__init__(self, errors=errors) - self.translate = translate - self.decoder = decoder - self.seennl = 0 - self.pendingcr = False - - def decode(self, input, final=False): - # decode input (with the eventual \r from a previous pass) - output = self.decoder.decode(input, final=final) - if self.pendingcr and (output or final): - output = "\r" + output - self.pendingcr = False - - # retain last \r even when not translating data: - # then readline() is sure to get \r\n in one pass - if output.endswith("\r") and not final: - output = output[:-1] - self.pendingcr = True - - # Record which newlines are read - crlf = output.count('\r\n') - cr = output.count('\r') - crlf - lf = output.count('\n') - crlf - self.seennl |= (lf and self._LF) | (cr and self._CR) \ - | (crlf and self._CRLF) - - if self.translate: - if crlf: - output = output.replace("\r\n", "\n") - if cr: - output = output.replace("\r", "\n") - - return output - - def getstate(self): - buf, flag = self.decoder.getstate() - flag <<= 1 - if self.pendingcr: - flag |= 1 - return buf, flag - - def setstate(self, state): - buf, flag = state - self.pendingcr = bool(flag & 1) - self.decoder.setstate((buf, flag >> 1)) - - def reset(self): - self.seennl = 0 - self.pendingcr = False - self.decoder.reset() - - _LF = 1 - _CR = 2 - _CRLF = 4 - - @property - def newlines(self): - return (None, - "\n", - "\r", - ("\r", "\n"), - "\r\n", - ("\n", "\r\n"), - ("\r", "\r\n"), - ("\r", "\n", "\r\n") - )[self.seennl] - - -class TextIOWrapper(TextIOBase): - - r"""Character and line based layer over a BufferedIOBase object, buffer. - - encoding gives the name of the encoding that the stream will be - decoded or encoded with. It defaults to locale.getpreferredencoding. 
- - errors determines the strictness of encoding and decoding (see the - codecs.register) and defaults to "strict". - - newline can be None, '', '\n', '\r', or '\r\n'. It controls the - handling of line endings. If it is None, universal newlines is - enabled. With this enabled, on input, the lines endings '\n', '\r', - or '\r\n' are translated to '\n' before being returned to the - caller. Conversely, on output, '\n' is translated to the system - default line separator, os.linesep. If newline is any other of its - legal values, that newline becomes the newline when the file is read - and it is returned untranslated. On output, '\n' is converted to the - newline. - - If line_buffering is True, a call to flush is implied when a call to - write contains a newline character. - """ - - _CHUNK_SIZE = 2048 - - def __init__(self, buffer, encoding=None, errors=None, newline=None, - line_buffering=False): - if newline not in (None, "", "\n", "\r", "\r\n"): - raise ValueError("illegal newline value: %r" % (newline,)) - if encoding is None: - try: - encoding = os.device_encoding(buffer.fileno()) - except (AttributeError, UnsupportedOperation): - pass - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding() - - if not isinstance(encoding, str): - raise ValueError("invalid encoding: %r" % encoding) - - if errors is None: - errors = "strict" - else: - if not isinstance(errors, str): - raise ValueError("invalid errors: %r" % errors) - - self.buffer = buffer - self._line_buffering = line_buffering - self._encoding = encoding - self._errors = errors - self._readuniversal = not newline - self._readtranslate = newline is None - self._readnl = newline - self._writetranslate = newline != '' - self._writenl = newline or os.linesep - self._encoder = None - self._decoder = None - self._decoded_chars = '' # buffer for text returned from decoder - 
self._decoded_chars_used = 0 # offset into _decoded_chars for read() - self._snapshot = None # info for reconstructing decoder state - self._seekable = self._telling = self.buffer.seekable() - - # self._snapshot is either None, or a tuple (dec_flags, next_input) - # where dec_flags is the second (integer) item of the decoder state - # and next_input is the chunk of input bytes that comes next after the - # snapshot point. We use this to reconstruct decoder states in tell(). - - # Naming convention: - # - "bytes_..." for integer variables that count input bytes - # - "chars_..." for integer variables that count decoded characters - - @property - def encoding(self): - return self._encoding - - @property - def errors(self): - return self._errors - - @property - def line_buffering(self): - return self._line_buffering - - def seekable(self): - return self._seekable - - def readable(self): - return self.buffer.readable() - - def writable(self): - return self.buffer.writable() - - def flush(self): - self.buffer.flush() - self._telling = self._seekable - - def close(self): - try: - self.flush() - except: - pass # If flush() fails, just give up - self.buffer.close() - - @property - def closed(self): - return self.buffer.closed - - @property - def name(self): - return self.buffer.name - - def fileno(self): - return self.buffer.fileno() - - def isatty(self): - return self.buffer.isatty() - - def write(self, s: str): - self._checkClosed() - if not isinstance(s, str): - raise TypeError("can't write %s to text stream" % - s.__class__.__name__) - length = len(s) - haslf = (self._writetranslate or self._line_buffering) and "\n" in s - if haslf and self._writetranslate and self._writenl != "\n": - s = s.replace("\n", self._writenl) - encoder = self._encoder or self._get_encoder() - # XXX What if we were just reading? 
- b = encoder.encode(s) - self.buffer.write(b) - if self._line_buffering and (haslf or "\r" in s): - self.flush() - self._snapshot = None - if self._decoder: - self._decoder.reset() - return length - - def _get_encoder(self): - make_encoder = codecs.getincrementalencoder(self._encoding) - self._encoder = make_encoder(self._errors) - return self._encoder - - def _get_decoder(self): - make_decoder = codecs.getincrementaldecoder(self._encoding) - decoder = make_decoder(self._errors) - if self._readuniversal: - decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) - self._decoder = decoder - return decoder - - # The following three methods implement an ADT for _decoded_chars. - # Text returned from the decoder is buffered here until the client - # requests it by calling our read() or readline() method. - def _set_decoded_chars(self, chars): - """Set the _decoded_chars buffer.""" - self._decoded_chars = chars - self._decoded_chars_used = 0 - - def _get_decoded_chars(self, n=None): - """Advance into the _decoded_chars buffer.""" - offset = self._decoded_chars_used - if n is None: - chars = self._decoded_chars[offset:] - else: - chars = self._decoded_chars[offset:offset + n] - self._decoded_chars_used += len(chars) - return chars - - def _rewind_decoded_chars(self, n): - """Rewind the _decoded_chars buffer.""" - if self._decoded_chars_used < n: - raise AssertionError("rewind decoded_chars out of bounds") - self._decoded_chars_used -= n - - def _read_chunk(self): - """ - Read and decode the next chunk of data from the BufferedReader. - """ - - # The return value is True unless EOF was reached. The decoded - # string is placed in self._decoded_chars (replacing its previous - # value). The entire input chunk is sent to the decoder, though - # some of it may remain buffered in the decoder, yet to be - # converted. 
- - if self._decoder is None: - raise ValueError("no decoder") - - if self._telling: - # To prepare for tell(), we need to snapshot a point in the - # file where the decoder's input buffer is empty. - - dec_buffer, dec_flags = self._decoder.getstate() - # Given this, we know there was a valid snapshot point - # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). - - # Read a chunk, decode it, and put the result in self._decoded_chars. - input_chunk = self.buffer.read1(self._CHUNK_SIZE) - eof = not input_chunk - self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) - - if self._telling: - # At the snapshot point, len(dec_buffer) bytes before the read, - # the next input to be decoded is dec_buffer + input_chunk. - self._snapshot = (dec_flags, dec_buffer + input_chunk) - - return not eof - - def _pack_cookie(self, position, dec_flags=0, - bytes_to_feed=0, need_eof=0, chars_to_skip=0): - # The meaning of a tell() cookie is: seek to position, set the - # decoder flags to dec_flags, read bytes_to_feed bytes, feed them - # into the decoder with need_eof as the EOF flag, then skip - # chars_to_skip characters of the decoded result. For most simple - # decoders, tell() will often just give a byte offset in the file. 
- return (position | (dec_flags<<64) | (bytes_to_feed<<128) | - (chars_to_skip<<192) | bool(need_eof)<<256) - - def _unpack_cookie(self, bigint): - rest, position = divmod(bigint, 1<<64) - rest, dec_flags = divmod(rest, 1<<64) - rest, bytes_to_feed = divmod(rest, 1<<64) - need_eof, chars_to_skip = divmod(rest, 1<<64) - return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip - - def tell(self): - self._checkClosed() - if not self._seekable: - raise IOError("underlying stream is not seekable") - if not self._telling: - raise IOError("telling position disabled by next() call") - self.flush() - position = self.buffer.tell() - decoder = self._decoder - if decoder is None or self._snapshot is None: - if self._decoded_chars: - # This should never happen. - raise AssertionError("pending decoded text") - return position - - # Skip backward to the snapshot point (see _read_chunk). - dec_flags, next_input = self._snapshot - position -= len(next_input) - - # How many decoded characters have been used up since the snapshot? - chars_to_skip = self._decoded_chars_used - if chars_to_skip == 0: - # We haven't moved from the snapshot point. - return self._pack_cookie(position, dec_flags) - - # Starting from the snapshot position, we will walk the decoder - # forward until it gives us enough decoded characters. - saved_state = decoder.getstate() - try: - # Note our initial start point. - decoder.setstate((b'', dec_flags)) - start_pos = position - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - need_eof = 0 - - # Feed the decoder one byte at a time. As we go, note the - # nearest "safe start point" before the current location - # (a point where the decoder has nothing buffered, so seek() - # can safely start from there and advance to this location). 
- next_byte = bytearray(1) - for next_byte[0] in next_input: - bytes_fed += 1 - chars_decoded += len(decoder.decode(next_byte)) - dec_buffer, dec_flags = decoder.getstate() - if not dec_buffer and chars_decoded <= chars_to_skip: - # Decoder buffer is empty, so this is a safe start point. - start_pos += bytes_fed - chars_to_skip -= chars_decoded - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - if chars_decoded >= chars_to_skip: - break - else: - # We didn't get enough decoded data; signal EOF to get more. - chars_decoded += len(decoder.decode(b'', final=True)) - need_eof = 1 - if chars_decoded < chars_to_skip: - raise IOError("can't reconstruct logical file position") - - # The returned cookie corresponds to the last safe start point. - return self._pack_cookie( - start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) - finally: - decoder.setstate(saved_state) - - def truncate(self, pos=None): - self.flush() - if pos is None: - pos = self.tell() - self.seek(pos) - return self.buffer.truncate() - - def seek(self, cookie, whence=0): - self._checkClosed() - if not self._seekable: - raise IOError("underlying stream is not seekable") - if whence == 1: # seek relative to current position - if cookie != 0: - raise IOError("can't do nonzero cur-relative seeks") - # Seeking to the current position should attempt to - # sync the underlying buffer with the current position. 
- whence = 0 - cookie = self.tell() - if whence == 2: # seek relative to end of file - if cookie != 0: - raise IOError("can't do nonzero end-relative seeks") - self.flush() - position = self.buffer.seek(0, 2) - self._set_decoded_chars('') - self._snapshot = None - if self._decoder: - self._decoder.reset() - return position - if whence != 0: - raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % - (whence,)) - if cookie < 0: - raise ValueError("negative seek position %r" % (cookie,)) - self.flush() - - # The strategy of seek() is to go back to the safe start point - # and replay the effect of read(chars_to_skip) from there. - start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ - self._unpack_cookie(cookie) - - # Seek back to the safe start point. - self.buffer.seek(start_pos) - self._set_decoded_chars('') - self._snapshot = None - - # Restore the decoder to its state from the safe start point. - if self._decoder or dec_flags or chars_to_skip: - self._decoder = self._decoder or self._get_decoder() - self._decoder.setstate((b'', dec_flags)) - self._snapshot = (dec_flags, b'') - - if chars_to_skip: - # Just like _read_chunk, feed the decoder and save a snapshot. - input_chunk = self.buffer.read(bytes_to_feed) - self._set_decoded_chars( - self._decoder.decode(input_chunk, need_eof)) - self._snapshot = (dec_flags, input_chunk) - - # Skip chars_to_skip of the decoded characters. - if len(self._decoded_chars) < chars_to_skip: - raise IOError("can't restore logical file position") - self._decoded_chars_used = chars_to_skip - - return cookie - - def read(self, n=None): - self._checkClosed() - if n is None: - n = -1 - decoder = self._decoder or self._get_decoder() - if n < 0: - # Read everything. - result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) - self._set_decoded_chars('') - self._snapshot = None - return result - else: - # Keep reading chunks until we have n characters to return. 
- eof = False - result = self._get_decoded_chars(n) - while len(result) < n and not eof: - eof = not self._read_chunk() - result += self._get_decoded_chars(n - len(result)) - return result - - def __next__(self): - self._checkClosed() - self._telling = False - line = self.readline() - if not line: - self._snapshot = None - self._telling = self._seekable - raise StopIteration - return line - - def readline(self, limit=None): - self._checkClosed() - if limit is None: - limit = -1 - - # Grab all the decoded text (we will rewind any extra bits later). - line = self._get_decoded_chars() - - start = 0 - decoder = self._decoder or self._get_decoder() - - pos = endpos = None - while True: - if self._readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start) - if pos >= 0: - endpos = pos + 1 - break - else: - start = len(line) - - elif self._readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - - # In C we'd look for these in parallel of course. 
- nlpos = line.find("\n", start) - crpos = line.find("\r", start) - if crpos == -1: - if nlpos == -1: - # Nothing found - start = len(line) - else: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == -1: - # Found lone \r - endpos = crpos + 1 - break - elif nlpos < crpos: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == crpos + 1: - # Found \r\n - endpos = crpos + 2 - break - else: - # Found \r - endpos = crpos + 1 - break - else: - # non-universal - pos = line.find(self._readnl) - if pos >= 0: - endpos = pos + len(self._readnl) - break - - if limit >= 0 and len(line) >= limit: - endpos = limit # reached length limit - break - - # No line ending seen yet - get more data - more_line = '' - while self._read_chunk(): - if self._decoded_chars: - break - if self._decoded_chars: - line += self._get_decoded_chars() - else: - # end of file - self._set_decoded_chars('') - self._snapshot = None - return line - - if limit >= 0 and endpos > limit: - endpos = limit # don't exceed limit - - # Rewind _decoded_chars to just after the line ending we found. - self._rewind_decoded_chars(len(line) - endpos) - return line[:endpos] - - @property - def newlines(self): - return self._decoder.newlines if self._decoder else None - -class _StringIO(TextIOWrapper): - """Text I/O implementation using an in-memory buffer. - - The initial_value argument sets the value of object. The newline - argument is like the one of TextIOWrapper's constructor. 
- """ - - # XXX This is really slow, but fully functional - - def __init__(self, initial_value="", newline="\n"): - super(_StringIO, self).__init__(BytesIO(), - encoding="utf-8", - errors="strict", - newline=newline) - if initial_value: - if not isinstance(initial_value, str): - initial_value = str(initial_value) - self.write(initial_value) - self.seek(0) - - def getvalue(self): - self.flush() - return self.buffer.getvalue().decode(self._encoding, self._errors) - -try: - import _stringio - - # This subclass is a reimplementation of the TextIOWrapper - # interface without any of its text decoding facilities. All the - # stored data is manipulated with the efficient - # _stringio._StringIO extension type. Also, the newline decoding - # mechanism of IncrementalNewlineDecoder is reimplemented here for - # efficiency. Doing otherwise, would require us to implement a - # fake decoder which would add an additional and unnecessary layer - # on top of the _StringIO methods. - - class StringIO(_stringio._StringIO, TextIOBase): - """Text I/O implementation using an in-memory buffer. - - The initial_value argument sets the value of object. The newline - argument is like the one of TextIOWrapper's constructor. - """ - - _CHUNK_SIZE = 4096 - - def __init__(self, initial_value="", newline="\n"): - if newline not in (None, "", "\n", "\r", "\r\n"): - raise ValueError("illegal newline value: %r" % (newline,)) - - self._readuniversal = not newline - self._readtranslate = newline is None - self._readnl = newline - self._writetranslate = newline != "" - self._writenl = newline or os.linesep - self._pending = "" - self._seennl = 0 - - # Reset the buffer first, in case __init__ is called - # multiple times. 
- self.truncate(0) - if initial_value is None: - initial_value = "" - self.write(initial_value) - self.seek(0) - - @property - def buffer(self): - raise UnsupportedOperation("%s.buffer attribute is unsupported" % - self.__class__.__name__) - - # XXX Cruft to support the TextIOWrapper API. This would only - # be meaningful if StringIO supported the buffer attribute. - # Hopefully, a better solution, than adding these pseudo-attributes, - # will be found. - @property - def encoding(self): - return "utf-8" - - @property - def errors(self): - return "strict" - - @property - def line_buffering(self): - return False - - def _decode_newlines(self, input, final=False): - # decode input (with the eventual \r from a previous pass) - if self._pending: - input = self._pending + input - - # retain last \r even when not translating data: - # then readline() is sure to get \r\n in one pass - if input.endswith("\r") and not final: - input = input[:-1] - self._pending = "\r" - else: - self._pending = "" - - # Record which newlines are read - crlf = input.count('\r\n') - cr = input.count('\r') - crlf - lf = input.count('\n') - crlf - self._seennl |= (lf and self._LF) | (cr and self._CR) \ - | (crlf and self._CRLF) - - if self._readtranslate: - if crlf: - output = input.replace("\r\n", "\n") - if cr: - output = input.replace("\r", "\n") - else: - output = input - - return output - - def writable(self): - return True - - def readable(self): - return True - - def seekable(self): - return True - - _read = _stringio._StringIO.read - _write = _stringio._StringIO.write - _tell = _stringio._StringIO.tell - _seek = _stringio._StringIO.seek - _truncate = _stringio._StringIO.truncate - _getvalue = _stringio._StringIO.getvalue - - def getvalue(self) -> str: - """Retrieve the entire contents of the object.""" - self._checkClosed() - return self._getvalue() - - def write(self, s: str) -> int: - """Write string s to file. - - Returns the number of characters written. 
- """ - self._checkClosed() - if not isinstance(s, str): - raise TypeError("can't write %s to text stream" % - s.__class__.__name__) - length = len(s) - if self._writetranslate and self._writenl != "\n": - s = s.replace("\n", self._writenl) - self._pending = "" - self._write(s) - return length - - def read(self, n: int = None) -> str: - """Read at most n characters, returned as a string. - - If the argument is negative or omitted, read until EOF - is reached. Return an empty string at EOF. - """ - self._checkClosed() - if n is None: - n = -1 - res = self._pending - if n < 0: - res += self._decode_newlines(self._read(), True) - self._pending = "" - return res - else: - res = self._decode_newlines(self._read(n), True) - self._pending = res[n:] - return res[:n] - - def tell(self) -> int: - """Tell the current file position.""" - self._checkClosed() - if self._pending: - return self._tell() - len(self._pending) - else: - return self._tell() - - def seek(self, pos: int = None, whence: int = 0) -> int: - """Change stream position. - - Seek to character offset pos relative to position indicated by whence: - 0 Start of stream (the default). pos should be >= 0; - 1 Current position - pos must be 0; - 2 End of stream - pos must be 0. - Returns the new absolute position. - """ - self._checkClosed() - self._pending = "" - return self._seek(pos, whence) - - def truncate(self, pos: int = None) -> int: - """Truncate size to pos. - - The pos argument defaults to the current file position, as - returned by tell(). Imply an absolute seek to pos. - Returns the new absolute position. 
- """ - self._checkClosed() - self._pending = "" - return self._truncate(pos) - - def readline(self, limit: int = None) -> str: - self._checkClosed() - if limit is None: - limit = -1 - if limit >= 0: - # XXX: Hack to support limit argument, for backwards - # XXX compatibility - line = self.readline() - if len(line) <= limit: - return line - line, self._pending = line[:limit], line[limit:] + self._pending - return line - - line = self._pending - self._pending = "" - - start = 0 - pos = endpos = None - while True: - if self._readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start) - if pos >= 0: - endpos = pos + 1 - break - else: - start = len(line) - - elif self._readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - - # In C we'd look for these in parallel of course. - nlpos = line.find("\n", start) - crpos = line.find("\r", start) - if crpos == -1: - if nlpos == -1: - # Nothing found - start = len(line) - else: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == -1: - # Found lone \r - endpos = crpos + 1 - break - elif nlpos < crpos: - # Found \n - endpos = nlpos + 1 - break - elif nlpos == crpos + 1: - # Found \r\n - endpos = crpos + 2 - break - else: - # Found \r - endpos = crpos + 1 - break - else: - # non-universal - pos = line.find(self._readnl) - if pos >= 0: - endpos = pos + len(self._readnl) - break - - # No line ending seen yet - get more data - more_line = self.read(self._CHUNK_SIZE) - if more_line: - line += more_line - else: - # end of file - return line - - self._pending = line[endpos:] - return line[:endpos] - - _LF = 1 - _CR = 2 - _CRLF = 4 - - @property - def newlines(self): - return (None, - "\n", - "\r", - ("\r", "\n"), - "\r\n", - ("\n", "\r\n"), - ("\r", "\r\n"), - ("\r", "\n", "\r\n") - )[self._seennl] +class BufferedIOBase(_io._BufferedIOBase, IOBase): + pass +class TextIOBase(_io._TextIOBase, IOBase): + pass 
-except ImportError: - StringIO = _StringIO +FileIO = _io.FileIO +BytesIO = _io.BytesIO +StringIO = _io.StringIO +BufferedReader = _io.BufferedReader +BufferedWriter = _io.BufferedWriter +BufferedRWPair = _io.BufferedRWPair +BufferedRandom = _io.BufferedRandom +IncrementalNewlineDecoder = _io.IncrementalNewlineDecoder +TextIOWrapper = _io.TextIOWrapper + +RawIOBase.register(FileIO) + +BufferedIOBase.register(BytesIO) +BufferedIOBase.register(BufferedReader) +BufferedIOBase.register(BufferedWriter) +BufferedIOBase.register(BufferedRandom) +BufferedIOBase.register(BufferedRWPair) + +TextIOBase.register(StringIO) +TextIOBase.register(TextIOWrapper) diff --git a/Lib/test/test_bufio.py b/Lib/test/test_bufio.py index e65951d..7d617d3 100644 --- a/Lib/test/test_bufio.py +++ b/Lib/test/test_bufio.py @@ -1,9 +1,12 @@ import unittest from test import support -# Simple test to ensure that optimizations in fileobject.c deliver -# the expected results. For best testing, run this under a debug-build -# Python too (to exercise asserts in the C code). +import io # C implementation. +import _pyio as pyio # Python implementation. + +# Simple test to ensure that optimizations in the IO library deliver the +# expected results. For best testing, run this under a debug-build Python too +# (to exercise asserts in the C code). lengths = list(range(1, 257)) + [512, 1000, 1024, 2048, 4096, 8192, 10000, 16384, 32768, 65536, 1000000] @@ -18,7 +21,7 @@ class BufferSizeTest(unittest.TestCase): # Since C doesn't guarantee we can write/read arbitrary bytes in text # files, use binary mode. 
- f = open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") try: # write once with \n and once without f.write(s) @@ -58,8 +61,16 @@ class BufferSizeTest(unittest.TestCase): def test_nullpat(self): self.drive_one(bytes(1000)) + +class CBufferSizeTest(BufferSizeTest): + open = io.open + +class PyBufferSizeTest(BufferSizeTest): + open = staticmethod(pyio.open) + + def test_main(): - support.run_unittest(BufferSizeTest) + support.run_unittest(CBufferSizeTest, PyBufferSizeTest) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 8d43d7a..0554fc2 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -2605,10 +2605,10 @@ order (MRO) for bases """ def test_descrdoc(self): # Testing descriptor doc strings... - from _fileio import _FileIO + from _io import FileIO def check(descr, what): self.assertEqual(descr.__doc__, what) - check(_FileIO.closed, "True if the file is closed") # getset descriptor + check(FileIO.closed, "True if the file is closed") # getset descriptor check(complex.real, "the real part of a complex number") # member descriptor def test_doc_descriptor(self): diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index 7a68b2f..3360121 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -4,6 +4,9 @@ import unittest from array import array from weakref import proxy +import io +import _pyio as pyio + from test.support import TESTFN, findfile, run_unittest from collections import UserList @@ -11,7 +14,7 @@ class AutoFileTests(unittest.TestCase): # file tests for which a test file is automatically set up def setUp(self): - self.f = open(TESTFN, 'wb') + self.f = self.open(TESTFN, 'wb') def tearDown(self): if self.f: @@ -39,7 +42,7 @@ class AutoFileTests(unittest.TestCase): self.f.write(b'12') self.f.close() a = array('b', b'x'*10) - self.f = open(TESTFN, 'rb') + self.f = self.open(TESTFN, 'rb') n = self.f.readinto(a) self.assertEquals(b'12', a.tostring()[:n]) @@ -47,7 
+50,7 @@ class AutoFileTests(unittest.TestCase): # verify readinto refuses text files a = array('b', b'x'*10) self.f.close() - self.f = open(TESTFN, 'r') + self.f = self.open(TESTFN, 'r') if hasattr(self.f, "readinto"): self.assertRaises(TypeError, self.f.readinto, a) @@ -56,7 +59,7 @@ class AutoFileTests(unittest.TestCase): l = UserList([b'1', b'2']) self.f.writelines(l) self.f.close() - self.f = open(TESTFN, 'rb') + self.f = self.open(TESTFN, 'rb') buf = self.f.read() self.assertEquals(buf, b'12') @@ -126,13 +129,20 @@ class AutoFileTests(unittest.TestCase): def testReadWhenWriting(self): self.assertRaises(IOError, self.f.read) +class CAutoFileTests(AutoFileTests): + open = io.open + +class PyAutoFileTests(AutoFileTests): + open = staticmethod(pyio.open) + + class OtherFileTests(unittest.TestCase): def testModeStrings(self): # check invalid mode strings for mode in ("", "aU", "wU+"): try: - f = open(TESTFN, mode) + f = self.open(TESTFN, mode) except ValueError: pass else: @@ -153,7 +163,7 @@ class OtherFileTests(unittest.TestCase): # verify that we get a sensible error message for bad mode argument bad_mode = "qwerty" try: - f = open(TESTFN, bad_mode) + f = self.open(TESTFN, bad_mode) except ValueError as msg: if msg.args[0] != 0: s = str(msg) @@ -170,11 +180,11 @@ class OtherFileTests(unittest.TestCase): # misbehaviour especially with repeated close() calls for s in (-1, 0, 1, 512): try: - f = open(TESTFN, 'wb', s) + f = self.open(TESTFN, 'wb', s) f.write(str(s).encode("ascii")) f.close() f.close() - f = open(TESTFN, 'rb', s) + f = self.open(TESTFN, 'rb', s) d = int(f.read().decode("ascii")) f.close() f.close() @@ -187,13 +197,13 @@ class OtherFileTests(unittest.TestCase): # "file.truncate fault on windows" os.unlink(TESTFN) - f = open(TESTFN, 'wb') + f = self.open(TESTFN, 'wb') try: f.write(b'12345678901') # 11 bytes f.close() - f = open(TESTFN,'rb+') + f = self.open(TESTFN,'rb+') data = f.read(5) if data != b'12345': self.fail("Read on file opened for update 
failed %r" % data) @@ -233,13 +243,13 @@ class OtherFileTests(unittest.TestCase): try: # Prepare the testfile - bag = open(TESTFN, "wb") + bag = self.open(TESTFN, "wb") bag.write(filler * nchunks) bag.writelines(testlines) bag.close() # Test for appropriate errors mixing read* and iteration for methodname, args in methods: - f = open(TESTFN, 'rb') + f = self.open(TESTFN, 'rb') if next(f) != filler: self.fail, "Broken testfile" meth = getattr(f, methodname) @@ -253,7 +263,7 @@ class OtherFileTests(unittest.TestCase): # ("h", "a", "m", "\n"), so 4096 lines of that should get us # exactly on the buffer boundary for any power-of-2 buffersize # between 4 and 16384 (inclusive). - f = open(TESTFN, 'rb') + f = self.open(TESTFN, 'rb') for i in range(nchunks): next(f) testline = testlines.pop(0) @@ -295,7 +305,7 @@ class OtherFileTests(unittest.TestCase): self.fail("readlines() after next() with empty buffer " "failed. Got %r, expected %r" % (line, testline)) # Reading after iteration hit EOF shouldn't hurt either - f = open(TESTFN, 'rb') + f = self.open(TESTFN, 'rb') try: for line in f: pass @@ -311,12 +321,19 @@ class OtherFileTests(unittest.TestCase): finally: os.unlink(TESTFN) +class COtherFileTests(OtherFileTests): + open = io.open + +class PyOtherFileTests(OtherFileTests): + open = staticmethod(pyio.open) + def test_main(): # Historically, these tests have been sloppy about removing TESTFN. # So get rid of it no matter what. 
try: - run_unittest(AutoFileTests, OtherFileTests) + run_unittest(CAutoFileTests, PyAutoFileTests, + COtherFileTests, PyOtherFileTests) finally: if os.path.exists(TESTFN): os.unlink(TESTFN) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 497914f..615361e 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -10,13 +10,13 @@ from test.support import (TESTFN, findfile, check_warnings, run_unittest, make_bad_fd) from collections import UserList -import _fileio +from _io import FileIO as _FileIO class AutoFileTests(unittest.TestCase): # file tests for which a test file is automatically set up def setUp(self): - self.f = _fileio._FileIO(TESTFN, 'w') + self.f = _FileIO(TESTFN, 'w') def tearDown(self): if self.f: @@ -63,13 +63,13 @@ class AutoFileTests(unittest.TestCase): self.f.write(bytes([1, 2])) self.f.close() a = array('b', b'x'*10) - self.f = _fileio._FileIO(TESTFN, 'r') + self.f = _FileIO(TESTFN, 'r') n = self.f.readinto(a) self.assertEquals(array('b', [1, 2]), a[:n]) def testRepr(self): self.assertEquals(repr(self.f), - "_fileio._FileIO(%d, %s)" % (self.f.fileno(), + "io.FileIO(%d, %s)" % (self.f.fileno(), repr(self.f.mode))) def testErrors(self): @@ -80,7 +80,7 @@ class AutoFileTests(unittest.TestCase): self.assertRaises(ValueError, f.read, 10) # Open for reading f.close() self.assert_(f.closed) - f = _fileio._FileIO(TESTFN, 'r') + f = _FileIO(TESTFN, 'r') self.assertRaises(TypeError, f.readinto, "") self.assert_(not f.closed) f.close() @@ -106,7 +106,7 @@ class AutoFileTests(unittest.TestCase): # Windows always returns "[Errno 13]: Permission denied # Unix calls dircheck() and returns "[Errno 21]: Is a directory" try: - _fileio._FileIO('.', 'r') + _FileIO('.', 'r') except IOError as e: self.assertNotEqual(e.errno, 0) self.assertEqual(e.filename, ".") @@ -118,19 +118,19 @@ class OtherFileTests(unittest.TestCase): def testAbles(self): try: - f = _fileio._FileIO(TESTFN, "w") + f = _FileIO(TESTFN, "w") 
self.assertEquals(f.readable(), False) self.assertEquals(f.writable(), True) self.assertEquals(f.seekable(), True) f.close() - f = _fileio._FileIO(TESTFN, "r") + f = _FileIO(TESTFN, "r") self.assertEquals(f.readable(), True) self.assertEquals(f.writable(), False) self.assertEquals(f.seekable(), True) f.close() - f = _fileio._FileIO(TESTFN, "a+") + f = _FileIO(TESTFN, "a+") self.assertEquals(f.readable(), True) self.assertEquals(f.writable(), True) self.assertEquals(f.seekable(), True) @@ -139,14 +139,14 @@ class OtherFileTests(unittest.TestCase): if sys.platform != "win32": try: - f = _fileio._FileIO("/dev/tty", "a") + f = _FileIO("/dev/tty", "a") except EnvironmentError: # When run in a cron job there just aren't any # ttys, so skip the test. This also handles other # OS'es that don't support /dev/tty. pass else: - f = _fileio._FileIO("/dev/tty", "a") + f = _FileIO("/dev/tty", "a") self.assertEquals(f.readable(), False) self.assertEquals(f.writable(), True) if sys.platform != "darwin" and \ @@ -163,7 +163,7 @@ class OtherFileTests(unittest.TestCase): # check invalid mode strings for mode in ("", "aU", "wU+", "rw", "rt"): try: - f = _fileio._FileIO(TESTFN, mode) + f = _FileIO(TESTFN, mode) except ValueError: pass else: @@ -172,10 +172,26 @@ class OtherFileTests(unittest.TestCase): def testUnicodeOpen(self): # verify repr works for unicode too - f = _fileio._FileIO(str(TESTFN), "w") + f = _FileIO(str(TESTFN), "w") f.close() os.unlink(TESTFN) + def testBytesOpen(self): + # Opening a bytes filename + try: + fn = TESTFN.encode("ascii") + except UnicodeEncodeError: + # Skip test + return + f = _FileIO(fn, "w") + try: + f.write(b"abc") + f.close() + with open(TESTFN, "rb") as f: + self.assertEquals(f.read(), b"abc") + finally: + os.unlink(TESTFN) + def testInvalidFd(self): self.assertRaises(ValueError, _fileio._FileIO, -10) self.assertRaises(OSError, _fileio._FileIO, make_bad_fd()) @@ -184,7 +200,7 @@ class OtherFileTests(unittest.TestCase): # verify that we get a 
sensible error message for bad mode argument bad_mode = "qwerty" try: - f = _fileio._FileIO(TESTFN, bad_mode) + f = _FileIO(TESTFN, bad_mode) except ValueError as msg: if msg.args[0] != 0: s = str(msg) @@ -200,11 +216,11 @@ class OtherFileTests(unittest.TestCase): def bug801631(): # SF bug <http://www.python.org/sf/801631> # "file.truncate fault on windows" - f = _fileio._FileIO(TESTFN, 'w') + f = _FileIO(TESTFN, 'w') f.write(bytes(range(11))) f.close() - f = _fileio._FileIO(TESTFN,'r+') + f = _FileIO(TESTFN,'r+') data = f.read(5) if data != bytes(range(5)): self.fail("Read on file opened for update failed %r" % data) @@ -244,14 +260,14 @@ class OtherFileTests(unittest.TestCase): pass def testInvalidInit(self): - self.assertRaises(TypeError, _fileio._FileIO, "1", 0, 0) + self.assertRaises(TypeError, _FileIO, "1", 0, 0) def testWarnings(self): with check_warnings() as w: self.assertEqual(w.warnings, []) - self.assertRaises(TypeError, _fileio._FileIO, []) + self.assertRaises(TypeError, _FileIO, []) self.assertEqual(w.warnings, []) - self.assertRaises(ValueError, _fileio._FileIO, "/some/invalid/name", "rt") + self.assertRaises(ValueError, _FileIO, "/some/invalid/name", "rt") self.assertEqual(w.warnings, []) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 30869ac..58f33a2 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1,4 +1,23 @@ -"""Unit tests for io.py.""" +"""Unit tests for the io module.""" + +# Tests of io are scattered over the test suite: +# * test_bufio - tests file buffering +# * test_memoryio - tests BytesIO and StringIO +# * test_fileio - tests FileIO +# * test_file - tests the file interface +# * test_io - tests everything else in the io module +# * test_univnewlines - tests universal newline support +# * test_largefile - tests operations on a file greater than 2**32 bytes +# (only enabled with -ulargefile) + +################################################################################ +# ATTENTION TEST WRITERS!!! 
+################################################################################ +# When writing tests for io, it's important to test both the C and Python +# implementations. This is usually done by writing a base test that refers to +# the type it is testing as a attribute. Then it provides custom subclasses to +# test both implementations. This file has lots of examples. +################################################################################ import os import sys @@ -7,27 +26,40 @@ import array import threading import random import unittest -from itertools import chain, cycle +import weakref +import gc +import abc +from itertools import chain, cycle, count +from collections import deque from test import support import codecs -import io # The module under test +import io # C implementation of io +import _pyio as pyio # Python implementation of io -class MockRawIO(io.RawIOBase): +def _default_chunk_size(): + """Get the default TextIOWrapper chunk size""" + with open(__file__, "r", encoding="latin1") as f: + return f._CHUNK_SIZE + + +class MockRawIO: def __init__(self, read_stack=()): self._read_stack = list(read_stack) self._write_stack = [] + self._reads = 0 def read(self, n=None): + self._reads += 1 try: return self._read_stack.pop(0) except: return b"" def write(self, b): - self._write_stack.append(b[:]) + self._write_stack.append(bytes(b)) return len(b) def writable(self): @@ -43,41 +75,148 @@ class MockRawIO(io.RawIOBase): return True def seek(self, pos, whence): - pass + return 0 # wrong but we gotta return something def tell(self): - return 42 + return 0 # same comment as above + + def readinto(self, buf): + self._reads += 1 + max_len = len(buf) + try: + data = self._read_stack[0] + except IndexError: + return 0 + if data is None: + del self._read_stack[0] + return None + n = len(data) + if len(data) <= max_len: + del self._read_stack[0] + buf[:n] = data + return n + else: + buf[:] = data[:max_len] + self._read_stack[0] = data[max_len:] + return 
max_len + + def truncate(self, pos=None): + return pos + +class CMockRawIO(MockRawIO, io.RawIOBase): + pass + +class PyMockRawIO(MockRawIO, pyio.RawIOBase): + pass + + +class MisbehavedRawIO(MockRawIO): + def write(self, b): + return super().write(b) * 2 + + def read(self, n=None): + return super().read(n) * 2 + + def seek(self, pos, whence): + return -123 + + def tell(self): + return -456 + + def readinto(self, buf): + super().readinto(buf) + return len(buf) * 5 + +class CMisbehavedRawIO(MisbehavedRawIO, io.RawIOBase): + pass + +class PyMisbehavedRawIO(MisbehavedRawIO, pyio.RawIOBase): + pass + + +class CloseFailureIO(MockRawIO): + closed = 0 + + def close(self): + if not self.closed: + self.closed = 1 + raise IOError + +class CCloseFailureIO(CloseFailureIO, io.RawIOBase): + pass +class PyCloseFailureIO(CloseFailureIO, pyio.RawIOBase): + pass -class MockFileIO(io.BytesIO): + +class MockFileIO: def __init__(self, data): self.read_history = [] - io.BytesIO.__init__(self, data) + super().__init__(data) def read(self, n=None): - res = io.BytesIO.read(self, n) + res = super().read(n) self.read_history.append(None if res is None else len(res)) return res + def readinto(self, b): + res = super().readinto(b) + self.read_history.append(res) + return res + +class CMockFileIO(MockFileIO, io.BytesIO): + pass + +class PyMockFileIO(MockFileIO, pyio.BytesIO): + pass -class MockNonBlockWriterIO(io.RawIOBase): - def __init__(self, blocking_script): - self._blocking_script = list(blocking_script) +class MockNonBlockWriterIO: + + def __init__(self): self._write_stack = [] + self._blocker_char = None - def write(self, b): - self._write_stack.append(b[:]) - n = self._blocking_script.pop(0) - if (n < 0): - raise io.BlockingIOError(0, "test blocking", -n) - else: - return n + def pop_written(self): + s = b"".join(self._write_stack) + self._write_stack[:] = [] + return s + + def block_on(self, char): + """Block when a given char is encountered.""" + self._blocker_char = char + + def 
readable(self): + return True + + def seekable(self): + return True def writable(self): return True + def write(self, b): + b = bytes(b) + n = -1 + if self._blocker_char: + try: + n = b.index(self._blocker_char) + except ValueError: + pass + else: + self._blocker_char = None + self._write_stack.append(b[:n]) + raise self.BlockingIOError(0, "test blocking", n) + self._write_stack.append(b) + return len(b) + +class CMockNonBlockWriterIO(MockNonBlockWriterIO, io.RawIOBase): + BlockingIOError = io.BlockingIOError + +class PyMockNonBlockWriterIO(MockNonBlockWriterIO, pyio.RawIOBase): + BlockingIOError = pyio.BlockingIOError + class IOTest(unittest.TestCase): @@ -151,13 +290,13 @@ class IOTest(unittest.TestCase): self.assertEqual(f.read(2), b"x") def test_raw_file_io(self): - f = io.open(support.TESTFN, "wb", buffering=0) + f = self.open(support.TESTFN, "wb", buffering=0) self.assertEqual(f.readable(), False) self.assertEqual(f.writable(), True) self.assertEqual(f.seekable(), True) self.write_ops(f) f.close() - f = io.open(support.TESTFN, "rb", buffering=0) + f = self.open(support.TESTFN, "rb", buffering=0) self.assertEqual(f.readable(), True) self.assertEqual(f.writable(), False) self.assertEqual(f.seekable(), True) @@ -165,13 +304,13 @@ class IOTest(unittest.TestCase): f.close() def test_buffered_file_io(self): - f = io.open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") self.assertEqual(f.readable(), False) self.assertEqual(f.writable(), True) self.assertEqual(f.seekable(), True) self.write_ops(f) f.close() - f = io.open(support.TESTFN, "rb") + f = self.open(support.TESTFN, "rb") self.assertEqual(f.readable(), True) self.assertEqual(f.writable(), False) self.assertEqual(f.seekable(), True) @@ -180,22 +319,23 @@ class IOTest(unittest.TestCase): def test_readline(self): f = io.open(support.TESTFN, "wb") - f.write(b"abc\ndef\nxyzzy\nfoo") + f.write(b"abc\ndef\nxyzzy\nfoo\x00bar\nanother line") f.close() - f = io.open(support.TESTFN, "rb") + f = 
self.open(support.TESTFN, "rb") self.assertEqual(f.readline(), b"abc\n") self.assertEqual(f.readline(10), b"def\n") self.assertEqual(f.readline(2), b"xy") self.assertEqual(f.readline(4), b"zzy\n") - self.assertEqual(f.readline(), b"foo") + self.assertEqual(f.readline(), b"foo\x00bar\n") + self.assertEqual(f.readline(), b"another line") f.close() def test_raw_bytes_io(self): - f = io.BytesIO() + f = self.BytesIO() self.write_ops(f) data = f.getvalue() self.assertEqual(data, b"hello world\n") - f = io.BytesIO(data) + f = self.BytesIO(data) self.read_ops(f, True) def test_large_file_ops(self): @@ -211,12 +351,10 @@ class IOTest(unittest.TestCase): print("Use 'regrtest.py -u largefile test_io' to run it.", file=sys.stderr) return - f = io.open(support.TESTFN, "w+b", 0) - self.large_file_ops(f) - f.close() - f = io.open(support.TESTFN, "w+b") - self.large_file_ops(f) - f.close() + with self.open(support.TESTFN, "w+b", 0) as f: + self.large_file_ops(f) + with self.open(support.TESTFN, "w+b") as f: + self.large_file_ops(f) def test_with_open(self): for bufsize in (0, 1, 100): @@ -235,59 +373,107 @@ class IOTest(unittest.TestCase): # issue 5008 def test_append_mode_tell(self): - with io.open(support.TESTFN, "wb") as f: + with self.open(support.TESTFN, "wb") as f: f.write(b"xxx") - with io.open(support.TESTFN, "ab", buffering=0) as f: + with self.open(support.TESTFN, "ab", buffering=0) as f: self.assertEqual(f.tell(), 3) - with io.open(support.TESTFN, "ab") as f: + with self.open(support.TESTFN, "ab") as f: self.assertEqual(f.tell(), 3) - with io.open(support.TESTFN, "a") as f: + with self.open(support.TESTFN, "a") as f: self.assert_(f.tell() > 0) def test_destructor(self): record = [] - class MyFileIO(io.FileIO): + class MyFileIO(self.FileIO): def __del__(self): record.append(1) - io.FileIO.__del__(self) + try: + f = super().__del__ + except AttributeError: + pass + else: + f() def close(self): record.append(2) - io.FileIO.close(self) + super().close() def flush(self): 
record.append(3) - io.FileIO.flush(self) - f = MyFileIO(support.TESTFN, "w") - f.write("xxx") + super().flush() + f = MyFileIO(support.TESTFN, "wb") + f.write(b"xxx") + del f + self.assertEqual(record, [1, 2, 3]) + f = open(support.TESTFN, "rb") + self.assertEqual(f.read(), b"xxx") + + def _check_base_destructor(self, base): + record = [] + class MyIO(base): + def __init__(self): + # This exercises the availability of attributes on object + # destruction. + # (in the C version, close() is called by the tp_dealloc + # function, not by __del__) + self.on_del = 1 + self.on_close = 2 + self.on_flush = 3 + def __del__(self): + record.append(self.on_del) + try: + f = super().__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(self.on_close) + super().close() + def flush(self): + record.append(self.on_flush) + super().flush() + f = MyIO() del f self.assertEqual(record, [1, 2, 3]) + def test_IOBase_destructor(self): + self._check_base_destructor(self.IOBase) + + def test_RawIOBase_destructor(self): + self._check_base_destructor(self.RawIOBase) + + def test_BufferedIOBase_destructor(self): + self._check_base_destructor(self.BufferedIOBase) + + def test_TextIOBase_destructor(self): + self._check_base_destructor(self.TextIOBase) + def test_close_flushes(self): - f = io.open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") f.write(b"xxx") f.close() - f = io.open(support.TESTFN, "rb") + f = self.open(support.TESTFN, "rb") self.assertEqual(f.read(), b"xxx") f.close() def test_array_writes(self): a = array.array('i', range(10)) n = len(a.tostring()) - f = io.open(support.TESTFN, "wb", 0) + f = self.open(support.TESTFN, "wb", 0) self.assertEqual(f.write(a), n) f.close() - f = io.open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") self.assertEqual(f.write(a), n) f.close() def test_closefd(self): - self.assertRaises(ValueError, io.open, support.TESTFN, 'w', + self.assertRaises(ValueError, self.open, support.TESTFN, 'w', 
closefd=False) - def testReadClosed(self): - with io.open(support.TESTFN, "w") as f: + def test_read_closed(self): + with self.open(support.TESTFN, "w") as f: f.write("egg\n") - with io.open(support.TESTFN, "r") as f: - file = io.open(f.fileno(), "r", closefd=False) + with self.open(support.TESTFN, "r") as f: + file = self.open(f.fileno(), "r", closefd=False) self.assertEqual(file.read(), "egg\n") file.seek(0) file.close() @@ -295,85 +481,168 @@ class IOTest(unittest.TestCase): def test_no_closefd_with_filename(self): # can't use closefd in combination with a file name - self.assertRaises(ValueError, io.open, support.TESTFN, "r", closefd=False) + self.assertRaises(ValueError, self.open, support.TESTFN, "r", closefd=False) def test_closefd_attr(self): - with io.open(support.TESTFN, "wb") as f: + with self.open(support.TESTFN, "wb") as f: f.write(b"egg\n") - with io.open(support.TESTFN, "r") as f: + with self.open(support.TESTFN, "r") as f: self.assertEqual(f.buffer.raw.closefd, True) - file = io.open(f.fileno(), "r", closefd=False) + file = self.open(f.fileno(), "r", closefd=False) self.assertEqual(file.buffer.raw.closefd, False) + def test_garbage_collection(self): + # FileIO objects are collected, and collecting them flushes + # all data to disk. 
+ f = self.FileIO(support.TESTFN, "wb") + f.write(b"abcxxx") + f.f = f + wr = weakref.ref(f) + del f + gc.collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"abcxxx") -class MemorySeekTestMixin: - - def testInit(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) - - def testRead(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) - - self.assertEquals(buf[:1], bytesIo.read(1)) - self.assertEquals(buf[1:5], bytesIo.read(4)) - self.assertEquals(buf[5:], bytesIo.read(900)) - self.assertEquals(self.EOF, bytesIo.read()) - - def testReadNoArgs(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) - - self.assertEquals(buf, bytesIo.read()) - self.assertEquals(self.EOF, bytesIo.read()) - - def testSeek(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) - - bytesIo.read(5) - bytesIo.seek(0) - self.assertEquals(buf, bytesIo.read()) - - bytesIo.seek(3) - self.assertEquals(buf[3:], bytesIo.read()) - self.assertRaises(TypeError, bytesIo.seek, 0.0) - - def testTell(self): - buf = self.buftype("1234567890") - bytesIo = self.ioclass(buf) - - self.assertEquals(0, bytesIo.tell()) - bytesIo.seek(5) - self.assertEquals(5, bytesIo.tell()) - bytesIo.seek(10000) - self.assertEquals(10000, bytesIo.tell()) +class CIOTest(IOTest): + pass +class PyIOTest(IOTest): + pass -class BytesIOTest(MemorySeekTestMixin, unittest.TestCase): - @staticmethod - def buftype(s): - return s.encode("utf-8") - ioclass = io.BytesIO - EOF = b"" +class CommonBufferedTests: + # Tests common to BufferedReader, BufferedWriter and BufferedRandom -class StringIOTest(MemorySeekTestMixin, unittest.TestCase): - buftype = str - ioclass = io.StringIO - EOF = "" + def test_fileno(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertEquals(42, bufio.fileno()) -class BufferedReaderTest(unittest.TestCase): + def test_no_fileno(self): + # XXX will we always have fileno() 
function? If so, kill + # this test. Else, write it. + pass - def testRead(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_invalid_args(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + # Invalid whence + self.assertRaises(ValueError, bufio.seek, 0, -1) + self.assertRaises(ValueError, bufio.seek, 0, 3) + def test_override_destructor(self): + tp = self.tp + record = [] + class MyBufferedIO(tp): + def __del__(self): + record.append(1) + try: + f = super().__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(2) + super().close() + def flush(self): + record.append(3) + super().flush() + rawio = self.MockRawIO() + bufio = MyBufferedIO(rawio) + writable = bufio.writable() + del bufio + if writable: + self.assertEqual(record, [1, 2, 3]) + else: + self.assertEqual(record, [1, 2]) + + def test_context_manager(self): + # Test usability as a context manager + rawio = self.MockRawIO() + bufio = self.tp(rawio) + def _with(): + with bufio: + pass + _with() + # bufio should now be closed, and using it a second time should raise + # a ValueError. + self.assertRaises(ValueError, _with) + + def test_error_through_destructor(self): + # Test that the exception state is not modified by a destructor, + # even if close() fails. 
+ rawio = self.CloseFailureIO() + def f(): + self.tp(rawio).xyzzy + with support.captured_output("stderr") as s: + self.assertRaises(AttributeError, f) + s = s.getvalue().strip() + if s: + # The destructor *may* have printed an unraisable error, check it + self.assertEqual(len(s.splitlines()), 1) + self.assert_(s.startswith("Exception IOError: "), s) + self.assert_(s.endswith(" ignored"), s) + + +class BufferedReaderTest(unittest.TestCase, CommonBufferedTests): + read_mode = "rb" + + def test_constructor(self): + rawio = self.MockRawIO([b"abc"]) + bufio = self.tp(rawio) + bufio.__init__(rawio) + bufio.__init__(rawio, buffer_size=1024) + bufio.__init__(rawio, buffer_size=16) + self.assertEquals(b"abc", bufio.read()) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + rawio = self.MockRawIO([b"abc"]) + bufio.__init__(rawio) + self.assertEquals(b"abc", bufio.read()) + + def test_read(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) self.assertEquals(b"abcdef", bufio.read(6)) - - def testBuffering(self): + # Invalid args + self.assertRaises(ValueError, bufio.read, -2) + + def test_read1(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + self.assertEquals(b"a", bufio.read(1)) + self.assertEquals(b"b", bufio.read1(1)) + self.assertEquals(rawio._reads, 1) + self.assertEquals(b"c", bufio.read1(100)) + self.assertEquals(rawio._reads, 1) + self.assertEquals(b"d", bufio.read1(100)) + self.assertEquals(rawio._reads, 2) + self.assertEquals(b"efg", bufio.read1(100)) + self.assertEquals(rawio._reads, 3) + self.assertEquals(b"", bufio.read1(100)) + # Invalid args + self.assertRaises(ValueError, bufio.read1, -1) + + def test_readinto(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + b = bytearray(2) + 
self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"ab") + self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"cd") + self.assertEquals(bufio.readinto(b), 2) + self.assertEquals(b, b"ef") + self.assertEquals(bufio.readinto(b), 1) + self.assertEquals(b, b"gf") + self.assertEquals(bufio.readinto(b), 0) + self.assertEquals(b, b"gf") + + def test_buffering(self): data = b"abcdefghi" dlen = len(data) @@ -384,49 +653,40 @@ class BufferedReaderTest(unittest.TestCase): ] for bufsize, buf_read_sizes, raw_read_sizes in tests: - rawio = MockFileIO(data) - bufio = io.BufferedReader(rawio, buffer_size=bufsize) + rawio = self.MockFileIO(data) + bufio = self.tp(rawio, buffer_size=bufsize) pos = 0 for nbytes in buf_read_sizes: self.assertEquals(bufio.read(nbytes), data[pos:pos+nbytes]) pos += nbytes + # this is mildly implementation-dependent self.assertEquals(rawio.read_history, raw_read_sizes) - def testReadNonBlocking(self): + def test_read_non_blocking(self): # Inject some None's in there to simulate EWOULDBLOCK - rawio = MockRawIO((b"abc", b"d", None, b"efg", None, None)) - bufio = io.BufferedReader(rawio) + rawio = self.MockRawIO((b"abc", b"d", None, b"efg", None, None, None)) + bufio = self.tp(rawio) self.assertEquals(b"abcd", bufio.read(6)) self.assertEquals(b"e", bufio.read(1)) self.assertEquals(b"fg", bufio.read()) + self.assertEquals(b"", bufio.peek(1)) self.assert_(None is bufio.read()) self.assertEquals(b"", bufio.read()) - def testReadToEof(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_read_past_eof(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) self.assertEquals(b"abcdefg", bufio.read(9000)) - def testReadNoArgs(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) + def test_read_all(self): + rawio = self.MockRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) self.assertEquals(b"abcdefg", bufio.read()) - def 
testFileno(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedReader(rawio) - - self.assertEquals(42, bufio.fileno()) - - def testFilenoNoFileno(self): - # XXX will we always have fileno() function? If so, kill - # this test. Else, write it. - pass - - def testThreads(self): + def test_threads(self): try: # Write out many bytes with exactly the same number of 0's, # 1's... 255's. This will help us check that concurrent reading @@ -437,8 +697,8 @@ class BufferedReaderTest(unittest.TestCase): s = bytes(bytearray(l)) with io.open(support.TESTFN, "wb") as f: f.write(s) - with io.open(support.TESTFN, "rb", buffering=0) as raw: - bufio = io.BufferedReader(raw, 8) + with io.open(support.TESTFN, self.read_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) errors = [] results = [] def f(): @@ -468,80 +728,230 @@ class BufferedReaderTest(unittest.TestCase): finally: support.unlink(support.TESTFN) + def test_misbehaved_io(self): + rawio = self.MisbehavedRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + self.assertRaises(IOError, bufio.seek, 0) + self.assertRaises(IOError, bufio.tell) + +class CBufferedReaderTest(BufferedReaderTest): + tp = io.BufferedReader + + def test_constructor(self): + BufferedReaderTest.test_constructor(self) + # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. 
+ if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) + + def test_initialization(self): + rawio = self.MockRawIO([b"abc"]) + bufio = self.tp(rawio) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.read) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.read) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + self.assertRaises(ValueError, bufio.read) + + def test_misbehaved_io_read(self): + rawio = self.MisbehavedRawIO((b"abc", b"d", b"efg")) + bufio = self.tp(rawio) + # _pyio.BufferedReader seems to implement reading different, so that + # checking this is not so easy. + self.assertRaises(IOError, bufio.read, 10) + + def test_garbage_collection(self): + # C BufferedReader objects are collected. + # The Python version has __del__, so it ends into gc.garbage instead + rawio = self.FileIO(support.TESTFN, "w+b") + f = self.tp(rawio) + f.f = f + wr = weakref.ref(f) + del f + gc.collect() + self.assert_(wr() is None, wr) +class PyBufferedReaderTest(BufferedReaderTest): + tp = pyio.BufferedReader -class BufferedWriterTest(unittest.TestCase): - - def testWrite(self): - # Write to the buffered IO but don't overflow the buffer. 
- writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) - - bufio.write(b"abc") - self.assertFalse(writer._write_stack) +class BufferedWriterTest(unittest.TestCase, CommonBufferedTests): + write_mode = "wb" - def testWriteOverflow(self): - writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) + def test_constructor(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + bufio.__init__(rawio) + bufio.__init__(rawio, buffer_size=1024) + bufio.__init__(rawio, buffer_size=16) + self.assertEquals(3, bufio.write(b"abc")) + bufio.flush() + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + bufio.__init__(rawio) + self.assertEquals(3, bufio.write(b"ghi")) + bufio.flush() + self.assertEquals(b"".join(rawio._write_stack), b"abcghi") + def test_write(self): + # Write to the buffered IO but don't overflow the buffer. + writer = self.MockRawIO() + bufio = self.tp(writer, 8) bufio.write(b"abc") - bufio.write(b"defghijkl") - - self.assertEquals(b"abcdefghijkl", writer._write_stack[0]) - - def testWriteNonBlocking(self): - raw = MockNonBlockWriterIO((9, 2, 22, -6, 10, 12, 12)) - bufio = io.BufferedWriter(raw, 8, 16) - - bufio.write(b"asdf") - bufio.write(b"asdfa") - self.assertEquals(b"asdfasdfa", raw._write_stack[0]) - - bufio.write(b"asdfasdfasdf") - self.assertEquals(b"asdfasdfasdf", raw._write_stack[1]) - bufio.write(b"asdfasdfasdf") - self.assertEquals(b"dfasdfasdf", raw._write_stack[2]) - self.assertEquals(b"asdfasdfasdf", raw._write_stack[3]) - - bufio.write(b"asdfasdfasdf") - - # XXX I don't like this test. It relies too heavily on how the - # algorithm actually works, which we might change. Refactor - # later. 
- - def testFileno(self): - rawio = MockRawIO((b"abc", b"d", b"efg")) - bufio = io.BufferedWriter(rawio) - - self.assertEquals(42, bufio.fileno()) + self.assertFalse(writer._write_stack) - def testFlush(self): - writer = MockRawIO() - bufio = io.BufferedWriter(writer, 8) + def test_write_overflow(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) + contents = b"abcdefghijklmnop" + for n in range(0, len(contents), 3): + bufio.write(contents[n:n+3]) + flushed = b"".join(writer._write_stack) + # At least (total - 8) bytes were implicitly flushed, perhaps more + # depending on the implementation. + self.assert_(flushed.startswith(contents[:-8]), flushed) + + def check_writes(self, intermediate_func): + # Lots of writes, test the flushed output is as expected. + contents = bytes(range(256)) * 1000 + n = 0 + writer = self.MockRawIO() + bufio = self.tp(writer, 13) + # Generator of write sizes: repeat each N 15 times then proceed to N+1 + def gen_sizes(): + for size in count(1): + for i in range(15): + yield size + sizes = gen_sizes() + while n < len(contents): + size = min(next(sizes), len(contents) - n) + self.assertEquals(bufio.write(contents[n:n+size]), size) + intermediate_func(bufio) + n += size + bufio.flush() + self.assertEquals(contents, b"".join(writer._write_stack)) + + def test_writes(self): + self.check_writes(lambda bufio: None) + + def test_writes_and_flushes(self): + self.check_writes(lambda bufio: bufio.flush()) + + def test_writes_and_seeks(self): + def _seekabs(bufio): + pos = bufio.tell() + bufio.seek(pos + 1, 0) + bufio.seek(pos - 1, 0) + bufio.seek(pos, 0) + self.check_writes(_seekabs) + def _seekrel(bufio): + pos = bufio.seek(0, 1) + bufio.seek(+1, 1) + bufio.seek(-1, 1) + bufio.seek(pos, 0) + self.check_writes(_seekrel) + + def test_writes_and_truncates(self): + self.check_writes(lambda bufio: bufio.truncate(bufio.tell())) + + def test_write_non_blocking(self): + raw = self.MockNonBlockWriterIO() + bufio = self.tp(raw, 8, 8) + + 
self.assertEquals(bufio.write(b"abcd"), 4) + self.assertEquals(bufio.write(b"efghi"), 5) + # 1 byte will be written, the rest will be buffered + raw.block_on(b"k") + self.assertEquals(bufio.write(b"jklmn"), 5) + + # 8 bytes will be written, 8 will be buffered and the rest will be lost + raw.block_on(b"0") + try: + bufio.write(b"opqrwxyz0123456789") + except self.BlockingIOError as e: + written = e.characters_written + else: + self.fail("BlockingIOError should have been raised") + self.assertEquals(written, 16) + self.assertEquals(raw.pop_written(), + b"abcdefghijklmnopqrwxyz") + + self.assertEquals(bufio.write(b"ABCDEFGHI"), 9) + s = raw.pop_written() + # Previously buffered bytes were flushed + self.assertTrue(s.startswith(b"01234567A"), s) + + def test_write_and_rewind(self): + raw = io.BytesIO() + bufio = self.tp(raw, 4) + self.assertEqual(bufio.write(b"abcdef"), 6) + self.assertEqual(bufio.tell(), 6) + bufio.seek(0, 0) + self.assertEqual(bufio.write(b"XY"), 2) + bufio.seek(6, 0) + self.assertEqual(raw.getvalue(), b"XYcdef") + self.assertEqual(bufio.write(b"123456"), 6) + bufio.flush() + self.assertEqual(raw.getvalue(), b"XYcdef123456") + def test_flush(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) bufio.write(b"abc") bufio.flush() + self.assertEquals(b"abc", writer._write_stack[0]) + def test_destructor(self): + writer = self.MockRawIO() + bufio = self.tp(writer, 8) + bufio.write(b"abc") + del bufio self.assertEquals(b"abc", writer._write_stack[0]) - def testThreads(self): - # BufferedWriter should not raise exceptions or crash - # when called from multiple threads. + def test_truncate(self): + # Truncate implicitly flushes the buffer. 
+ with io.open(support.TESTFN, self.write_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) + bufio.write(b"abcdef") + self.assertEqual(bufio.truncate(3), 3) + self.assertEqual(bufio.tell(), 3) + with io.open(support.TESTFN, "rb", buffering=0) as f: + self.assertEqual(f.read(), b"abc") + + def test_threads(self): try: + # Write out many bytes from many threads and test they were + # all flushed. + N = 1000 + contents = bytes(range(256)) * N + sizes = cycle([1, 19]) + n = 0 + queue = deque() + while n < len(contents): + size = next(sizes) + queue.append(contents[n:n+size]) + n += size + del contents # We use a real file object because it allows us to # exercise situations where the GIL is released before # writing the buffer to the raw streams. This is in addition # to concurrency issues due to switching threads in the middle # of Python code. - with io.open(support.TESTFN, "wb", buffering=0) as raw: - bufio = io.BufferedWriter(raw, 8) + with io.open(support.TESTFN, self.write_mode, buffering=0) as raw: + bufio = self.tp(raw, 8) errors = [] def f(): try: - # Write enough bytes to flush the buffer - s = b"a" * 19 - for i in range(50): + while True: + try: + s = queue.popleft() + except IndexError: + return bufio.write(s) except Exception as e: errors.append(e) @@ -554,37 +964,102 @@ class BufferedWriterTest(unittest.TestCase): t.join() self.assertFalse(errors, "the following exceptions were caught: %r" % errors) + bufio.close() + with io.open(support.TESTFN, "rb") as f: + s = f.read() + for i in range(256): + self.assertEquals(s.count(bytes([i])), N) finally: support.unlink(support.TESTFN) + def test_misbehaved_io(self): + rawio = self.MisbehavedRawIO() + bufio = self.tp(rawio, 5) + self.assertRaises(IOError, bufio.seek, 0) + self.assertRaises(IOError, bufio.tell) + self.assertRaises(IOError, bufio.write, b"abcdef") + +class CBufferedWriterTest(BufferedWriterTest): + tp = io.BufferedWriter + + def test_constructor(self): + BufferedWriterTest.test_constructor(self) 
+ # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. + if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) + + def test_initialization(self): + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=0) + self.assertRaises(ValueError, bufio.write, b"def") + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-16) + self.assertRaises(ValueError, bufio.write, b"def") + self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) + self.assertRaises(ValueError, bufio.write, b"def") + + def test_garbage_collection(self): + # C BufferedWriter objects are collected, and collecting them flushes + # all data to disk. + # The Python version has __del__, so it ends into gc.garbage instead + rawio = self.FileIO(support.TESTFN, "w+b") + f = self.tp(rawio) + f.write(b"123xxx") + f.x = f + wr = weakref.ref(f) + del f + gc.collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"123xxx") + + +class PyBufferedWriterTest(BufferedWriterTest): + tp = pyio.BufferedWriter class BufferedRWPairTest(unittest.TestCase): - def testRWPair(self): - r = MockRawIO(()) - w = MockRawIO() - pair = io.BufferedRWPair(r, w) + def test_basic(self): + r = self.MockRawIO(()) + w = self.MockRawIO() + pair = self.tp(r, w) self.assertFalse(pair.closed) # XXX More Tests +class CBufferedRWPairTest(BufferedRWPairTest): + tp = io.BufferedRWPair -class BufferedRandomTest(unittest.TestCase): +class PyBufferedRWPairTest(BufferedRWPairTest): + tp = pyio.BufferedRWPair - def testReadAndWrite(self): - raw = MockRawIO((b"asdf", b"ghjk")) - rw = io.BufferedRandom(raw, 8, 12) + +class BufferedRandomTest(BufferedReaderTest, BufferedWriterTest): + read_mode = "rb+" + write_mode = "wb+" + + def 
test_constructor(self): + BufferedReaderTest.test_constructor(self) + BufferedWriterTest.test_constructor(self) + + def test_read_and_write(self): + raw = self.MockRawIO((b"asdf", b"ghjk")) + rw = self.tp(raw, 8, 12) self.assertEqual(b"as", rw.read(2)) rw.write(b"ddd") rw.write(b"eee") self.assertFalse(raw._write_stack) # Buffer writes - self.assertEqual(b"ghjk", rw.read()) # This read forces write flush + self.assertEqual(b"ghjk", rw.read()) self.assertEquals(b"dddeee", raw._write_stack[0]) - def testSeekAndTell(self): - raw = io.BytesIO(b"asdfghjkl") - rw = io.BufferedRandom(raw) + def test_seek_and_tell(self): + raw = self.BytesIO(b"asdfghjkl") + rw = self.tp(raw) self.assertEquals(b"as", rw.read(2)) self.assertEquals(2, rw.tell()) @@ -602,6 +1077,115 @@ class BufferedRandomTest(unittest.TestCase): self.assertEquals(b"fl", rw.read(11)) self.assertRaises(TypeError, rw.seek, 0.0) + def check_flush_and_read(self, read_func): + raw = self.BytesIO(b"abcdefghi") + bufio = self.tp(raw) + + self.assertEquals(b"ab", read_func(bufio, 2)) + bufio.write(b"12") + self.assertEquals(b"ef", read_func(bufio, 2)) + self.assertEquals(6, bufio.tell()) + bufio.flush() + self.assertEquals(6, bufio.tell()) + self.assertEquals(b"ghi", read_func(bufio)) + raw.seek(0, 0) + raw.write(b"XYZ") + # flush() resets the read buffer + bufio.flush() + bufio.seek(0, 0) + self.assertEquals(b"XYZ", read_func(bufio, 3)) + + def test_flush_and_read(self): + self.check_flush_and_read(lambda bufio, *args: bufio.read(*args)) + + def test_flush_and_readinto(self): + def _readinto(bufio, n=-1): + b = bytearray(n if n >= 0 else 9999) + n = bufio.readinto(b) + return bytes(b[:n]) + self.check_flush_and_read(_readinto) + + def test_flush_and_peek(self): + def _peek(bufio, n=-1): + # This relies on the fact that the buffer can contain the whole + # raw stream, otherwise peek() can return less. 
+ b = bufio.peek(n) + if n != -1: + b = b[:n] + bufio.seek(len(b), 1) + return b + self.check_flush_and_read(_peek) + + def test_flush_and_write(self): + raw = self.BytesIO(b"abcdefghi") + bufio = self.tp(raw) + + bufio.write(b"123") + bufio.flush() + bufio.write(b"45") + bufio.flush() + bufio.seek(0, 0) + self.assertEquals(b"12345fghi", raw.getvalue()) + self.assertEquals(b"12345fghi", bufio.read()) + + def test_threads(self): + BufferedReaderTest.test_threads(self) + BufferedWriterTest.test_threads(self) + + def test_writes_and_peek(self): + def _peek(bufio): + bufio.peek(1) + self.check_writes(_peek) + def _peek(bufio): + pos = bufio.tell() + bufio.seek(-1, 1) + bufio.peek(1) + bufio.seek(pos, 0) + self.check_writes(_peek) + + def test_writes_and_reads(self): + def _read(bufio): + bufio.seek(-1, 1) + bufio.read(1) + self.check_writes(_read) + + def test_writes_and_read1s(self): + def _read1(bufio): + bufio.seek(-1, 1) + bufio.read1(1) + self.check_writes(_read1) + + def test_writes_and_readintos(self): + def _read(bufio): + bufio.seek(-1, 1) + bufio.readinto(bytearray(1)) + self.check_writes(_read) + + def test_misbehaved_io(self): + BufferedReaderTest.test_misbehaved_io(self) + BufferedWriterTest.test_misbehaved_io(self) + +class CBufferedRandomTest(BufferedRandomTest): + tp = io.BufferedRandom + + def test_constructor(self): + BufferedRandomTest.test_constructor(self) + # The allocation can succeed on 32-bit builds, e.g. with more + # than 2GB RAM and a 64-bit kernel. 
+ if sys.maxsize > 0x7FFFFFFF: + rawio = self.MockRawIO() + bufio = self.tp(rawio) + self.assertRaises((OverflowError, MemoryError, ValueError), + bufio.__init__, rawio, sys.maxsize) + + def test_garbage_collection(self): + CBufferedReaderTest.test_garbage_collection(self) + CBufferedWriterTest.test_garbage_collection(self) + +class PyBufferedRandomTest(BufferedRandomTest): + tp = pyio.BufferedRandom + + # To fully exercise seek/tell, the StatefulIncrementalDecoder has these # properties: # - A single output character can correspond to many bytes of input. @@ -735,7 +1319,7 @@ class StatefulIncrementalDecoderTest(unittest.TestCase): 'm--------------.') ] - def testDecoder(self): + def test_decoder(self): # Try a few one-shot test cases. for input, eof, output in self.test_cases: d = StatefulIncrementalDecoder() @@ -756,10 +1340,24 @@ class TextIOWrapperTest(unittest.TestCase): def tearDown(self): support.unlink(support.TESTFN) - def testLineBuffering(self): - r = io.BytesIO() - b = io.BufferedWriter(r, 1000) - t = io.TextIOWrapper(b, newline="\n", line_buffering=True) + def test_constructor(self): + r = self.BytesIO(b"\xc3\xa9\n\n") + b = self.BufferedReader(r, 1000) + t = self.TextIOWrapper(b) + t.__init__(b, encoding="latin1", newline="\r\n") + self.assertEquals(t.encoding, "latin1") + self.assertEquals(t.line_buffering, False) + t.__init__(b, encoding="utf8", line_buffering=True) + self.assertEquals(t.encoding, "utf8") + self.assertEquals(t.line_buffering, True) + self.assertEquals("\xe9\n", t.readline()) + self.assertRaises(TypeError, t.__init__, b, newline=42) + self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') + + def test_line_buffering(self): + r = self.BytesIO() + b = self.BufferedWriter(r, 1000) + t = self.TextIOWrapper(b, newline="\n", line_buffering=True) t.write("X") self.assertEquals(r.getvalue(), b"") # No flush happened t.write("Y\nZ") @@ -767,83 +1365,58 @@ class TextIOWrapperTest(unittest.TestCase): t.write("A\rB") 
self.assertEquals(r.getvalue(), b"XY\nZA\rB") - def testEncodingErrorsReading(self): + def test_encoding(self): + # Check the encoding attribute is always set, and valid + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="utf8") + self.assertEqual(t.encoding, "utf8") + t = self.TextIOWrapper(b) + self.assert_(t.encoding is not None) + codecs.lookup(t.encoding) + + def test_encoding_errors_reading(self): # (1) default - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii") self.assertRaises(UnicodeError, t.read) # (2) explicit strict - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii", errors="strict") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="strict") self.assertRaises(UnicodeError, t.read) # (3) ignore - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii", errors="ignore") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="ignore") self.assertEquals(t.read(), "abc\n\n") # (4) replace - b = io.BytesIO(b"abc\n\xff\n") - t = io.TextIOWrapper(b, encoding="ascii", errors="replace") + b = self.BytesIO(b"abc\n\xff\n") + t = self.TextIOWrapper(b, encoding="ascii", errors="replace") self.assertEquals(t.read(), "abc\n\ufffd\n") - def testEncodingErrorsWriting(self): + def test_encoding_errors_writing(self): # (1) default - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii") + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii") self.assertRaises(UnicodeError, t.write, "\xff") # (2) explicit strict - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="strict") + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii", errors="strict") self.assertRaises(UnicodeError, t.write, "\xff") # (3) ignore - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="ignore", + b = self.BytesIO() 
+ t = self.TextIOWrapper(b, encoding="ascii", errors="ignore", newline="\n") t.write("abc\xffdef\n") t.flush() self.assertEquals(b.getvalue(), b"abcdef\n") # (4) replace - b = io.BytesIO() - t = io.TextIOWrapper(b, encoding="ascii", errors="replace", + b = self.BytesIO() + t = self.TextIOWrapper(b, encoding="ascii", errors="replace", newline="\n") t.write("abc\xffdef\n") t.flush() self.assertEquals(b.getvalue(), b"abc?def\n") - def testNewlinesInput(self): - testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" - normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") - for newline, expected in [ - (None, normalized.decode("ascii").splitlines(True)), - ("", testdata.decode("ascii").splitlines(True)), - ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), - ]: - buf = io.BytesIO(testdata) - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - self.assertEquals(txt.readlines(), expected) - txt.seek(0) - self.assertEquals(txt.read(), "".join(expected)) - - def testNewlinesOutput(self): - testdict = { - "": b"AAA\nBBB\nCCC\nX\rY\r\nZ", - "\n": b"AAA\nBBB\nCCC\nX\rY\r\nZ", - "\r": b"AAA\rBBB\rCCC\rX\rY\r\rZ", - "\r\n": b"AAA\r\nBBB\r\nCCC\r\nX\rY\r\r\nZ", - } - tests = [(None, testdict[os.linesep])] + sorted(testdict.items()) - for newline, expected in tests: - buf = io.BytesIO() - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - txt.write("AAA\nB") - txt.write("BB\nCCC\n") - txt.write("X\rY\r\nZ") - txt.flush() - self.assertEquals(buf.closed, False) - self.assertEquals(buf.getvalue(), expected) - - def testNewlines(self): + def test_newlines(self): input_lines = [ "unix\n", "windows\r\n", "os9\r", "last\n", "nonl" ] tests = [ @@ -867,8 +1440,8 @@ class TextIOWrapperTest(unittest.TestCase): for do_reads in (False, True): for bufsize in range(1, 10): for newline, exp_lines in tests: - bufio 
= io.BufferedReader(io.BytesIO(data), bufsize) - textio = io.TextIOWrapper(bufio, newline=newline, + bufio = self.BufferedReader(self.BytesIO(data), bufsize) + textio = self.TextIOWrapper(bufio, newline=newline, encoding=encoding) if do_reads: got_lines = [] @@ -885,60 +1458,100 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(got_line, exp_line) self.assertEquals(len(got_lines), len(exp_lines)) - def testNewlinesInput(self): - testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" + def test_newlines_input(self): + testdata = b"AAA\nBB\x00B\nCCC\rDDD\rEEE\r\nFFF\r\nGGG" normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n") for newline, expected in [ (None, normalized.decode("ascii").splitlines(True)), ("", testdata.decode("ascii").splitlines(True)), - ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), - ("\r", ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), + ("\n", ["AAA\n", "BB\x00B\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), + ("\r\n", ["AAA\nBB\x00B\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]), + ("\r", ["AAA\nBB\x00B\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]), ]: - buf = io.BytesIO(testdata) - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) + buf = self.BytesIO(testdata) + txt = self.TextIOWrapper(buf, encoding="ascii", newline=newline) self.assertEquals(txt.readlines(), expected) txt.seek(0) self.assertEquals(txt.read(), "".join(expected)) - def testNewlinesOutput(self): - data = "AAA\nBBB\rCCC\n" - data_lf = b"AAA\nBBB\rCCC\n" - data_cr = b"AAA\rBBB\rCCC\r" - data_crlf = b"AAA\r\nBBB\rCCC\r\n" - save_linesep = os.linesep - try: - for os.linesep, newline, expected in [ - ("\n", None, data_lf), - ("\r\n", None, data_crlf), - ("\n", "", data_lf), - ("\r\n", "", data_lf), - ("\n", "\n", data_lf), - ("\r\n", "\n", data_lf), - ("\n", "\r", data_cr), - ("\r\n", "\r", data_cr), - ("\n", "\r\n", data_crlf), - ("\r\n", "\r\n", 
data_crlf), - ]: - buf = io.BytesIO() - txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline) - txt.write(data) - txt.close() - self.assertEquals(buf.closed, True) - self.assertRaises(ValueError, buf.getvalue) - finally: - os.linesep = save_linesep + def test_newlines_output(self): + testdict = { + "": b"AAA\nBBB\nCCC\nX\rY\r\nZ", + "\n": b"AAA\nBBB\nCCC\nX\rY\r\nZ", + "\r": b"AAA\rBBB\rCCC\rX\rY\r\rZ", + "\r\n": b"AAA\r\nBBB\r\nCCC\r\nX\rY\r\r\nZ", + } + tests = [(None, testdict[os.linesep])] + sorted(testdict.items()) + for newline, expected in tests: + buf = self.BytesIO() + txt = self.TextIOWrapper(buf, encoding="ascii", newline=newline) + txt.write("AAA\nB") + txt.write("BB\nCCC\n") + txt.write("X\rY\r\nZ") + txt.flush() + self.assertEquals(buf.closed, False) + self.assertEquals(buf.getvalue(), expected) + + def test_destructor(self): + l = [] + base = self.BytesIO + class MyBytesIO(base): + def close(self): + l.append(self.getvalue()) + base.close(self) + b = MyBytesIO() + t = self.TextIOWrapper(b, encoding="ascii") + t.write("abc") + del t + self.assertEquals([b"abc"], l) + + def test_override_destructor(self): + record = [] + class MyTextIO(self.TextIOWrapper): + def __del__(self): + record.append(1) + try: + f = super().__del__ + except AttributeError: + pass + else: + f() + def close(self): + record.append(2) + super().close() + def flush(self): + record.append(3) + super().flush() + b = self.BytesIO() + t = MyTextIO(b, encoding="ascii") + del t + self.assertEqual(record, [1, 2, 3]) + + def test_error_through_destructor(self): + # Test that the exception state is not modified by a destructor, + # even if close() fails. 
+ rawio = self.CloseFailureIO() + def f(): + self.TextIOWrapper(rawio).xyzzy + with support.captured_output("stderr") as s: + self.assertRaises(AttributeError, f) + s = s.getvalue().strip() + if s: + # The destructor *may* have printed an unraisable error, check it + self.assertEqual(len(s.splitlines()), 1) + self.assert_(s.startswith("Exception IOError: "), s) + self.assert_(s.endswith(" ignored"), s) # Systematic tests of the text I/O API - def testBasicIO(self): + def test_basic_io(self): for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65): for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le": - f = io.open(support.TESTFN, "w+", encoding=enc) + f = self.open(support.TESTFN, "w+", encoding=enc) f._CHUNK_SIZE = chunksize self.assertEquals(f.write("abc"), 3) f.close() - f = io.open(support.TESTFN, "r+", encoding=enc) + f = self.open(support.TESTFN, "r+", encoding=enc) f._CHUNK_SIZE = chunksize self.assertEquals(f.tell(), 0) self.assertEquals(f.read(), "abc") @@ -980,8 +1593,8 @@ class TextIOWrapperTest(unittest.TestCase): rlines.append((pos, line)) self.assertEquals(rlines, wlines) - def testTelling(self): - f = io.open(support.TESTFN, "w+", encoding="utf8") + def test_telling(self): + f = self.open(support.TESTFN, "w+", encoding="utf8") p0 = f.tell() f.write("\xff\n") p1 = f.tell() @@ -1000,8 +1613,8 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(f.tell(), p2) f.close() - def testSeeking(self): - chunk_size = io.TextIOWrapper._CHUNK_SIZE + def test_seeking(self): + chunk_size = _default_chunk_size() prefix_size = chunk_size - 2 u_prefix = "a" * prefix_size prefix = bytes(u_prefix.encode("utf-8")) @@ -1009,48 +1622,46 @@ class TextIOWrapperTest(unittest.TestCase): u_suffix = "\u8888\n" suffix = bytes(u_suffix.encode("utf-8")) line = prefix + suffix - f = io.open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") f.write(line*2) f.close() - f = io.open(support.TESTFN, "r", encoding="utf-8") + f = 
self.open(support.TESTFN, "r", encoding="utf-8") s = f.read(prefix_size) self.assertEquals(s, str(prefix, "ascii")) self.assertEquals(f.tell(), prefix_size) self.assertEquals(f.readline(), u_suffix) - def testSeekingToo(self): + def test_seeking_too(self): # Regression test for a specific bug data = b'\xe0\xbf\xbf\n' - f = io.open(support.TESTFN, "wb") + f = self.open(support.TESTFN, "wb") f.write(data) f.close() - f = io.open(support.TESTFN, "r", encoding="utf-8") + f = self.open(support.TESTFN, "r", encoding="utf-8") f._CHUNK_SIZE # Just test that it exists f._CHUNK_SIZE = 2 f.readline() f.tell() - def testSeekAndTell(self): - """Test seek/tell using the StatefulIncrementalDecoder.""" - # Make this test faster by forcing a smaller (but large enough) - # chunk size. The bigger the chunker size, the slower seek() is, - # as it tries to replay character decoding one byte at a time. - CHUNK_SIZE = 256 + def test_seek_and_tell(self): + #Test seek/tell using the StatefulIncrementalDecoder. + # Make test faster by doing smaller seeks + CHUNK_SIZE = 128 def testSeekAndTellWithData(data, min_pos=0): """Tell/seek to various points within a data stream and ensure that the decoded data returned by read() is consistent.""" - f = io.open(support.TESTFN, 'wb') + f = self.open(support.TESTFN, 'wb') f.write(data) f.close() - f = io.open(support.TESTFN, encoding='test_decoder') + f = self.open(support.TESTFN, encoding='test_decoder') + f._CHUNK_SIZE = CHUNK_SIZE decoded = f.read() f.close() for i in range(min_pos, len(decoded) + 1): # seek positions for j in [1, 5, len(decoded) - i]: # read lengths - f = io.open(support.TESTFN, encoding='test_decoder') - f._CHUNK_SIZE = CHUNK_SIZE + f = self.open(support.TESTFN, encoding='test_decoder') self.assertEquals(f.read(i), decoded[:i]) cookie = f.tell() self.assertEquals(f.read(j), decoded[i:i + j]) @@ -1079,7 +1690,7 @@ class TextIOWrapperTest(unittest.TestCase): finally: StatefulIncrementalDecoder.codecEnabled = 0 - def 
testEncodedWrites(self): + def test_encoded_writes(self): data = "1234567890" tests = ("utf-16", "utf-16-le", @@ -1088,8 +1699,8 @@ class TextIOWrapperTest(unittest.TestCase): "utf-32-le", "utf-32-be") for encoding in tests: - buf = io.BytesIO() - f = io.TextIOWrapper(buf, encoding=encoding) + buf = self.BytesIO() + f = self.TextIOWrapper(buf, encoding=encoding) # Check if the BOM is written only once (see issue1753). f.write(data) f.write(data) @@ -1097,45 +1708,8 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(f.read(), data * 2) self.assertEquals(buf.getvalue(), (data * 2).encode(encoding)) - def timingTest(self): - timer = time.time - enc = "utf8" - line = "\0\x0f\xff\u0fff\uffff\U000fffff\U0010ffff"*3 + "\n" - nlines = 10000 - nchars = len(line) - nbytes = len(line.encode(enc)) - for chunk_size in (32, 64, 128, 256): - f = io.open(support.TESTFN, "w+", encoding=enc) - f._CHUNK_SIZE = chunk_size - t0 = timer() - for i in range(nlines): - f.write(line) - f.flush() - t1 = timer() - f.seek(0) - for line in f: - pass - t2 = timer() - f.seek(0) - while f.readline(): - pass - t3 = timer() - f.seek(0) - while f.readline(): - f.tell() - t4 = timer() - f.close() - if support.verbose: - print("\nTiming test: %d lines of %d characters (%d bytes)" % - (nlines, nchars, nbytes)) - print("File chunk size: %6s" % f._CHUNK_SIZE) - print("Writing: %6.3f seconds" % (t1-t0)) - print("Reading using iteration: %6.3f seconds" % (t2-t1)) - print("Reading using readline(): %6.3f seconds" % (t3-t2)) - print("Using readline()+tell(): %6.3f seconds" % (t4-t3)) - - def testReadOneByOne(self): - txt = io.TextIOWrapper(io.BytesIO(b"AA\r\nBB")) + def test_read_one_by_one(self): + txt = self.TextIOWrapper(self.BytesIO(b"AA\r\nBB")) reads = "" while True: c = txt.read(1) @@ -1145,9 +1719,9 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, "AA\nBB") # read in amounts equal to TextIOWrapper._CHUNK_SIZE which is 128. 
- def testReadByChunk(self): + def test_read_by_chunk(self): # make sure "\r\n" straddles 128 char boundary. - txt = io.TextIOWrapper(io.BytesIO(b"A" * 127 + b"\r\nB")) + txt = self.TextIOWrapper(self.BytesIO(b"A" * 127 + b"\r\nB")) reads = "" while True: c = txt.read(128) @@ -1157,7 +1731,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, "A"*127+"\nB") def test_issue1395_1(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") # read one char at a time reads = "" @@ -1169,7 +1743,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, self.normalized) def test_issue1395_2(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = "" @@ -1181,7 +1755,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, self.normalized) def test_issue1395_3(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1192,7 +1766,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, self.normalized) def test_issue1395_4(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1200,7 +1774,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(reads, self.normalized) def test_issue1395_5(self): - txt = io.TextIOWrapper(io.BytesIO(self.testdata), encoding="ascii") + txt = self.TextIOWrapper(self.BytesIO(self.testdata), encoding="ascii") txt._CHUNK_SIZE = 4 reads = txt.read(4) @@ -1210,12 +1784,45 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEquals(txt.read(4), "BBB\n") def test_issue2282(self): - 
buffer = io.BytesIO(self.testdata) - txt = io.TextIOWrapper(buffer, encoding="ascii") + buffer = self.BytesIO(self.testdata) + txt = self.TextIOWrapper(buffer, encoding="ascii") self.assertEqual(buffer.seekable(), txt.seekable()) - def check_newline_decoder_utf8(self, decoder): +class CTextIOWrapperTest(TextIOWrapperTest): + + def test_initialization(self): + r = self.BytesIO(b"\xc3\xa9\n\n") + b = self.BufferedReader(r, 1000) + t = self.TextIOWrapper(b) + self.assertRaises(TypeError, t.__init__, b, newline=42) + self.assertRaises(ValueError, t.read) + self.assertRaises(ValueError, t.__init__, b, newline='xyzzy') + self.assertRaises(ValueError, t.read) + + def test_garbage_collection(self): + # C TextIOWrapper objects are collected, and collecting them flushes + # all data to disk. + # The Python version has __del__, so it ends in gc.garbage instead. + rawio = io.FileIO(support.TESTFN, "wb") + b = self.BufferedWriter(rawio) + t = self.TextIOWrapper(b, encoding="ascii") + t.write("456def") + t.x = t + wr = weakref.ref(t) + del t + gc.collect() + self.assert_(wr() is None, wr) + with open(support.TESTFN, "rb") as f: + self.assertEqual(f.read(), b"456def") + +class PyTextIOWrapperTest(TextIOWrapperTest): + pass + + +class IncrementalNewlineDecoderTest(unittest.TestCase): + + def check_newline_decoding_utf8(self, decoder): # UTF-8 specific tests for a newline decoder def _check_decode(b, s, **kwargs): # We exercise getstate() / setstate() as well as decode() @@ -1257,12 +1864,20 @@ class TextIOWrapperTest(unittest.TestCase): _check_decode(b'\xe8\xa2\x88\r', "\u8888") _check_decode(b'\n', "\n") - def check_newline_decoder(self, decoder, encoding): + def check_newline_decoding(self, decoder, encoding): result = [] - encoder = codecs.getincrementalencoder(encoding)() - def _decode_bytewise(s): - for b in encoder.encode(s): - result.append(decoder.decode(bytes([b]))) + if encoding is not None: + encoder = codecs.getincrementalencoder(encoding)() + def _decode_bytewise(s): 
+ # Decode one byte at a time + for b in encoder.encode(s): + result.append(decoder.decode(bytes([b]))) + else: + encoder = None + def _decode_bytewise(s): + # Decode one char at a time + for c in s: + result.append(decoder.decode(c)) self.assertEquals(decoder.newlines, None) _decode_bytewise("abc\n\r") self.assertEquals(decoder.newlines, '\n') @@ -1275,22 +1890,34 @@ class TextIOWrapperTest(unittest.TestCase): _decode_bytewise("abc\r") self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc") decoder.reset() - self.assertEquals(decoder.decode("abc".encode(encoding)), "abc") + input = "abc" + if encoder is not None: + encoder.reset() + input = encoder.encode(input) + self.assertEquals(decoder.decode(input), "abc") self.assertEquals(decoder.newlines, None) def test_newline_decoder(self): encodings = ( - 'utf-8', 'latin-1', + # None meaning the IncrementalNewlineDecoder takes unicode input + # rather than bytes input + None, 'utf-8', 'latin-1', 'utf-16', 'utf-16-le', 'utf-16-be', 'utf-32', 'utf-32-le', 'utf-32-be', ) for enc in encodings: - decoder = codecs.getincrementaldecoder(enc)() - decoder = io.IncrementalNewlineDecoder(decoder, translate=True) - self.check_newline_decoder(decoder, enc) + decoder = enc and codecs.getincrementaldecoder(enc)() + decoder = self.IncrementalNewlineDecoder(decoder, translate=True) + self.check_newline_decoding(decoder, enc) decoder = codecs.getincrementaldecoder("utf-8")() - decoder = io.IncrementalNewlineDecoder(decoder, translate=True) - self.check_newline_decoder_utf8(decoder) + decoder = self.IncrementalNewlineDecoder(decoder, translate=True) + self.check_newline_decoding_utf8(decoder) + +class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): + pass + +class PyIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest): + pass # XXX Tests for open() @@ -1300,24 +1927,23 @@ class MiscIOTest(unittest.TestCase): def tearDown(self): support.unlink(support.TESTFN) - def testImport__all__(self): - for name in 
io.__all__: - obj = getattr(io, name, None) + def test___all__(self): + for name in self.io.__all__: + obj = getattr(self.io, name, None) self.assert_(obj is not None, name) if name == "open": continue elif "error" in name.lower(): self.assert_(issubclass(obj, Exception), name) else: - self.assert_(issubclass(obj, io.IOBase)) - + self.assert_(issubclass(obj, self.IOBase), name) def test_attributes(self): - f = io.open(support.TESTFN, "wb", buffering=0) + f = self.open(support.TESTFN, "wb", buffering=0) self.assertEquals(f.mode, "wb") f.close() - f = io.open(support.TESTFN, "U") + f = self.open(support.TESTFN, "U") self.assertEquals(f.name, support.TESTFN) self.assertEquals(f.buffer.name, support.TESTFN) self.assertEquals(f.buffer.raw.name, support.TESTFN) @@ -1326,12 +1952,12 @@ class MiscIOTest(unittest.TestCase): self.assertEquals(f.buffer.raw.mode, "rb") f.close() - f = io.open(support.TESTFN, "w+") + f = self.open(support.TESTFN, "w+") self.assertEquals(f.mode, "w+") self.assertEquals(f.buffer.mode, "rb+") # Does it really matter? 
self.assertEquals(f.buffer.raw.mode, "rb+") - g = io.open(f.fileno(), "wb", closefd=False) + g = self.open(f.fileno(), "wb", closefd=False) self.assertEquals(g.mode, "wb") self.assertEquals(g.raw.mode, "wb") self.assertEquals(g.name, f.fileno()) @@ -1357,13 +1983,12 @@ class MiscIOTest(unittest.TestCase): {"mode": "w+", "buffering": 2}, {"mode": "w+b", "buffering": 0}, ]: - f = io.open(support.TESTFN, **kwargs) + f = self.open(support.TESTFN, **kwargs) f.close() self.assertRaises(ValueError, f.flush) self.assertRaises(ValueError, f.fileno) self.assertRaises(ValueError, f.isatty) self.assertRaises(ValueError, f.__iter__) - self.assertRaises(ValueError, next, f) if hasattr(f, "peek"): self.assertRaises(ValueError, f.peek, 1) self.assertRaises(ValueError, f.read) @@ -1376,16 +2001,101 @@ class MiscIOTest(unittest.TestCase): self.assertRaises(ValueError, f.seek, 0) self.assertRaises(ValueError, f.tell) self.assertRaises(ValueError, f.truncate) - self.assertRaises(ValueError, f.write, "") + self.assertRaises(ValueError, f.write, + b"" if "b" in kwargs['mode'] else "") self.assertRaises(ValueError, f.writelines, []) + self.assertRaises(ValueError, next, f) + def test_blockingioerror(self): + # Various BlockingIOError issues + self.assertRaises(TypeError, self.BlockingIOError) + self.assertRaises(TypeError, self.BlockingIOError, 1) + self.assertRaises(TypeError, self.BlockingIOError, 1, 2, 3, 4) + self.assertRaises(TypeError, self.BlockingIOError, 1, "", None) + b = self.BlockingIOError(1, "") + self.assertEqual(b.characters_written, 0) + class C(str): + pass + c = C("") + b = self.BlockingIOError(1, c) + c.b = b + b.c = c + wr = weakref.ref(c) + del c, b + gc.collect() + self.assert_(wr() is None, wr) + + def test_abcs(self): + # Test the visible base classes are ABCs. 
+ self.assertTrue(isinstance(self.IOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.RawIOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.BufferedIOBase, abc.ABCMeta)) + self.assertTrue(isinstance(self.TextIOBase, abc.ABCMeta)) + + def _check_abc_inheritance(self, abcmodule): + with self.open(support.TESTFN, "wb", buffering=0) as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertTrue(isinstance(f, abcmodule.RawIOBase)) + self.assertFalse(isinstance(f, abcmodule.BufferedIOBase)) + self.assertFalse(isinstance(f, abcmodule.TextIOBase)) + with self.open(support.TESTFN, "wb") as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertFalse(isinstance(f, abcmodule.RawIOBase)) + self.assertTrue(isinstance(f, abcmodule.BufferedIOBase)) + self.assertFalse(isinstance(f, abcmodule.TextIOBase)) + with self.open(support.TESTFN, "w") as f: + self.assertTrue(isinstance(f, abcmodule.IOBase)) + self.assertFalse(isinstance(f, abcmodule.RawIOBase)) + self.assertFalse(isinstance(f, abcmodule.BufferedIOBase)) + self.assertTrue(isinstance(f, abcmodule.TextIOBase)) + + def test_abc_inheritance(self): + # Test implementations inherit from their respective ABCs + self._check_abc_inheritance(self) + + def test_abc_inheritance_official(self): + # Test implementations inherit from the official ABCs of the + # baseline "io" module. 
+ self._check_abc_inheritance(io) + +class CMiscIOTest(MiscIOTest): + io = io + +class PyMiscIOTest(MiscIOTest): + io = pyio def test_main(): - support.run_unittest(IOTest, BytesIOTest, StringIOTest, - BufferedReaderTest, BufferedWriterTest, - BufferedRWPairTest, BufferedRandomTest, - StatefulIncrementalDecoderTest, - TextIOWrapperTest, MiscIOTest) + tests = (CIOTest, PyIOTest, + CBufferedReaderTest, PyBufferedReaderTest, + CBufferedWriterTest, PyBufferedWriterTest, + CBufferedRWPairTest, PyBufferedRWPairTest, + CBufferedRandomTest, PyBufferedRandomTest, + StatefulIncrementalDecoderTest, + CIncrementalNewlineDecoderTest, PyIncrementalNewlineDecoderTest, + CTextIOWrapperTest, PyTextIOWrapperTest, + CMiscIOTest, PyMiscIOTest,) + + # Put the namespaces of the IO module we are testing and some useful mock + # classes in the __dict__ of each test. + mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO, + MockNonBlockWriterIO) + all_members = io.__all__ + ["IncrementalNewlineDecoder"] + c_io_ns = {name : getattr(io, name) for name in all_members} + py_io_ns = {name : getattr(pyio, name) for name in all_members} + globs = globals() + c_io_ns.update((x.__name__, globs["C" + x.__name__]) for x in mocks) + py_io_ns.update((x.__name__, globs["Py" + x.__name__]) for x in mocks) + # Avoid turning open into a bound method. 
+ py_io_ns["open"] = pyio.OpenWrapper + for test in tests: + if test.__name__.startswith("C"): + for name, obj in c_io_ns.items(): + setattr(test, name, obj) + elif test.__name__.startswith("Py"): + for name, obj in py_io_ns.items(): + setattr(test, name, obj) + + support.run_unittest(*tests) if __name__ == "__main__": - unittest.main() + test_main() diff --git a/Lib/test/test_largefile.py b/Lib/test/test_largefile.py index 8060ea0..584a206 100644 --- a/Lib/test/test_largefile.py +++ b/Lib/test/test_largefile.py @@ -7,6 +7,8 @@ import sys import unittest from test.support import run_unittest, TESTFN, verbose, requires, \ TestSkipped, unlink +import io # C implementation of io +import _pyio as pyio # Python implementation of io try: import signal @@ -21,7 +23,7 @@ except (ImportError, AttributeError): size = 2500000000 -class TestCase(unittest.TestCase): +class LargeFileTest(unittest.TestCase): """Test that each file function works as expected for a large (i.e. > 2GB, do we have to check > 4GB) files. 
@@ -34,7 +36,7 @@ class TestCase(unittest.TestCase): def test_seek(self): if verbose: print('create large file via seek (may be sparse file) ...') - with open(TESTFN, 'wb') as f: + with self.open(TESTFN, 'wb') as f: f.write(b'z') f.seek(0) f.seek(size) @@ -52,7 +54,7 @@ class TestCase(unittest.TestCase): def test_seek_read(self): if verbose: print('play around with seek() and read() with the built largefile') - with open(TESTFN, 'rb') as f: + with self.open(TESTFN, 'rb') as f: self.assertEqual(f.tell(), 0) self.assertEqual(f.read(1), b'z') self.assertEqual(f.tell(), 1) @@ -85,7 +87,7 @@ class TestCase(unittest.TestCase): def test_lseek(self): if verbose: print('play around with os.lseek() with the built largefile') - with open(TESTFN, 'rb') as f: + with self.open(TESTFN, 'rb') as f: self.assertEqual(os.lseek(f.fileno(), 0, 0), 0) self.assertEqual(os.lseek(f.fileno(), 42, 0), 42) self.assertEqual(os.lseek(f.fileno(), 42, 1), 84) @@ -100,7 +102,7 @@ class TestCase(unittest.TestCase): def test_truncate(self): if verbose: print('try truncate') - with open(TESTFN, 'r+b') as f: + with self.open(TESTFN, 'r+b') as f: # this is already decided before start running the test suite # but we do it anyway for extra protection if not hasattr(f, 'truncate'): @@ -143,7 +145,7 @@ def test_main(): # Only run if the current filesystem supports large files. # (Skip this test on Windows, since we now always support # large files.) 
- f = open(TESTFN, 'wb') + f = open(TESTFN, 'wb', buffering=0) try: # 2**31 == 2147483648 f.seek(2147483649) @@ -158,14 +160,19 @@ def test_main(): else: f.close() suite = unittest.TestSuite() - suite.addTest(TestCase('test_seek')) - suite.addTest(TestCase('test_osstat')) - suite.addTest(TestCase('test_seek_read')) - suite.addTest(TestCase('test_lseek')) - with open(TESTFN, 'w') as f: - if hasattr(f, 'truncate'): - suite.addTest(TestCase('test_truncate')) - unlink(TESTFN) + for _open, prefix in [(io.open, 'C'), (pyio.open, 'Py')]: + class TestCase(LargeFileTest): + pass + TestCase.open = staticmethod(_open) + TestCase.__name__ = prefix + LargeFileTest.__name__ + suite.addTest(TestCase('test_seek')) + suite.addTest(TestCase('test_osstat')) + suite.addTest(TestCase('test_seek_read')) + suite.addTest(TestCase('test_lseek')) + with _open(TESTFN, 'wb') as f: + if hasattr(f, 'truncate'): + suite.addTest(TestCase('test_truncate')) + unlink(TESTFN) try: run_unittest(suite) finally: diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index d1745bc..b3fc042 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -7,13 +7,52 @@ import unittest from test import support import io +import _pyio as pyio import sys -try: - import _bytesio, _stringio - has_c_implementation = True -except ImportError: - has_c_implementation = False +class MemorySeekTestMixin: + + def testInit(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + def testRead(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(buf[:1], bytesIo.read(1)) + self.assertEquals(buf[1:5], bytesIo.read(4)) + self.assertEquals(buf[5:], bytesIo.read(900)) + self.assertEquals(self.EOF, bytesIo.read()) + + def testReadNoArgs(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(buf, bytesIo.read()) + self.assertEquals(self.EOF, bytesIo.read()) + + def testSeek(self): + buf = 
self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + bytesIo.read(5) + bytesIo.seek(0) + self.assertEquals(buf, bytesIo.read()) + + bytesIo.seek(3) + self.assertEquals(buf[3:], bytesIo.read()) + self.assertRaises(TypeError, bytesIo.seek, 0.0) + + def testTell(self): + buf = self.buftype("1234567890") + bytesIo = self.ioclass(buf) + + self.assertEquals(0, bytesIo.tell()) + bytesIo.seek(5) + self.assertEquals(5, bytesIo.tell()) + bytesIo.seek(10000) + self.assertEquals(10000, bytesIo.tell()) class MemoryTestMixin: @@ -148,7 +187,7 @@ class MemoryTestMixin: self.assertEqual(memio.readline(), self.EOF) memio.seek(0) self.assertEqual(type(memio.readline()), type(buf)) - self.assertEqual(memio.readline(None), buf) + self.assertEqual(memio.readline(), buf) self.assertRaises(TypeError, memio.readline, '') memio.close() self.assertRaises(ValueError, memio.readline) @@ -296,11 +335,11 @@ class MemoryTestMixin: self.assertEqual(test2(), buf) -class PyBytesIOTest(MemoryTestMixin, unittest.TestCase): +class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): @staticmethod def buftype(s): return s.encode("ascii") - ioclass = io._BytesIO + ioclass = pyio.BytesIO EOF = b"" def test_read1(self): @@ -371,11 +410,32 @@ class PyBytesIOTest(MemoryTestMixin, unittest.TestCase): self.assertEqual(memio.getvalue(), buf) -class PyStringIOTest(MemoryTestMixin, unittest.TestCase): +class PyStringIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase): buftype = str - ioclass = io._StringIO + ioclass = pyio.StringIO EOF = "" + # TextIO-specific behaviour. + + def test_newlines_property(self): + memio = self.ioclass(newline=None) + # The C StringIO decodes newlines in write() calls, but the Python + # implementation only does when reading. This function forces them to + # be decoded for testing. 
+ def force_decode(): + memio.seek(0) + memio.read() + self.assertEqual(memio.newlines, None) + memio.write("a\n") + force_decode() + self.assertEqual(memio.newlines, "\n") + memio.write("b\r\n") + force_decode() + self.assertEqual(memio.newlines, ("\n", "\r\n")) + memio.write("c\rd") + force_decode() + self.assertEqual(memio.newlines, ("\r", "\n", "\r\n")) + def test_relative_seek(self): memio = self.ioclass() @@ -386,32 +446,99 @@ class PyStringIOTest(MemoryTestMixin, unittest.TestCase): self.assertRaises(IOError, memio.seek, 1, 1) self.assertRaises(IOError, memio.seek, 1, 2) + def test_textio_properties(self): + memio = self.ioclass() + + # These are just dummy values but we nevertheless check them for fear + # of unexpected breakage. + self.assertEqual(memio.encoding, "utf-8") + self.assertEqual(memio.errors, "strict") + self.assertEqual(memio.line_buffering, False) + + def test_newline_none(self): + # newline=None + memio = self.ioclass("a\nb\r\nc\rd", newline=None) + self.assertEqual(list(memio), ["a\n", "b\n", "c\n", "d"]) + memio.seek(0) + self.assertEqual(memio.read(1), "a") + self.assertEqual(memio.read(2), "\nb") + self.assertEqual(memio.read(2), "\nc") + self.assertEqual(memio.read(1), "\n") + memio = self.ioclass(newline=None) + self.assertEqual(2, memio.write("a\n")) + self.assertEqual(3, memio.write("b\r\n")) + self.assertEqual(3, memio.write("c\rd")) + memio.seek(0) + self.assertEqual(memio.read(), "a\nb\nc\nd") + memio = self.ioclass("a\r\nb", newline=None) + self.assertEqual(memio.read(3), "a\nb") + + def test_newline_empty(self): + # newline="" + memio = self.ioclass("a\nb\r\nc\rd", newline="") + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"]) + memio.seek(0) + self.assertEqual(memio.read(4), "a\nb\r") + self.assertEqual(memio.read(2), "\nc") + self.assertEqual(memio.read(1), "\r") + memio = self.ioclass(newline="") + self.assertEqual(2, memio.write("a\n")) + self.assertEqual(2, memio.write("b\r")) + self.assertEqual(2, 
memio.write("\nc")) + self.assertEqual(2, memio.write("\rd")) + memio.seek(0) + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"]) + + def test_newline_lf(self): + # newline="\n" + memio = self.ioclass("a\nb\r\nc\rd") + self.assertEqual(list(memio), ["a\n", "b\r\n", "c\rd"]) + + def test_newline_cr(self): + # newline="\r" + memio = self.ioclass("a\nb\r\nc\rd", newline="\r") + memio.seek(0) + self.assertEqual(memio.read(), "a\rb\r\rc\rd") + memio.seek(0) + self.assertEqual(list(memio), ["a\r", "b\r", "\r", "c\r", "d"]) + + def test_newline_crlf(self): + # newline="\r\n" + memio = self.ioclass("a\nb\r\nc\rd", newline="\r\n") + memio.seek(0) + self.assertEqual(memio.read(), "a\r\nb\r\r\nc\rd") + memio.seek(0) + self.assertEqual(list(memio), ["a\r\n", "b\r\r\n", "c\rd"]) + + def test_issue5265(self): + # StringIO can duplicate newlines in universal newlines mode + memio = self.ioclass("a\r\nb\r\n", newline=None) + self.assertEqual(memio.read(5), "a\nb\n") + + +class CBytesIOTest(PyBytesIOTest): + ioclass = io.BytesIO + +class CStringIOTest(PyStringIOTest): + ioclass = io.StringIO + # XXX: For the Python version of io.StringIO, this is highly # dependent on the encoding used for the underlying buffer. 
- # def test_widechar(self): - # buf = self.buftype("\U0002030a\U00020347") - # memio = self.ioclass(buf) - # - # self.assertEqual(memio.getvalue(), buf) - # self.assertEqual(memio.write(buf), len(buf)) - # self.assertEqual(memio.tell(), len(buf)) - # self.assertEqual(memio.getvalue(), buf) - # self.assertEqual(memio.write(buf), len(buf)) - # self.assertEqual(memio.tell(), len(buf) * 2) - # self.assertEqual(memio.getvalue(), buf + buf) - -if has_c_implementation: - class CBytesIOTest(PyBytesIOTest): - ioclass = io.BytesIO - - class CStringIOTest(PyStringIOTest): - ioclass = io.StringIO + def test_widechar(self): + buf = self.buftype("\U0002030a\U00020347") + memio = self.ioclass(buf) + + self.assertEqual(memio.getvalue(), buf) + self.assertEqual(memio.write(buf), len(buf)) + self.assertEqual(memio.tell(), len(buf)) + self.assertEqual(memio.getvalue(), buf) + self.assertEqual(memio.write(buf), len(buf)) + self.assertEqual(memio.tell(), len(buf) * 2) + self.assertEqual(memio.getvalue(), buf + buf) def test_main(): - tests = [PyBytesIOTest, PyStringIOTest] - if has_c_implementation: - tests.extend([CBytesIOTest, CStringIOTest]) + tests = [PyBytesIOTest, PyStringIOTest, CBytesIOTest, CStringIOTest] support.run_unittest(*tests) if __name__ == '__main__': diff --git a/Lib/test/test_univnewlines.py b/Lib/test/test_univnewlines.py index c4e4a3f..a6b9909 100644 --- a/Lib/test/test_univnewlines.py +++ b/Lib/test/test_univnewlines.py @@ -1,4 +1,6 @@ # Tests universal newline support for both reading and parsing files. 
+import io +import _pyio as pyio import unittest import os import sys @@ -35,7 +37,7 @@ class TestGenericUnivNewlines(unittest.TestCase): WRITEMODE = 'wb' def setUp(self): - fp = open(support.TESTFN, self.WRITEMODE) + fp = self.open(support.TESTFN, self.WRITEMODE) data = self.DATA if "b" in self.WRITEMODE: data = data.encode("ascii") @@ -49,19 +51,19 @@ class TestGenericUnivNewlines(unittest.TestCase): pass def test_read(self): - fp = open(support.TESTFN, self.READMODE) + fp = self.open(support.TESTFN, self.READMODE) data = fp.read() self.assertEqual(data, DATA_LF) self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) def test_readlines(self): - fp = open(support.TESTFN, self.READMODE) + fp = self.open(support.TESTFN, self.READMODE) data = fp.readlines() self.assertEqual(data, DATA_SPLIT) self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) def test_readline(self): - fp = open(support.TESTFN, self.READMODE) + fp = self.open(support.TESTFN, self.READMODE) data = [] d = fp.readline() while d: @@ -71,7 +73,7 @@ class TestGenericUnivNewlines(unittest.TestCase): self.assertEqual(repr(fp.newlines), repr(self.NEWLINE)) def test_seek(self): - fp = open(support.TESTFN, self.READMODE) + fp = self.open(support.TESTFN, self.READMODE) fp.readline() pos = fp.tell() data = fp.readlines() @@ -94,7 +96,7 @@ class TestCRLFNewlines(TestGenericUnivNewlines): DATA = DATA_CRLF def test_tell(self): - fp = open(support.TESTFN, self.READMODE) + fp = self.open(support.TESTFN, self.READMODE) self.assertEqual(repr(fp.newlines), repr(None)) data = fp.readline() pos = fp.tell() @@ -106,12 +108,22 @@ class TestMixedNewlines(TestGenericUnivNewlines): def test_main(): - support.run_unittest( - TestCRNewlines, - TestLFNewlines, - TestCRLFNewlines, - TestMixedNewlines - ) + base_tests = (TestCRNewlines, + TestLFNewlines, + TestCRLFNewlines, + TestMixedNewlines) + tests = [] + # Test the C and Python implementations. 
+ for test in base_tests: + class CTest(test): + open = io.open + CTest.__name__ = "C" + test.__name__ + class PyTest(test): + open = staticmethod(pyio.open) + PyTest.__name__ = "Py" + test.__name__ + tests.append(CTest) + tests.append(PyTest) + support.run_unittest(*tests) if __name__ == '__main__': test_main() diff --git a/Lib/test/test_uu.py b/Lib/test/test_uu.py index d2b6e73..a54f2c1 100644 --- a/Lib/test/test_uu.py +++ b/Lib/test/test_uu.py @@ -32,6 +32,8 @@ class FakeIO(io.TextIOWrapper): encoding=encoding, errors=errors, newline=newline) + self._encoding = encoding + self._errors = errors if initial_value: if not isinstance(initial_value, str): initial_value = str(initial_value) |