summaryrefslogtreecommitdiffstats
path: root/Lib/_pyio.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/_pyio.py')
-rw-r--r--Lib/_pyio.py139
1 files changed, 90 insertions, 49 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index a2faeb3..f66290f 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -15,7 +15,6 @@ except ImportError:
import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
-from errno import EINTR
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
@@ -24,20 +23,12 @@ DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.
-
-class BlockingIOError(IOError):
-
- """Exception raised when I/O would block on a non-blocking I/O stream."""
-
- def __init__(self, errno, strerror, characters_written=0):
- super().__init__(errno, strerror)
- if not isinstance(characters_written, int):
- raise TypeError("characters_written must be a integer")
- self.characters_written = characters_written
+# Rebind for compatibility
+BlockingIOError = BlockingIOError
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
- newline=None, closefd=True):
+ newline=None, closefd=True, opener=None):
r"""Open file and return a stream. Raise IOError upon failure.
@@ -47,21 +38,22 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
wrapped. (If a file descriptor is given, it is closed when the
returned I/O object is closed, unless closefd is set to False.)
- mode is an optional string that specifies the mode in which the file
- is opened. It defaults to 'r' which means open for reading in text
- mode. Other common values are 'w' for writing (truncating the file if
- it already exists), and 'a' for appending (which on some Unix systems,
- means that all writes append to the end of the file regardless of the
- current seek position). In text mode, if encoding is not specified the
- encoding used is platform dependent. (For reading and writing raw
- bytes use binary mode and leave encoding unspecified.) The available
- modes are:
+ mode is an optional string that specifies the mode in which the file is
+ opened. It defaults to 'r' which means open for reading in text mode. Other
+ common values are 'w' for writing (truncating the file if it already
+ exists), 'x' for exclusive creation of a new file, and 'a' for appending
+ (which on some Unix systems, means that all writes append to the end of the
+ file regardless of the current seek position). In text mode, if encoding is
+ not specified the encoding used is platform dependent. (For reading and
+ writing raw bytes use binary mode and leave encoding unspecified.) The
+ available modes are:
========= ===============================================================
Character Meaning
--------- ---------------------------------------------------------------
'r' open for reading (default)
'w' open for writing, truncating the file first
+ 'x' create a new file and open it for writing
'a' open for writing, appending to the end of the file if it exists
'b' binary mode
't' text mode (default)
@@ -72,7 +64,8 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
The default mode is 'rt' (open for reading text). For binary random
access, the mode 'w+b' opens and truncates the file to 0 bytes, while
- 'r+b' opens the file without truncation.
+ 'r+b' opens the file without truncation. The 'x' mode implies 'w' and
+ raises an `FileExistsError` if the file already exists.
Python distinguishes between files opened in binary and text modes,
even when the underlying operating system doesn't. Files opened in
@@ -132,6 +125,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
be kept open when the file is closed. This does not work when a file name is
given and must be True in that case.
+ A custom opener can be used by passing a callable as *opener*. The
+ underlying file descriptor for the file object is then obtained by calling
+ *opener* with (*file*, *flags*). *opener* must return an open file
+ descriptor (passing os.open as *opener* results in functionality similar to
+ passing None).
+
open() returns a file object whose type depends on the mode, and
through which the standard file operations such as reading and writing
are performed. When open() is used to open a file in a text mode ('w',
@@ -157,8 +156,9 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
if errors is not None and not isinstance(errors, str):
raise TypeError("invalid errors: %r" % errors)
modes = set(mode)
- if modes - set("arwb+tU") or len(mode) > len(modes):
+ if modes - set("axrwb+tU") or len(mode) > len(modes):
raise ValueError("invalid mode: %r" % mode)
+ creating = "x" in modes
reading = "r" in modes
writing = "w" in modes
appending = "a" in modes
@@ -166,14 +166,14 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
text = "t" in modes
binary = "b" in modes
if "U" in modes:
- if writing or appending:
+ if creating or writing or appending:
raise ValueError("can't use U and writing mode at once")
reading = True
if text and binary:
raise ValueError("can't have text and binary mode at once")
- if reading + writing + appending > 1:
+ if creating + reading + writing + appending > 1:
raise ValueError("can't have read/write/append mode at once")
- if not (reading or writing or appending):
+ if not (creating or reading or writing or appending):
raise ValueError("must have exactly one of read/write/append mode")
if binary and encoding is not None:
raise ValueError("binary mode doesn't take an encoding argument")
@@ -182,11 +182,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
if binary and newline is not None:
raise ValueError("binary mode doesn't take a newline argument")
raw = FileIO(file,
+ (creating and "x" or "") +
(reading and "r" or "") +
(writing and "w" or "") +
(appending and "a" or "") +
(updating and "+" or ""),
- closefd)
+ closefd, opener=opener)
line_buffering = False
if buffering == 1 or buffering < 0 and raw.isatty():
buffering = -1
@@ -208,7 +209,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
raise ValueError("can't have unbuffered text I/O")
if updating:
buffer = BufferedRandom(raw, buffering)
- elif writing or appending:
+ elif creating or writing or appending:
buffer = BufferedWriter(raw, buffering)
elif reading:
buffer = BufferedReader(raw, buffering)
@@ -948,15 +949,19 @@ class BufferedReader(_BufferedIOMixin):
# Special case for when the number of bytes to read is unspecified.
if n is None or n == -1:
self._reset_read_buf()
+ if hasattr(self.raw, 'readall'):
+ chunk = self.raw.readall()
+ if chunk is None:
+ return buf[pos:] or None
+ else:
+ return buf[pos:] + chunk
chunks = [buf[pos:]] # Strip the consumed bytes.
current_size = 0
while True:
# Read until EOF or until read() would block.
try:
chunk = self.raw.read()
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
if chunk in empty_values:
nodata_val = chunk
@@ -978,9 +983,7 @@ class BufferedReader(_BufferedIOMixin):
while avail < n:
try:
chunk = self.raw.read(wanted)
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
if chunk in empty_values:
nodata_val = chunk
@@ -1013,9 +1016,7 @@ class BufferedReader(_BufferedIOMixin):
while True:
try:
current = self.raw.read(to_read)
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
break
if current:
@@ -1120,13 +1121,11 @@ class BufferedWriter(_BufferedIOMixin):
while self._write_buf:
try:
n = self.raw.write(self._write_buf)
+ except InterruptedError:
+ continue
except BlockingIOError:
raise RuntimeError("self.raw should implement RawIOBase: it "
"should not raise BlockingIOError")
- except IOError as e:
- if e.errno != EINTR:
- raise
- continue
if n is None:
raise BlockingIOError(
errno.EAGAIN,
@@ -1515,6 +1514,7 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable()
self._has_read1 = hasattr(self.buffer, 'read1')
+ self._b2cratio = 0.0
if self._seekable and self.writable():
position = self.buffer.tell()
@@ -1685,7 +1685,12 @@ class TextIOWrapper(TextIOBase):
else:
input_chunk = self.buffer.read(self._CHUNK_SIZE)
eof = not input_chunk
- self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
+ decoded_chars = self._decoder.decode(input_chunk, eof)
+ self._set_decoded_chars(decoded_chars)
+ if decoded_chars:
+ self._b2cratio = len(input_chunk) / len(self._decoded_chars)
+ else:
+ self._b2cratio = 0.0
if self._telling:
# At the snapshot point, len(dec_buffer) bytes before the read,
@@ -1739,20 +1744,56 @@ class TextIOWrapper(TextIOBase):
# forward until it gives us enough decoded characters.
saved_state = decoder.getstate()
try:
+ # Fast search for an acceptable start point, close to our
+ # current pos.
+ # Rationale: calling decoder.decode() has a large overhead
+ # regardless of chunk size; we want the number of such calls to
+ # be O(1) in most situations (common decoders, non-crazy input).
+ # Actually, it will be exactly 1 for fixed-size codecs (all
+ # 8-bit codecs, also UTF-16 and UTF-32).
+ skip_bytes = int(self._b2cratio * chars_to_skip)
+ skip_back = 1
+ assert skip_bytes <= len(next_input)
+ while skip_bytes > 0:
+ decoder.setstate((b'', dec_flags))
+ # Decode up to temptative start point
+ n = len(decoder.decode(next_input[:skip_bytes]))
+ if n <= chars_to_skip:
+ b, d = decoder.getstate()
+ if not b:
+ # Before pos and no bytes buffered in decoder => OK
+ dec_flags = d
+ chars_to_skip -= n
+ break
+ # Skip back by buffered amount and reset heuristic
+ skip_bytes -= len(b)
+ skip_back = 1
+ else:
+ # We're too far ahead, skip back a bit
+ skip_bytes -= skip_back
+ skip_back = skip_back * 2
+ else:
+ skip_bytes = 0
+ decoder.setstate((b'', dec_flags))
+
# Note our initial start point.
- decoder.setstate((b'', dec_flags))
- start_pos = position
- start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
- need_eof = 0
+ start_pos = position + skip_bytes
+ start_flags = dec_flags
+ if chars_to_skip == 0:
+ # We haven't moved from the start point.
+ return self._pack_cookie(start_pos, start_flags)
# Feed the decoder one byte at a time. As we go, note the
# nearest "safe start point" before the current location
# (a point where the decoder has nothing buffered, so seek()
# can safely start from there and advance to this location).
- next_byte = bytearray(1)
- for next_byte[0] in next_input:
+ bytes_fed = 0
+ need_eof = 0
+ # Chars decoded since `start_pos`
+ chars_decoded = 0
+ for i in range(skip_bytes, len(next_input)):
bytes_fed += 1
- chars_decoded += len(decoder.decode(next_byte))
+ chars_decoded += len(decoder.decode(next_input[i:i+1]))
dec_buffer, dec_flags = decoder.getstate()
if not dec_buffer and chars_decoded <= chars_to_skip:
# Decoder buffer is empty, so this is a safe start point.