diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2016-03-08 16:35:19 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2016-03-08 16:35:19 (GMT) |
commit | 674e2d0ea05dac2dbcd7156ffb229066aa8acc17 (patch) | |
tree | 82e4a3cbcea14fe2284a6897acc60a2a18a554e2 /Lib/fileinput.py | |
parent | 238fecd75cf79aa835d4e9e310be44a295698340 (diff) | |
parent | cc2dbc5844929da1f89e3f548a8d1312b4f0ba0e (diff) | |
download | cpython-674e2d0ea05dac2dbcd7156ffb229066aa8acc17.zip cpython-674e2d0ea05dac2dbcd7156ffb229066aa8acc17.tar.gz cpython-674e2d0ea05dac2dbcd7156ffb229066aa8acc17.tar.bz2 |
Issue #15068: Got rid of excessive buffering in fileinput.
The bufsize parameter is now deprecated and ignored.
Diffstat (limited to 'Lib/fileinput.py')
-rw-r--r-- | Lib/fileinput.py | 166 |
1 files changed, 76 insertions, 90 deletions
diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 3543653..4286156 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -64,13 +64,6 @@ deleted when the output file is closed. In-place filtering is disabled when standard input is read. XXX The current implementation does not work for MS-DOS 8+3 filesystems. -Performance: this module is unfortunately one of the slower ways of -processing large numbers of input lines. Nevertheless, a significant -speed-up has been obtained by using readlines(bufsize) instead of -readline(). A new keyword argument, bufsize=N, is present on the -input() function and the FileInput() class to override the default -buffer size. - XXX Possible additions: - optional getopt argument processing @@ -87,8 +80,6 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno", _state = None -DEFAULT_BUFSIZE = 8*1024 - def input(files=None, inplace=False, backup="", bufsize=0, mode="r", openhook=None): """Return an instance of the FileInput class, which can be iterated. @@ -208,17 +199,19 @@ class FileInput: self._files = files self._inplace = inplace self._backup = backup - self._bufsize = bufsize or DEFAULT_BUFSIZE + if bufsize: + import warnings + warnings.warn('bufsize is deprecated and ignored', + DeprecationWarning, stacklevel=2) self._savestdout = None self._output = None self._filename = None - self._lineno = 0 + self._startlineno = 0 self._filelineno = 0 self._file = None + self._readline = self._start_readline self._isstdin = False self._backupfilename = None - self._buffer = [] - self._bufindex = 0 # restrict mode argument to reading modes if mode not in ('r', 'rU', 'U', 'rb'): raise ValueError("FileInput opening mode must be one of " @@ -254,22 +247,18 @@ class FileInput: return self def __next__(self): - try: - line = self._buffer[self._bufindex] - except IndexError: - pass - else: - self._bufindex += 1 - self._lineno += 1 + line = self._readline() + if line: self._filelineno += 1 return line - line = self.readline() - if not line: + if not self._file: raise StopIteration - return line + self.nextfile() + # Recursive call + return self.__next__() def __getitem__(self, i): - if i != self._lineno: + if i != self.lineno(): raise RuntimeError("accessing lines out of order") try: return self.__next__() @@ -290,6 +279,7 @@ class FileInput: finally: file = self._file self._file = None + self._readline = self._start_readline try: if file and not self._isstdin: file.close() @@ -301,85 +291,81 @@ class FileInput: except OSError: pass self._isstdin = False - self._buffer = [] - self._bufindex = 0 def readline(self): - try: - line = self._buffer[self._bufindex] - except IndexError: - pass + while True: + line = self._readline() + if line: + self._filelineno += 1 + return line + if not self._file: + return line + self.nextfile() + # repeat with next file + + def _start_readline(self): + if not self._files: + if 'b' in self._mode: + return b'' + else: + return '' + self._filename = self._files[0] + self._files = self._files[1:] + self._startlineno = self.lineno() + self._filelineno = 0 + self._file = None + self._isstdin = False + self._backupfilename = 0 + if self._filename == '-': + self._filename = '<stdin>' + if 'b' in self._mode: + self._file = getattr(sys.stdin, 'buffer', sys.stdin) + else: + self._file = sys.stdin + self._isstdin = True else: - self._bufindex += 1 - self._lineno += 1 - self._filelineno += 1 - return line - if not self._file: - if not self._files: - if 'b' in self._mode: - return b'' + if self._inplace: + self._backupfilename = ( + self._filename + (self._backup or ".bak")) + try: + os.unlink(self._backupfilename) + except OSError: + pass + # The next few lines may raise OSError + os.rename(self._filename, self._backupfilename) + self._file = open(self._backupfilename, self._mode) + try: + perm = os.fstat(self._file.fileno()).st_mode + except OSError: + self._output = open(self._filename, "w") else: - return '' - self._filename = self._files[0] - self._files = self._files[1:] - self._filelineno = 0 - self._file = None - self._isstdin = False - self._backupfilename = 0 - if self._filename == '-': - self._filename = '<stdin>' - if 'b' in self._mode: - self._file = getattr(sys.stdin, 'buffer', sys.stdin) - else: - self._file = sys.stdin - self._isstdin = True - else: - if self._inplace: - self._backupfilename = ( - self._filename + (self._backup or ".bak")) + mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC + if hasattr(os, 'O_BINARY'): + mode |= os.O_BINARY + + fd = os.open(self._filename, mode, perm) + self._output = os.fdopen(fd, "w") try: - os.unlink(self._backupfilename) + if hasattr(os, 'chmod'): + os.chmod(self._filename, perm) except OSError: pass - # The next few lines may raise OSError - os.rename(self._filename, self._backupfilename) - self._file = open(self._backupfilename, self._mode) - try: - perm = os.fstat(self._file.fileno()).st_mode - except OSError: - self._output = open(self._filename, "w") - else: - mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC - if hasattr(os, 'O_BINARY'): - mode |= os.O_BINARY - - fd = os.open(self._filename, mode, perm) - self._output = os.fdopen(fd, "w") - try: - if hasattr(os, 'chmod'): - os.chmod(self._filename, perm) - except OSError: - pass - self._savestdout = sys.stdout - sys.stdout = self._output + self._savestdout = sys.stdout + sys.stdout = self._output + else: + # This may raise OSError + if self._openhook: + self._file = self._openhook(self._filename, self._mode) else: - # This may raise OSError - if self._openhook: - self._file = self._openhook(self._filename, self._mode) - else: - self._file = open(self._filename, self._mode) - self._buffer = self._file.readlines(self._bufsize) - self._bufindex = 0 - if not self._buffer: - self.nextfile() - # Recursive call - return self.readline() + self._file = open(self._filename, self._mode) + self._readline = self._file.readline + return self._readline() def filename(self): return self._filename def lineno(self): - return self._lineno + return self._startlineno + self._filelineno def filelineno(self): return self._filelineno |