diff options
author | Inada Naoki <songofacandy@gmail.com> | 2021-04-12 05:46:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-12 05:46:53 (GMT) |
commit | cc2ffcdfd78df3a18edae60df81b2f1b044b1634 (patch) | |
tree | 0b93072493e3767357def749f92241822be543c9 | |
parent | 553ee2781a37ac9d2068da3e1325a780ca79e21e (diff) | |
download | cpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.zip cpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.tar.gz cpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.tar.bz2 |
bpo-43785: Improve BZ2File performance by removing RLock (GH-25299)
Remove `RLock` from `BZ2File`. It makes `BZ2File` to thread unsafe, but
gzip and lzma don't use it too.
Co-authored-by: Gregory P. Smith <greg@krypto.org>
-rw-r--r-- | Lib/bz2.py | 103 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst | 4 |
2 files changed, 48 insertions, 59 deletions
@@ -13,7 +13,6 @@ from builtins import open as _builtin_open import io import os import _compression -from threading import RLock from _bz2 import BZ2Compressor, BZ2Decompressor @@ -53,9 +52,6 @@ class BZ2File(_compression.BaseStream): If mode is 'r', the input file may be the concatenation of multiple compressed streams. """ - # This lock must be recursive, so that BufferedIOBase's - # writelines() does not deadlock. - self._lock = RLock() self._fp = None self._closefp = False self._mode = _MODE_CLOSED @@ -104,24 +100,23 @@ class BZ2File(_compression.BaseStream): May be called more than once without error. Once the file is closed, any other operation on it will raise a ValueError. """ - with self._lock: - if self._mode == _MODE_CLOSED: - return + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: try: - if self._mode == _MODE_READ: - self._buffer.close() - elif self._mode == _MODE_WRITE: - self._fp.write(self._compressor.flush()) - self._compressor = None + if self._closefp: + self._fp.close() finally: - try: - if self._closefp: - self._fp.close() - finally: - self._fp = None - self._closefp = False - self._mode = _MODE_CLOSED - self._buffer = None + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._buffer = None @property def closed(self): @@ -153,12 +148,11 @@ class BZ2File(_compression.BaseStream): Always returns at least one byte of data, unless at EOF. The exact number of bytes returned is unspecified. """ - with self._lock: - self._check_can_read() - # Relies on the undocumented fact that BufferedReader.peek() - # always returns at least one byte (except at EOF), independent - # of the value of n - return self._buffer.peek(n) + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() + # always returns at least one byte (except at EOF), independent + # of the value of n + return self._buffer.peek(n) def read(self, size=-1): """Read up to size uncompressed bytes from the file. @@ -166,9 +160,8 @@ class BZ2File(_compression.BaseStream): If size is negative or omitted, read until EOF is reached. Returns b'' if the file is already at EOF. """ - with self._lock: - self._check_can_read() - return self._buffer.read(size) + self._check_can_read() + return self._buffer.read(size) def read1(self, size=-1): """Read up to size uncompressed bytes, while trying to avoid @@ -177,20 +170,18 @@ class BZ2File(_compression.BaseStream): Returns b'' if the file is at EOF. """ - with self._lock: - self._check_can_read() - if size < 0: - size = io.DEFAULT_BUFFER_SIZE - return self._buffer.read1(size) + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) def readinto(self, b): """Read bytes into b. Returns the number of bytes read (0 for EOF). """ - with self._lock: - self._check_can_read() - return self._buffer.readinto(b) + self._check_can_read() + return self._buffer.readinto(b) def readline(self, size=-1): """Read a line of uncompressed bytes from the file. @@ -203,9 +194,8 @@ class BZ2File(_compression.BaseStream): if not hasattr(size, "__index__"): raise TypeError("Integer argument expected") size = size.__index__() - with self._lock: - self._check_can_read() - return self._buffer.readline(size) + self._check_can_read() + return self._buffer.readline(size) def readlines(self, size=-1): """Read a list of lines of uncompressed bytes from the file. @@ -218,9 +208,8 @@ class BZ2File(_compression.BaseStream): if not hasattr(size, "__index__"): raise TypeError("Integer argument expected") size = size.__index__() - with self._lock: - self._check_can_read() - return self._buffer.readlines(size) + self._check_can_read() + return self._buffer.readlines(size) def write(self, data): """Write a byte string to the file. @@ -229,12 +218,11 @@ class BZ2File(_compression.BaseStream): always len(data). Note that due to buffering, the file on disk may not reflect the data written until close() is called. """ - with self._lock: - self._check_can_write() - compressed = self._compressor.compress(data) - self._fp.write(compressed) - self._pos += len(data) - return len(data) + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) def writelines(self, seq): """Write a sequence of byte strings to the file. @@ -244,8 +232,7 @@ class BZ2File(_compression.BaseStream): Line separators are not added between the written byte strings. """ - with self._lock: - return _compression.BaseStream.writelines(self, seq) + return _compression.BaseStream.writelines(self, seq) def seek(self, offset, whence=io.SEEK_SET): """Change the file position. @@ -262,17 +249,15 @@ class BZ2File(_compression.BaseStream): Note that seeking is emulated, so depending on the parameters, this operation may be extremely slow. """ - with self._lock: - self._check_can_seek() - return self._buffer.seek(offset, whence) + self._check_can_seek() + return self._buffer.seek(offset, whence) def tell(self): """Return the current file position.""" - with self._lock: - self._check_not_closed() - if self._mode == _MODE_READ: - return self._buffer.tell() - return self._pos + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos def open(filename, mode="rb", compresslevel=9, diff --git a/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst new file mode 100644 index 0000000..b4ed5e5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst @@ -0,0 +1,4 @@ +Improve ``bz2.BZ2File`` performance by removing the RLock from BZ2File. +This makes BZ2File thread unsafe in the face of multiple simultaneous +readers or writers, just like its equivalent classes in :mod:`gzip` and +:mod:`lzma` have always been. Patch by Inada Naoki. |