summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2021-04-12 05:46:53 (GMT)
committerGitHub <noreply@github.com>2021-04-12 05:46:53 (GMT)
commitcc2ffcdfd78df3a18edae60df81b2f1b044b1634 (patch)
tree0b93072493e3767357def749f92241822be543c9
parent553ee2781a37ac9d2068da3e1325a780ca79e21e (diff)
downloadcpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.zip
cpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.tar.gz
cpython-cc2ffcdfd78df3a18edae60df81b2f1b044b1634.tar.bz2
bpo-43785: Improve BZ2File performance by removing RLock (GH-25299)
Remove `RLock` from `BZ2File`. It makes `BZ2File` to thread unsafe, but gzip and lzma don't use it too. Co-authored-by: Gregory P. Smith <greg@krypto.org>
-rw-r--r--Lib/bz2.py103
-rw-r--r--Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst4
2 files changed, 48 insertions, 59 deletions
diff --git a/Lib/bz2.py b/Lib/bz2.py
index 1da3ce6..43f321a 100644
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -13,7 +13,6 @@ from builtins import open as _builtin_open
import io
import os
import _compression
-from threading import RLock
from _bz2 import BZ2Compressor, BZ2Decompressor
@@ -53,9 +52,6 @@ class BZ2File(_compression.BaseStream):
If mode is 'r', the input file may be the concatenation of
multiple compressed streams.
"""
- # This lock must be recursive, so that BufferedIOBase's
- # writelines() does not deadlock.
- self._lock = RLock()
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
@@ -104,24 +100,23 @@ class BZ2File(_compression.BaseStream):
May be called more than once without error. Once the file is
closed, any other operation on it will raise a ValueError.
"""
- with self._lock:
- if self._mode == _MODE_CLOSED:
- return
+ if self._mode == _MODE_CLOSED:
+ return
+ try:
+ if self._mode == _MODE_READ:
+ self._buffer.close()
+ elif self._mode == _MODE_WRITE:
+ self._fp.write(self._compressor.flush())
+ self._compressor = None
+ finally:
try:
- if self._mode == _MODE_READ:
- self._buffer.close()
- elif self._mode == _MODE_WRITE:
- self._fp.write(self._compressor.flush())
- self._compressor = None
+ if self._closefp:
+ self._fp.close()
finally:
- try:
- if self._closefp:
- self._fp.close()
- finally:
- self._fp = None
- self._closefp = False
- self._mode = _MODE_CLOSED
- self._buffer = None
+ self._fp = None
+ self._closefp = False
+ self._mode = _MODE_CLOSED
+ self._buffer = None
@property
def closed(self):
@@ -153,12 +148,11 @@ class BZ2File(_compression.BaseStream):
Always returns at least one byte of data, unless at EOF.
The exact number of bytes returned is unspecified.
"""
- with self._lock:
- self._check_can_read()
- # Relies on the undocumented fact that BufferedReader.peek()
- # always returns at least one byte (except at EOF), independent
- # of the value of n
- return self._buffer.peek(n)
+ self._check_can_read()
+ # Relies on the undocumented fact that BufferedReader.peek()
+ # always returns at least one byte (except at EOF), independent
+ # of the value of n
+ return self._buffer.peek(n)
def read(self, size=-1):
"""Read up to size uncompressed bytes from the file.
@@ -166,9 +160,8 @@ class BZ2File(_compression.BaseStream):
If size is negative or omitted, read until EOF is reached.
Returns b'' if the file is already at EOF.
"""
- with self._lock:
- self._check_can_read()
- return self._buffer.read(size)
+ self._check_can_read()
+ return self._buffer.read(size)
def read1(self, size=-1):
"""Read up to size uncompressed bytes, while trying to avoid
@@ -177,20 +170,18 @@ class BZ2File(_compression.BaseStream):
Returns b'' if the file is at EOF.
"""
- with self._lock:
- self._check_can_read()
- if size < 0:
- size = io.DEFAULT_BUFFER_SIZE
- return self._buffer.read1(size)
+ self._check_can_read()
+ if size < 0:
+ size = io.DEFAULT_BUFFER_SIZE
+ return self._buffer.read1(size)
def readinto(self, b):
"""Read bytes into b.
Returns the number of bytes read (0 for EOF).
"""
- with self._lock:
- self._check_can_read()
- return self._buffer.readinto(b)
+ self._check_can_read()
+ return self._buffer.readinto(b)
def readline(self, size=-1):
"""Read a line of uncompressed bytes from the file.
@@ -203,9 +194,8 @@ class BZ2File(_compression.BaseStream):
if not hasattr(size, "__index__"):
raise TypeError("Integer argument expected")
size = size.__index__()
- with self._lock:
- self._check_can_read()
- return self._buffer.readline(size)
+ self._check_can_read()
+ return self._buffer.readline(size)
def readlines(self, size=-1):
"""Read a list of lines of uncompressed bytes from the file.
@@ -218,9 +208,8 @@ class BZ2File(_compression.BaseStream):
if not hasattr(size, "__index__"):
raise TypeError("Integer argument expected")
size = size.__index__()
- with self._lock:
- self._check_can_read()
- return self._buffer.readlines(size)
+ self._check_can_read()
+ return self._buffer.readlines(size)
def write(self, data):
"""Write a byte string to the file.
@@ -229,12 +218,11 @@ class BZ2File(_compression.BaseStream):
always len(data). Note that due to buffering, the file on disk
may not reflect the data written until close() is called.
"""
- with self._lock:
- self._check_can_write()
- compressed = self._compressor.compress(data)
- self._fp.write(compressed)
- self._pos += len(data)
- return len(data)
+ self._check_can_write()
+ compressed = self._compressor.compress(data)
+ self._fp.write(compressed)
+ self._pos += len(data)
+ return len(data)
def writelines(self, seq):
"""Write a sequence of byte strings to the file.
@@ -244,8 +232,7 @@ class BZ2File(_compression.BaseStream):
Line separators are not added between the written byte strings.
"""
- with self._lock:
- return _compression.BaseStream.writelines(self, seq)
+ return _compression.BaseStream.writelines(self, seq)
def seek(self, offset, whence=io.SEEK_SET):
"""Change the file position.
@@ -262,17 +249,15 @@ class BZ2File(_compression.BaseStream):
Note that seeking is emulated, so depending on the parameters,
this operation may be extremely slow.
"""
- with self._lock:
- self._check_can_seek()
- return self._buffer.seek(offset, whence)
+ self._check_can_seek()
+ return self._buffer.seek(offset, whence)
def tell(self):
"""Return the current file position."""
- with self._lock:
- self._check_not_closed()
- if self._mode == _MODE_READ:
- return self._buffer.tell()
- return self._pos
+ self._check_not_closed()
+ if self._mode == _MODE_READ:
+ return self._buffer.tell()
+ return self._pos
def open(filename, mode="rb", compresslevel=9,
diff --git a/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst
new file mode 100644
index 0000000..b4ed5e5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-04-09-14-51-58.bpo-43785.1mM5xE.rst
@@ -0,0 +1,4 @@
+Improve ``bz2.BZ2File`` performance by removing the RLock from BZ2File.
+This makes BZ2File thread unsafe in the face of multiple simultaneous
+readers or writers, just like its equivalent classes in :mod:`gzip` and
+:mod:`lzma` have always been. Patch by Inada Naoki.