summaryrefslogtreecommitdiffstats
path: root/Lib/hashlib.py
diff options
context:
space:
mode:
authorChristian Heimes <christian@python.org>2022-03-22 09:37:00 (GMT)
committerGitHub <noreply@github.com>2022-03-22 09:37:00 (GMT)
commit4f97d64c831c94660ceb01f34d51fa236ad968b0 (patch)
treead8b4e06cddb3112553a98679f3a6b4be6e34606 /Lib/hashlib.py
parent3751b6b030b4a3b88959b4f3c4ef2e58d325e497 (diff)
downloadcpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.zip
cpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.tar.gz
cpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.tar.bz2
bpo-45150: Add hashlib.file_digest() for efficient file hashing (GH-31930)
Diffstat (limited to 'Lib/hashlib.py')
-rw-r--r--Lib/hashlib.py48
1 files changed, 47 insertions, 1 deletions
diff --git a/Lib/hashlib.py b/Lib/hashlib.py
index 5625018..b546a3f 100644
--- a/Lib/hashlib.py
+++ b/Lib/hashlib.py
@@ -65,7 +65,7 @@ algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
__all__ = __always_supported + ('new', 'algorithms_guaranteed',
- 'algorithms_available', 'pbkdf2_hmac')
+ 'algorithms_available', 'pbkdf2_hmac', 'file_digest')
__builtin_constructor_cache = {}
@@ -254,6 +254,52 @@ except ImportError:
pass
def file_digest(fileobj, digest, /, *, _bufsize=2**18):
    """Hash the contents of a file-like object. Returns a digest object.

    *fileobj* must be a file-like object opened for reading in binary mode.
    It accepts file objects from open(), io.BytesIO(), and SocketIO objects.
    The function may bypass Python's I/O and use the file descriptor *fileno*
    directly.

    *digest* must either be a hash algorithm name as a *str*, a hash
    constructor, or a callable that returns a hash object.

    Raises ValueError if *fileobj* is not a readable binary file-like
    object, and BlockingIOError if *fileobj* is in non-blocking mode and
    a read finds no data available.
    """
    # On Linux we could use AF_ALG sockets and sendfile() to achieve zero-copy
    # hashing with hardware acceleration.
    if isinstance(digest, str):
        digestobj = new(digest)
    else:
        digestobj = digest()

    if hasattr(fileobj, "getbuffer"):
        # io.BytesIO object, use zero-copy buffer
        digestobj.update(fileobj.getbuffer())
        return digestobj

    # Only binary files implement readinto().
    if not (
        hasattr(fileobj, "readinto")
        and hasattr(fileobj, "readable")
        and fileobj.readable()
    ):
        raise ValueError(
            f"'{fileobj!r}' is not a file-like object in binary reading mode."
        )

    # binary file, socket.SocketIO object
    # Note: socket I/O uses different syscalls than file I/O.
    buf = bytearray(_bufsize)  # Reusable buffer to reduce allocations.
    view = memoryview(buf)
    while True:
        size = fileobj.readinto(buf)
        if size is None:
            # Non-blocking object with no data ready.  Without this check
            # view[:None] would feed the whole (stale) buffer to the hash
            # and the loop would never terminate.
            raise BlockingIOError("I/O operation would block.")
        if size == 0:
            break  # EOF
        digestobj.update(view[:size])

    return digestobj
+
+
for __func_name in __always_supported:
# try them all, some may not work due to the OpenSSL
# version not supporting that algorithm.