diff options
author | Christian Heimes <christian@python.org> | 2022-03-22 09:37:00 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-22 09:37:00 (GMT) |
commit | 4f97d64c831c94660ceb01f34d51fa236ad968b0 (patch) | |
tree | ad8b4e06cddb3112553a98679f3a6b4be6e34606 /Lib/hashlib.py | |
parent | 3751b6b030b4a3b88959b4f3c4ef2e58d325e497 (diff) | |
download | cpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.zip cpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.tar.gz cpython-4f97d64c831c94660ceb01f34d51fa236ad968b0.tar.bz2 |
bpo-45150: Add hashlib.file_digest() for efficient file hashing (GH-31930)
Diffstat (limited to 'Lib/hashlib.py')
-rw-r--r-- | Lib/hashlib.py | 48 |
1 files changed, 47 insertions, 1 deletions
def file_digest(fileobj, digest, /, *, _bufsize=2**18):
    """Hash the contents of a file-like object. Returns a digest object.

    *fileobj* must be a file-like object opened for reading in binary mode.
    It accepts file objects from open(), io.BytesIO(), and SocketIO objects.
    The function may bypass Python's I/O and use the file descriptor *fileno*
    directly.

    *digest* must either be a hash algorithm name as a *str*, a hash
    constructor, or a callable that returns a hash object.

    *_bufsize* is the size in bytes of the reusable read buffer used for
    objects that are read through readinto().

    Raises ValueError if *fileobj* does not look like a readable binary
    file (no readinto(), no readable(), or readable() is false), and
    BlockingIOError if readinto() reports that no data is currently
    available (i.e. it returns None, as non-blocking streams do).
    """
    # On Linux we could use AF_ALG sockets and sendfile() to achieve zero-copy
    # hashing with hardware acceleration.
    if isinstance(digest, str):
        digestobj = new(digest)
    else:
        digestobj = digest()

    if hasattr(fileobj, "getbuffer"):
        # io.BytesIO object, use zero-copy buffer.
        # Note: the buffer returned by getbuffer() is hashed as-is; no
        # read() call is made on the object.
        digestobj.update(fileobj.getbuffer())
        return digestobj

    # Only binary files implement readinto().
    if not (
        hasattr(fileobj, "readinto")
        and hasattr(fileobj, "readable")
        and fileobj.readable()
    ):
        raise ValueError(
            f"'{fileobj!r}' is not a file-like object in binary reading mode."
        )

    # binary file, socket.SocketIO object
    # Note: socket I/O uses different syscalls than file I/O.
    buf = bytearray(_bufsize)  # Reusable buffer to reduce allocations.
    view = memoryview(buf)
    while True:
        size = fileobj.readinto(buf)
        if size is None:
            # Non-blocking stream with no data available. Without this
            # check view[:None] would hash the whole (stale) buffer and
            # the loop would never terminate.
            raise BlockingIOError("I/O operation would block.")
        if size == 0:
            break  # EOF
        # Slice the memoryview, not the bytearray, to avoid a copy.
        digestobj.update(view[:size])

    return digestobj