diff options
author | Gregory P. Smith <greg@mad-scientist.com> | 2005-08-21 18:45:59 (GMT) |
---|---|---|
committer | Gregory P. Smith <greg@mad-scientist.com> | 2005-08-21 18:45:59 (GMT) |
commit | f21a5f773964d34c7b6deb7e3d753fae2b9c70e2 (patch) | |
tree | ba3b66cea11da1d8e930555aa5a10f775a285d84 /Lib | |
parent | 33a5f2af59ddcf3f1b0447a8dbd0576fd78de303 (diff) | |
download | cpython-f21a5f773964d34c7b6deb7e3d753fae2b9c70e2.zip cpython-f21a5f773964d34c7b6deb7e3d753fae2b9c70e2.tar.gz cpython-f21a5f773964d34c7b6deb7e3d753fae2b9c70e2.tar.bz2 |
[ sf.net patch # 1121611 ]
A new hashlib module to replace the md5 and sha modules. It adds
support for additional secure hashes such as SHA-256 and SHA-512. The
hashlib module uses OpenSSL for fast, platform-optimized
implementations of algorithms when available. The old md5 and sha
modules still exist as wrappers around hashlib to preserve backwards
compatibility.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/hashlib.py | 110 | ||||
-rw-r--r-- | Lib/hmac.py | 24 | ||||
-rw-r--r-- | Lib/md5.py | 10 | ||||
-rw-r--r-- | Lib/sha.py | 11 | ||||
-rwxr-xr-x | Lib/test/regrtest.py | 3 | ||||
-rw-r--r-- | Lib/test/test_hashlib.py | 191 | ||||
-rw-r--r-- | Lib/test/test_hashlib_speed.py | 93 | ||||
-rw-r--r-- | Lib/test/test_hmac.py | 9 |
8 files changed, 438 insertions, 13 deletions
diff --git a/Lib/hashlib.py b/Lib/hashlib.py new file mode 100644 index 0000000..3528699 --- /dev/null +++ b/Lib/hashlib.py @@ -0,0 +1,110 @@ +# $Id$ +# +# Copyright (C) 2005 Gregory P. Smith (greg@electricrain.com) +# Licensed to PSF under a Contributor Agreement. +# + +__doc__ = """hashlib module - A common interface to many hash functions. + +new(name, string='') - returns a new hash object implementing the + given hash function; initializing the hash + using the given string data. + +Named constructor functions are also available, these are much faster +than using new(): + +md5(), sha1(), sha224(), sha256(), sha384(), and sha512() + +More algorithms may be available on your platform but the above are +guaranteed to exist. + +Choose your hash function wisely. Some have known weaknesses. +sha384 and sha512 will be slow on 32 bit platforms. +""" + + +def __get_builtin_constructor(name): + if name in ('SHA1', 'sha1'): + import _sha + return _sha.new + elif name in ('MD5', 'md5'): + import _md5 + return _md5.new + elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): + import _sha256 + bs = name[3:] + if bs == '256': + return _sha256.sha256 + elif bs == '224': + return _sha256.sha224 + elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): + import _sha512 + bs = name[3:] + if bs == '512': + return _sha512.sha512 + elif bs == '384': + return _sha512.sha384 + + raise ValueError, "unsupported hash type" + + +def __py_new(name, string=''): + """new(name, string='') - Return a new hashing object using the named algorithm; + optionally initialized with a string. + """ + return __get_builtin_constructor(name)(string) + + +def __hash_new(name, string=''): + """new(name, string='') - Return a new hashing object using the named algorithm; + optionally initialized with a string. + """ + try: + return _hashlib.new(name, string) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. 
+ # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. + return __get_builtin_constructor(name)(string) + + +try: + import _hashlib + # use the wrapper of the C implementation + new = __hash_new + + for opensslFuncName in filter(lambda n: n.startswith('openssl_'), dir(_hashlib)): + funcName = opensslFuncName[len('openssl_'):] + try: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + f = getattr(_hashlib, opensslFuncName) + f() + # Use the C function directly (very fast) + exec funcName + ' = f' + except ValueError: + try: + # Use the builtin implementation directly (fast) + exec funcName + ' = __get_builtin_constructor(funcName)' + except ValueError: + # this one has no builtin implementation, don't define it + pass + # clean up our locals + del f + del opensslFuncName + del funcName + +except ImportError: + # We don't have the _hashlib OpenSSL module? + # use the built in legacy interfaces via a wrapper function + new = __py_new + + # lookup the C function to use directly for the named constructors + md5 = __get_builtin_constructor('md5') + sha1 = __get_builtin_constructor('sha1') + sha224 = __get_builtin_constructor('sha224') + sha256 = __get_builtin_constructor('sha256') + sha384 = __get_builtin_constructor('sha384') + sha512 = __get_builtin_constructor('sha512') + diff --git a/Lib/hmac.py b/Lib/hmac.py index 11b0fb3..41d6c6c 100644 --- a/Lib/hmac.py +++ b/Lib/hmac.py @@ -28,27 +28,33 @@ class HMAC: key: key for the keyed hash object. msg: Initial input for the hash, if provided. - digestmod: A module supporting PEP 247. Defaults to the md5 module. + digestmod: A module supporting PEP 247. *OR* + A hashlib constructor returning a new hash object. + Defaults to hashlib.md5. 
""" if key is _secret_backdoor_key: # cheap return if digestmod is None: - import md5 - digestmod = md5 + import hashlib + digestmod = hashlib.md5 - self.digestmod = digestmod - self.outer = digestmod.new() - self.inner = digestmod.new() - self.digest_size = digestmod.digest_size + if callable(digestmod): + self.digest_cons = digestmod + else: + self.digest_cons = lambda d='': digestmod.new(d) + + self.outer = self.digest_cons() + self.inner = self.digest_cons() + self.digest_size = self.inner.digest_size blocksize = 64 ipad = "\x36" * blocksize opad = "\x5C" * blocksize if len(key) > blocksize: - key = digestmod.new(key).digest() + key = self.digest_cons(key).digest() key = key + chr(0) * (blocksize - len(key)) self.outer.update(_strxor(key, opad)) @@ -70,7 +76,7 @@ class HMAC: An update to this copy won't affect the original object. """ other = HMAC(_secret_backdoor_key) - other.digestmod = self.digestmod + other.digest_cons = self.digest_cons other.digest_size = self.digest_size other.inner = self.inner.copy() other.outer = self.outer.copy() diff --git a/Lib/md5.py b/Lib/md5.py new file mode 100644 index 0000000..bbe1984 --- /dev/null +++ b/Lib/md5.py @@ -0,0 +1,10 @@ +# $Id$ +# +# Copyright (C) 2005 Gregory P. Smith (greg@electricrain.com) +# Licensed to PSF under a Contributor Agreement. + +from hashlib import md5 +new = md5 + +blocksize = 1 # legacy value (wrong in any useful sense) +digest_size = 16 diff --git a/Lib/sha.py b/Lib/sha.py new file mode 100644 index 0000000..9d914a9 --- /dev/null +++ b/Lib/sha.py @@ -0,0 +1,11 @@ +# $Id$ +# +# Copyright (C) 2005 Gregory P. Smith (greg@electricrain.com) +# Licensed to PSF under a Contributor Agreement. 
+ +from hashlib import sha1 as sha +new = sha + +blocksize = 1 # legacy value (wrong in any useful sense) +digest_size = 20 +digestsize = 20 diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 2f620a2..e1c878c 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -1090,6 +1090,9 @@ class _ExpectedSkips: s = _expectations[sys.platform] self.expected = set(s.split()) + # this isn't a regularly run unit test, it is always skipped + self.expected.add('test_hashlib_speed') + if not os.path.supports_unicode_filenames: self.expected.add('test_pep277') diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py new file mode 100644 index 0000000..1dcadcd --- /dev/null +++ b/Lib/test/test_hashlib.py @@ -0,0 +1,191 @@ +# Test hashlib module +# +# $Id$ +# +# Copyright (C) 2005 Gregory P. Smith (greg@electricrain.com) +# Licensed to PSF under a Contributor Agreement. +# + +import hashlib +import unittest +from test import test_support + + +def hexstr(s): + import string + h = string.hexdigits + r = '' + for c in s: + i = ord(c) + r = r + h[(i >> 4) & 0xF] + h[i & 0xF] + return r + + +class HashLibTestCase(unittest.TestCase): + supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', + 'sha224', 'SHA224', 'sha256', 'SHA256', + 'sha384', 'SHA384', 'sha512', 'SHA512' ) + + def test_unknown_hash(self): + try: + hashlib.new('spam spam spam spam spam') + except ValueError: + pass + else: + self.assert_(0 == "hashlib didn't reject bogus hash name") + + def test_hexdigest(self): + for name in self.supported_hash_names: + h = hashlib.new(name) + self.assert_(hexstr(h.digest()) == h.hexdigest()) + + + def test_large_update(self): + aas = 'a' * 128 + bees = 'b' * 127 + cees = 'c' * 126 + + for name in self.supported_hash_names: + m1 = hashlib.new(name) + m1.update(aas) + m1.update(bees) + m1.update(cees) + + m2 = hashlib.new(name) + m2.update(aas + bees + cees) + self.assertEqual(m1.digest(), m2.digest()) + + + def check(self, name, data, digest): + # test the 
direct constructors + computed = getattr(hashlib, name)(data).hexdigest() + self.assert_(computed == digest) + # test the general new() interface + computed = hashlib.new(name, data).hexdigest() + self.assert_(computed == digest) + + + def test_case_md5_0(self): + self.check('md5', '', 'd41d8cd98f00b204e9800998ecf8427e') + + def test_case_md5_1(self): + self.check('md5', 'abc', '900150983cd24fb0d6963f7d28e17f72') + + def test_case_md5_2(self): + self.check('md5', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', + 'd174ab98d277d9f5a5611c2c9f419d9f') + + + # use the three examples from Federal Information Processing Standards + # Publication 180-1, Secure Hash Standard, 1995 April 17 + # http://www.itl.nist.gov/div897/pubs/fip180-1.htm + + def test_case_sha1_0(self): + self.check('sha1', "", + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + + def test_case_sha1_1(self): + self.check('sha1', "abc", + "a9993e364706816aba3e25717850c26c9cd0d89d") + + def test_case_sha1_2(self): + self.check('sha1', "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "84983e441c3bd26ebaae4aa1f95129e5e54670f1") + + def test_case_sha1_3(self): + self.check('sha1', "a" * 1000000, + "34aa973cd4c4daa4f61eeb2bdbad27316534016f") + + + # use the examples from Federal Information Processing Standards + # Publication 180-2, Secure Hash Standard, 2002 August 1 + # http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf + + def test_case_sha224_0(self): + self.check('sha224', "", + "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f") + + def test_case_sha224_1(self): + self.check('sha224', "abc", + "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7") + + def test_case_sha224_2(self): + self.check('sha224', + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525") + + def test_case_sha224_3(self): + self.check('sha224', "a" * 1000000, + 
"20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67") + + + def test_case_sha256_0(self): + self.check('sha256', "", + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") + + def test_case_sha256_1(self): + self.check('sha256', "abc", + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad") + + def test_case_sha256_2(self): + self.check('sha256', + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1") + + def test_case_sha256_3(self): + self.check('sha256', "a" * 1000000, + "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0") + + + def test_case_sha384_0(self): + self.check('sha384', "", + "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da"+ + "274edebfe76f65fbd51ad2f14898b95b") + + def test_case_sha384_1(self): + self.check('sha384', "abc", + "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed"+ + "8086072ba1e7cc2358baeca134c825a7") + + def test_case_sha384_2(self): + self.check('sha384', + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+ + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712"+ + "fcc7c71a557e2db966c3e9fa91746039") + + def test_case_sha384_3(self): + self.check('sha384', "a" * 1000000, + "9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b"+ + "07b8b3dc38ecc4ebae97ddd87f3d8985") + + + def test_case_sha512_0(self): + self.check('sha512', "", + "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce"+ + "47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e") + + def test_case_sha512_1(self): + self.check('sha512', "abc", + "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a"+ + "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f") + + def test_case_sha512_2(self): + self.check('sha512', + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"+ + 
"hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + "8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018"+ + "501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909") + + def test_case_sha512_3(self): + self.check('sha512', "a" * 1000000, + "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ + "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") + + +def test_main(): + test_support.run_unittest(HashLibTestCase) + + +if __name__ == "__main__": + test_main() diff --git a/Lib/test/test_hashlib_speed.py b/Lib/test/test_hashlib_speed.py new file mode 100644 index 0000000..a62d923 --- /dev/null +++ b/Lib/test/test_hashlib_speed.py @@ -0,0 +1,93 @@ + +import sys, time +import hashlib +from test import test_support + + +def creatorFunc(): + raise RuntimeError, "eek, creatorFunc not overridden" + + +def test_scaled_msg(scale, name): + + iterations = 106201/scale * 20 + longStr = 'Z'*scale + + localCF = creatorFunc + start = time.time() + for f in xrange(iterations): + x = localCF(longStr).digest() + end = time.time() + + print ('%2.2f' % (end-start)), "seconds", iterations, "x", len(longStr), "bytes", name + +def test_create(): + start = time.time() + for f in xrange(20000): + d = creatorFunc() + end = time.time() + + print ('%2.2f' % (end-start)), "seconds", '[20000 creations]' + +def test_zero(): + start = time.time() + for f in xrange(20000): + x = creatorFunc().digest() + end = time.time() + + print ('%2.2f' % (end-start)), "seconds", '[20000 "" digests]' + + + +### this 'test' is not normally run. 
skip it if the test runner finds it +if __name__ != '__main__': + raise test_support.TestSkipped, "not a unit test (stand alone benchmark)" + +hName = sys.argv[1] + +# +# setup our creatorFunc to test the requested hash +# +if hName in ('_md5', '_sha'): + exec 'import '+hName + exec 'creatorFunc = '+hName+'.new' + print "testing speed of old", hName, "legacy interface" +elif hName == '_hashlib' and len(sys.argv) > 3: + import _hashlib + exec 'creatorFunc = _hashlib.%s' % sys.argv[2] + print "testing speed of _hashlib.%s" % sys.argv[2], getattr(_hashlib, sys.argv[2]) +elif hName == '_hashlib' and len(sys.argv) == 3: + import _hashlib + exec 'creatorFunc = lambda x=_hashlib.new : x(%r)' % sys.argv[2] + print "testing speed of _hashlib.new(%r)" % sys.argv[2] +elif hasattr(hashlib, hName) and callable(getattr(hashlib, hName)): + creatorFunc = getattr(hashlib, hName) + print "testing speed of hashlib."+hName, getattr(hashlib, hName) +else: + exec "creatorFunc = lambda x=hashlib.new : x(%r)" % hName + print "testing speed of hashlib.new(%r)" % hName + +try: + test_create() +except ValueError: + print + print "pass argument(s) naming the hash to run a speed test on:" + print " '_md5' and '_sha' test the legacy builtin md5 and sha" + print " '_hashlib' 'openssl_hName' 'fast' tests the builtin _hashlib" + print " '_hashlib' 'hName' tests builtin _hashlib.new(shaFOO)" + print " 'hName' tests the hashlib.hName() implementation if it exists" + print " otherwise it uses hashlib.new(hName)." 
+ print + raise + +test_zero() +test_scaled_msg(scale=106201, name='[huge data]') +test_scaled_msg(scale=10620, name='[large data]') +test_scaled_msg(scale=1062, name='[medium data]') +test_scaled_msg(scale=424, name='[4*small data]') +test_scaled_msg(scale=336, name='[3*small data]') +test_scaled_msg(scale=212, name='[2*small data]') +test_scaled_msg(scale=106, name='[small data]') +test_scaled_msg(scale=creatorFunc().digest_size, name='[digest_size data]') +test_scaled_msg(scale=10, name='[tiny data]') + diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index b365794..9d094d2 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -105,9 +105,10 @@ class SanityTestCase(unittest.TestCase): def test_default_is_md5(self): # Testing if HMAC defaults to MD5 algorithm. - import md5 + # NOTE: this whitebox test depends on the hmac class internals + import hashlib h = hmac.HMAC("key") - self.failUnless(h.digestmod == md5) + self.failUnless(h.digest_cons == hashlib.md5) def test_exercise_all_methods(self): # Exercising all methods once. @@ -127,8 +128,8 @@ class CopyTestCase(unittest.TestCase): # Testing if attributes are of same type. h1 = hmac.HMAC("key") h2 = h1.copy() - self.failUnless(h1.digestmod == h2.digestmod, - "Modules don't match.") + self.failUnless(h1.digest_cons == h2.digest_cons, + "digest constructors don't match.") self.failUnless(type(h1.inner) == type(h2.inner), "Types of inner don't match.") self.failUnless(type(h1.outer) == type(h2.outer), |