From d3a12e1058e9afcb77b265a88690f1d3d5190fd7 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Tue, 12 Sep 2006 10:25:35 -0500 Subject: [svn-r12661] Description: Add 'loookup3' checksum routine and switch to using it for metadata checksums - it's just as "strong" as the CRC32 and about 40% faster in general (with some compiler optimizations, it's nearly as fast as the fletcher-32 algorithm). Tested on: Linux/32 2.6 (chicago) Linux/64 2.6 (chicago2) --- src/H5checksum.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/H5private.h | 1 + test/tchecksum.c | 30 +++++++++ 3 files changed, 213 insertions(+), 10 deletions(-) diff --git a/src/H5checksum.c b/src/H5checksum.c index fd2a075..98260ef 100644 --- a/src/H5checksum.c +++ b/src/H5checksum.c @@ -97,7 +97,8 @@ static hbool_t H5_crc_table_computed = FALSE; * Note #3: The algorithm below differs from that given in the Wikipedia * page by copying the data into 'sum1' in a more portable way * and also by initializing 'sum1' and 'sum2' to 0 instead of - * 0xffff (for backward compatibility reasons, mostly). + * 0xffff (for backward compatibility reasons with earlier + * HDF5 fletcher32 I/O filter routine, mostly). * * Return: 32-bit fletcher checksum of input buffer (can't fail) * @@ -250,6 +251,184 @@ H5_checksum_crc(const void *_data, size_t len) FUNC_LEAVE_NOAPI(H5_checksum_crc_update((uint32_t)0xffffffffL, (const uint8_t *)_data, len) ^ 0xffffffffL) } /* end H5_checksum_crc() */ +/* +------------------------------------------------------------------------------- +H5_lookup3_mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define H5_lookup3_rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) +#define H5_lookup3_mix(a,b,c) \ +{ \ + a -= c; a ^= H5_lookup3_rot(c, 4); c += b; \ + b -= a; b ^= H5_lookup3_rot(a, 6); a += c; \ + c -= b; c ^= H5_lookup3_rot(b, 8); b += a; \ + a -= c; a ^= H5_lookup3_rot(c,16); c += b; \ + b -= a; b ^= H5_lookup3_rot(a,19); a += c; \ + c -= b; c ^= H5_lookup3_rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +H5_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define H5_lookup3_final(a,b,c) \ +{ \ + c ^= b; c -= H5_lookup3_rot(b,14); \ + a ^= c; a -= H5_lookup3_rot(c,11); \ + b ^= a; b -= H5_lookup3_rot(a,25); \ + c ^= b; c -= H5_lookup3_rot(b,16); \ + a ^= c; a -= H5_lookup3_rot(c,4); \ + b ^= a; b -= H5_lookup3_rot(a,14); \ + c ^= b; c -= H5_lookup3_rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +H5_checksum_lookup3() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 0); + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) /* + initval */; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + H5_lookup3_mix(a, b, c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : goto done; + } + + H5_lookup3_final(a, b, c); + +done: + FUNC_LEAVE_NOAPI(c) +} /* end H5_checksum_lookup3() */ + /*------------------------------------------------------------------------- * Function: H5_checksum_metadata @@ -268,8 +447,6 @@ H5_checksum_crc(const void *_data, size_t len) uint32_t H5_checksum_metadata(const void *data, size_t len) { - uint32_t chksum; /* Checksum value to return */ - FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5_checksum_metadata) /* Sanity check */ @@ -277,12 +454,7 @@ H5_checksum_metadata(const void *data, size_t len) HDassert(len > 0); /* Choose the appropriate checksum routine */ - /* (use Fletcher's checksum for "larger" buffers and CRC for "shorter" ones) */ - if(len < 256) - chksum = H5_checksum_crc(data, len); - else - chksum = H5_checksum_fletcher32(data, len); - - FUNC_LEAVE_NOAPI(chksum) + /* (use Bob Jenkin's "lookup3" algorithm for all buffer sizes) */ + FUNC_LEAVE_NOAPI(H5_checksum_lookup3(data, len)) } /* end H5_checksum_metadata() */ diff --git a/src/H5private.h b/src/H5private.h index e9beabd..254c2eb 100644 --- a/src/H5private.h +++ b/src/H5private.h @@ -1449,6 +1449,7 @@ H5_DLL int H5Z_term_interface(void); /* Checksum functions */ H5_DLL uint32_t H5_checksum_fletcher32(const void *data, size_t len); H5_DLL uint32_t H5_checksum_crc(const void *data, size_t len); +H5_DLL uint32_t H5_checksum_lookup3(const void *data, size_t len); H5_DLL uint32_t H5_checksum_metadata(const void *data, size_t len); /* Functions for debugging */ diff --git a/test/tchecksum.c b/test/tchecksum.c index ee492f8..4c860b7 100644 --- a/test/tchecksum.c +++ b/test/tchecksum.c @@ -57,6 +57,9 @@ test_chksum_size_one(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xfa2568b7, "H5_checksum_crc"); + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0xa209c931, "H5_checksum_lookup3"); + /* Buffer w/zero(s) for data */ HDmemset(buf, 0, sizeof(buf)); chksum = H5_checksum_fletcher32(buf, sizeof(buf)); @@ -64,6 +67,9 @@ test_chksum_size_one(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xfa60fb57, "H5_checksum_crc"); + + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x8ba9414b, "H5_checksum_lookup3"); } /* test_chksum_size_one() */ @@ -85,6 +91,9 @@ test_chksum_size_two(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xfc856608, "H5_checksum_crc"); + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x8ba7a6c9, "H5_checksum_lookup3"); + /* Buffer w/zero(s) for data */ HDmemset(buf, 0, sizeof(buf)); chksum = H5_checksum_fletcher32(buf, sizeof(buf)); @@ -92,6 +101,9 @@ test_chksum_size_two(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xfc7e9b20, "H5_checksum_crc"); + + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x62cd61b3, "H5_checksum_lookup3"); } /* test_chksum_size_two() */ @@ -113,6 +125,9 @@ test_chksum_size_three(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xfebc5d70, "H5_checksum_crc"); + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0xcebdf4f0, "H5_checksum_lookup3"); + /* Buffer w/zero(s) for data */ HDmemset(buf, 0, sizeof(buf)); chksum = H5_checksum_fletcher32(buf, sizeof(buf)); @@ -120,6 +135,9 @@ test_chksum_size_three(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xf9cc4c7a, "H5_checksum_crc"); + + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x6bd0060f, "H5_checksum_lookup3"); } /* test_chksum_size_three() */ @@ -141,6 +159,9 @@ test_chksum_size_four(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xff398a46, "H5_checksum_crc"); + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x2c88bb51, "H5_checksum_lookup3"); + /* Buffer w/zero(s) for data */ HDmemset(buf, 0, sizeof(buf)); chksum = H5_checksum_fletcher32(buf, sizeof(buf)); @@ -148,6 +169,9 @@ test_chksum_size_four(void) chksum = H5_checksum_crc(buf, sizeof(buf)); VERIFY(chksum, 0xff117081, "H5_checksum_crc"); + + chksum = H5_checksum_lookup3(buf, sizeof(buf)); + VERIFY(chksum, 0x049396b8, "H5_checksum_lookup3"); } /* test_chksum_size_four() */ @@ -173,6 +197,9 @@ test_chksum_large(void) chksum = H5_checksum_crc(large_buf, sizeof(large_buf)); VERIFY(chksum, 0xfbd0f7c0, "H5_checksum_crc"); + chksum = H5_checksum_lookup3(large_buf, sizeof(large_buf)); + VERIFY(chksum, 0x1bd2ee7b, "H5_checksum_lookup3"); + /* Buffer w/zero(s) for data */ HDmemset(large_buf, 0, sizeof(large_buf)); chksum = H5_checksum_fletcher32(large_buf, sizeof(large_buf)); @@ -180,6 +207,9 @@ test_chksum_large(void) chksum = H5_checksum_crc(large_buf, sizeof(large_buf)); VERIFY(chksum, 0xfac8b4c4, "H5_checksum_crc"); + + chksum = H5_checksum_lookup3(large_buf, sizeof(large_buf)); + VERIFY(chksum, 0x930c7afc, "H5_checksum_lookup3"); } /* test_chksum_large() */ -- cgit v0.12