summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2006-09-12 15:25:35 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2006-09-12 15:25:35 (GMT)
commitd3a12e1058e9afcb77b265a88690f1d3d5190fd7 (patch)
tree407aafe3aa82cbebd081f14ac042f869eb25e553
parent314b3f597b0656820598ab89295af96fea24fd63 (diff)
downloadhdf5-d3a12e1058e9afcb77b265a88690f1d3d5190fd7.zip
hdf5-d3a12e1058e9afcb77b265a88690f1d3d5190fd7.tar.gz
hdf5-d3a12e1058e9afcb77b265a88690f1d3d5190fd7.tar.bz2
[svn-r12661] Description:
Add 'loookup3' checksum routine and switch to using it for metadata checksums - it's just as "strong" as the CRC32 and about 40% faster in general (with some compiler optimizations, it's nearly as fast as the fletcher-32 algorithm). Tested on: Linux/32 2.6 (chicago) Linux/64 2.6 (chicago2)
-rw-r--r--src/H5checksum.c192
-rw-r--r--src/H5private.h1
-rw-r--r--test/tchecksum.c30
3 files changed, 213 insertions, 10 deletions
diff --git a/src/H5checksum.c b/src/H5checksum.c
index fd2a075..98260ef 100644
--- a/src/H5checksum.c
+++ b/src/H5checksum.c
@@ -97,7 +97,8 @@ static hbool_t H5_crc_table_computed = FALSE;
* Note #3: The algorithm below differs from that given in the Wikipedia
* page by copying the data into 'sum1' in a more portable way
* and also by initializing 'sum1' and 'sum2' to 0 instead of
- * 0xffff (for backward compatibility reasons, mostly).
+ * 0xffff (for backward compatibility reasons with earlier
+ * HDF5 fletcher32 I/O filter routine, mostly).
*
* Return: 32-bit fletcher checksum of input buffer (can't fail)
*
@@ -250,6 +251,184 @@ H5_checksum_crc(const void *_data, size_t len)
FUNC_LEAVE_NOAPI(H5_checksum_crc_update((uint32_t)0xffffffffL, (const uint8_t *)_data, len) ^ 0xffffffffL)
} /* end H5_checksum_crc() */
+/*
+-------------------------------------------------------------------------------
+H5_lookup3_mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define H5_lookup3_rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
+#define H5_lookup3_mix(a,b,c) \
+{ \
+ a -= c; a ^= H5_lookup3_rot(c, 4); c += b; \
+ b -= a; b ^= H5_lookup3_rot(a, 6); a += c; \
+ c -= b; c ^= H5_lookup3_rot(b, 8); b += a; \
+ a -= c; a ^= H5_lookup3_rot(c,16); c += b; \
+ b -= a; b ^= H5_lookup3_rot(a,19); a += c; \
+ c -= b; c ^= H5_lookup3_rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+H5_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define H5_lookup3_final(a,b,c) \
+{ \
+ c ^= b; c -= H5_lookup3_rot(b,14); \
+ a ^= c; a -= H5_lookup3_rot(c,11); \
+ b ^= a; b -= H5_lookup3_rot(a,25); \
+ c ^= b; c -= H5_lookup3_rot(b,16); \
+ a ^= c; a -= H5_lookup3_rot(c,4); \
+ b ^= a; b -= H5_lookup3_rot(a,14); \
+ c ^= b; c -= H5_lookup3_rot(b,24); \
+}
+
+/*
+-------------------------------------------------------------------------------
+H5_checksum_lookup3() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ length : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t
+H5_checksum_lookup3(const void *key, size_t length /*, uint32_t initval */)
+{
+ const uint8_t *k = (const uint8_t *)key;
+ uint32_t a, b, c; /* internal state */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5_checksum_lookup3)
+
+ /* Sanity check */
+ HDassert(key);
+ HDassert(length > 0);
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) /* + initval */;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ H5_lookup3_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : goto done;
+ }
+
+ H5_lookup3_final(a, b, c);
+
+done:
+ FUNC_LEAVE_NOAPI(c)
+} /* end H5_checksum_lookup3() */
+
/*-------------------------------------------------------------------------
* Function: H5_checksum_metadata
@@ -268,8 +447,6 @@ H5_checksum_crc(const void *_data, size_t len)
uint32_t
H5_checksum_metadata(const void *data, size_t len)
{
- uint32_t chksum; /* Checksum value to return */
-
FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5_checksum_metadata)
/* Sanity check */
@@ -277,12 +454,7 @@ H5_checksum_metadata(const void *data, size_t len)
HDassert(len > 0);
/* Choose the appropriate checksum routine */
- /* (use Fletcher's checksum for "larger" buffers and CRC for "shorter" ones) */
- if(len < 256)
- chksum = H5_checksum_crc(data, len);
- else
- chksum = H5_checksum_fletcher32(data, len);
-
- FUNC_LEAVE_NOAPI(chksum)
+ /* (use Bob Jenkin's "lookup3" algorithm for all buffer sizes) */
+ FUNC_LEAVE_NOAPI(H5_checksum_lookup3(data, len))
} /* end H5_checksum_metadata() */
diff --git a/src/H5private.h b/src/H5private.h
index e9beabd..254c2eb 100644
--- a/src/H5private.h
+++ b/src/H5private.h
@@ -1449,6 +1449,7 @@ H5_DLL int H5Z_term_interface(void);
/* Checksum functions */
H5_DLL uint32_t H5_checksum_fletcher32(const void *data, size_t len);
H5_DLL uint32_t H5_checksum_crc(const void *data, size_t len);
+H5_DLL uint32_t H5_checksum_lookup3(const void *data, size_t len);
H5_DLL uint32_t H5_checksum_metadata(const void *data, size_t len);
/* Functions for debugging */
diff --git a/test/tchecksum.c b/test/tchecksum.c
index ee492f8..4c860b7 100644
--- a/test/tchecksum.c
+++ b/test/tchecksum.c
@@ -57,6 +57,9 @@ test_chksum_size_one(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xfa2568b7, "H5_checksum_crc");
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0xa209c931, "H5_checksum_lookup3");
+
/* Buffer w/zero(s) for data */
HDmemset(buf, 0, sizeof(buf));
chksum = H5_checksum_fletcher32(buf, sizeof(buf));
@@ -64,6 +67,9 @@ test_chksum_size_one(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xfa60fb57, "H5_checksum_crc");
+
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x8ba9414b, "H5_checksum_lookup3");
} /* test_chksum_size_one() */
@@ -85,6 +91,9 @@ test_chksum_size_two(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xfc856608, "H5_checksum_crc");
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x8ba7a6c9, "H5_checksum_lookup3");
+
/* Buffer w/zero(s) for data */
HDmemset(buf, 0, sizeof(buf));
chksum = H5_checksum_fletcher32(buf, sizeof(buf));
@@ -92,6 +101,9 @@ test_chksum_size_two(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xfc7e9b20, "H5_checksum_crc");
+
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x62cd61b3, "H5_checksum_lookup3");
} /* test_chksum_size_two() */
@@ -113,6 +125,9 @@ test_chksum_size_three(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xfebc5d70, "H5_checksum_crc");
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0xcebdf4f0, "H5_checksum_lookup3");
+
/* Buffer w/zero(s) for data */
HDmemset(buf, 0, sizeof(buf));
chksum = H5_checksum_fletcher32(buf, sizeof(buf));
@@ -120,6 +135,9 @@ test_chksum_size_three(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xf9cc4c7a, "H5_checksum_crc");
+
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x6bd0060f, "H5_checksum_lookup3");
} /* test_chksum_size_three() */
@@ -141,6 +159,9 @@ test_chksum_size_four(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xff398a46, "H5_checksum_crc");
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x2c88bb51, "H5_checksum_lookup3");
+
/* Buffer w/zero(s) for data */
HDmemset(buf, 0, sizeof(buf));
chksum = H5_checksum_fletcher32(buf, sizeof(buf));
@@ -148,6 +169,9 @@ test_chksum_size_four(void)
chksum = H5_checksum_crc(buf, sizeof(buf));
VERIFY(chksum, 0xff117081, "H5_checksum_crc");
+
+ chksum = H5_checksum_lookup3(buf, sizeof(buf));
+ VERIFY(chksum, 0x049396b8, "H5_checksum_lookup3");
} /* test_chksum_size_four() */
@@ -173,6 +197,9 @@ test_chksum_large(void)
chksum = H5_checksum_crc(large_buf, sizeof(large_buf));
VERIFY(chksum, 0xfbd0f7c0, "H5_checksum_crc");
+ chksum = H5_checksum_lookup3(large_buf, sizeof(large_buf));
+ VERIFY(chksum, 0x1bd2ee7b, "H5_checksum_lookup3");
+
/* Buffer w/zero(s) for data */
HDmemset(large_buf, 0, sizeof(large_buf));
chksum = H5_checksum_fletcher32(large_buf, sizeof(large_buf));
@@ -180,6 +207,9 @@ test_chksum_large(void)
chksum = H5_checksum_crc(large_buf, sizeof(large_buf));
VERIFY(chksum, 0xfac8b4c4, "H5_checksum_crc");
+
+ chksum = H5_checksum_lookup3(large_buf, sizeof(large_buf));
+ VERIFY(chksum, 0x930c7afc, "H5_checksum_lookup3");
} /* test_chksum_large() */