summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Mainzer <mainzer@hdfgroup.org>2009-02-06 18:20:13 (GMT)
committerJohn Mainzer <mainzer@hdfgroup.org>2009-02-06 18:20:13 (GMT)
commit2933d2f80b01c5e46c0a8161b3bd82249ab0b289 (patch)
tree30d6d611afc55328787e08efd7a4a17fbe0c85f7
parent7418c063568d22fab2d34bd07664da60f44cdf98 (diff)
downloadhdf5-2933d2f80b01c5e46c0a8161b3bd82249ab0b289.zip
hdf5-2933d2f80b01c5e46c0a8161b3bd82249ab0b289.tar.gz
hdf5-2933d2f80b01c5e46c0a8161b3bd82249ab0b289.tar.bz2
[svn-r16451] Repaired intermittent failure of the t_cache test in testpar.
The failure was caused by some overactive sanity checking code in unlock_entry(). In essence, the code did not consider the possibility that, under certain very unusual circumstances, an entry could be flushed to disk during the H5AC_unprotect() call. Instead, it simply failed if a dirty entry was marked clean after the call to H5AC_unprotect(). This bug in the test code was exposed by recent changes to the default cache configuration made as part of the "metadata blizzard" bug fix. Fixed the bug by adding code to detect when an entry is flushed during the call to H5AC_unprotect(), and not trigger a failure if a dirty entry is marked clean after a call to H5AC_unprotect() if the entry has been flushed. In passing, also found and fixed another test bug in which expunged entries were erroneously marked as dirty in the test code's independent register of entry status. Tested parallel on Phoenix (AMD64 Linux) and Jam. Also ran t_cache manually hundreds of times looking for intermittent failures. Larry kindly tested (parallel) on Mercury.
-rw-r--r--testpar/t_cache.c154
1 files changed, 147 insertions, 7 deletions
diff --git a/testpar/t_cache.c b/testpar/t_cache.c
index c858e9b..0fdd78a 100644
--- a/testpar/t_cache.c
+++ b/testpar/t_cache.c
@@ -129,6 +129,12 @@ long local_pins = 0;
* processes, and thus cannot be marked as dirty unless they
* happen to overlap some collective operation.
*
+ * cleared: Boolean flag that is set to true whenever the entry is
+ * dirty, and is cleared via a call to clear_datum().
+ *
+ * flushed: Boolean flag that is set to true whenever the entry is
+ * dirty, and is flushed via a call to flush_datum().
+ *
* index: Index of this instance of datum in the data_index[] array
* discussed below.
*
@@ -146,6 +152,8 @@ struct datum
hbool_t locked;
hbool_t global_pinned;
hbool_t local_pinned;
+ hbool_t cleared;
+ hbool_t flushed;
int index;
};
@@ -691,6 +699,9 @@ addr_to_datum_index(haddr_t base_addr)
* JRM -- 7/11/06
* Added support for the local_len field.
*
+ * JRM -- 2/4/09
+ * Added initialization for the cleared and flushed fields.
+ *
*****************************************************************************/
void
@@ -728,6 +739,8 @@ init_data(void)
data[i].locked = FALSE;
data[i].global_pinned = FALSE;
data[i].local_pinned = FALSE;
+ data[i].cleared = FALSE;
+ data[i].flushed = FALSE;
data[i].index = i;
data_index[i] = i;
@@ -1662,13 +1675,17 @@ serve_write_request(struct mssg_t * mssg_ptr)
* length of the entry, while retaining the original
* value for communications with the server.
*
+ * JRM -- 2/4/09
+ * Added code to set the cleared flag when a dirty entry is
+ * cleared.
+ *
*-------------------------------------------------------------------------
*/
herr_t
clear_datum(H5F_t * f,
void * thing,
- hbool_t dest)
+ hbool_t dest)
{
int idx;
struct datum * entry_ptr;
@@ -1688,6 +1705,13 @@ clear_datum(H5F_t * f,
HDassert( ( entry_ptr->header.size == entry_ptr->len ) ||
( entry_ptr->header.size == entry_ptr->local_len ) );
+ HDassert( entry_ptr->header.is_dirty == entry_ptr->dirty );
+
+ if ( entry_ptr->header.is_dirty ) {
+
+ entry_ptr->cleared = TRUE;
+ }
+
entry_ptr->header.is_dirty = FALSE;
entry_ptr->dirty = FALSE;
@@ -1791,6 +1815,10 @@ destroy_datum(H5F_t UNUSED * f,
* length of the entry, while retaining the original
* value for communications with the server.
*
+ * JRM -- 2/4/09
+ * Added code to set the flushed flag when a dirty entry
+ * is flushed.
+ *
*-------------------------------------------------------------------------
*/
@@ -1859,6 +1887,7 @@ flush_datum(H5F_t *f,
{
entry_ptr->header.is_dirty = FALSE;
entry_ptr->dirty = FALSE;
+ entry_ptr->flushed = TRUE;
}
}
}
@@ -2175,8 +2204,6 @@ expunge_entry(H5C_t * cache_ptr,
HDassert( !(entry_ptr->global_pinned) );
HDassert( !(entry_ptr->local_pinned) );
- entry_ptr->dirty = TRUE;
-
if ( nerrors == 0 ) {
result = H5AC_expunge_entry(file_ptr, -1, &(types[0]),
@@ -3492,7 +3519,7 @@ setup_rand(void)
const char * fcn_name = "setup_rand()";
hbool_t use_predefined_seeds = FALSE;
int num_predefined_seeds = 3;
- unsigned predefined_seeds[3] = {18669, 89925, 12577};
+ unsigned predefined_seeds[3] = {33402, 33505, 33422};
unsigned seed;
struct timeval tv;
struct timezone tz;
@@ -3663,9 +3690,14 @@ unlock_entry(H5C_t * cache_ptr,
HDassert( ((entry_ptr->header).type)->id == DATUM_ENTRY_TYPE );
- if ( ( flags & H5AC__DIRTIED_FLAG ) != 0
- && ( (flags & H5C__DELETED_FLAG) == 0 ) ) {
-
+ if ( ( (flags & H5AC__DIRTIED_FLAG) != 0 ) &&
+ ( (flags & H5C__DELETED_FLAG) == 0 ) &&
+ ( ! ( ( ( world_mpi_rank == 0 ) && ( entry_ptr->flushed ) )
+ ||
+ ( ( world_mpi_rank != 0 ) && ( entry_ptr->cleared ) )
+ )
+ )
+ ) {
HDassert( entry_ptr->header.is_dirty );
HDassert( entry_ptr->dirty );
}
@@ -4412,6 +4444,8 @@ smoke_check_3(void)
{
const char * fcn_name = "smoke_check_3()";
hbool_t success = TRUE;
+ hbool_t verbose = FALSE;
+ int cp = 0;
int i;
int max_nerrors;
int min_count;
@@ -4428,12 +4462,18 @@ smoke_check_3(void)
TESTING("smoke check #3");
}
+ /* 0 */
+ if ( verbose ) { HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++); }
+
nerrors = 0;
init_data();
reset_stats();
if ( world_mpi_rank == world_server_mpi_rank ) {
+ /* 1 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
if ( ! server_main() ) {
/* some error occured in the server -- report failure */
@@ -4443,9 +4483,15 @@ smoke_check_3(void)
world_mpi_rank, fcn_name);
}
}
+
+ /* 2 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
}
else /* run the clients */
{
+ /* 1 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
nerrors++;
@@ -4457,6 +4503,9 @@ smoke_check_3(void)
}
}
+ /* 2 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
min_count = 100 / ((file_mpi_rank + 1) * (file_mpi_rank + 1));
max_count = min_count + 50;
@@ -4472,6 +4521,9 @@ smoke_check_3(void)
}
}
+ /* 3 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
min_count = 100 / ((file_mpi_rank + 2) * (file_mpi_rank + 2));
max_count = min_count + 50;
@@ -4512,6 +4564,9 @@ smoke_check_3(void)
}
+ /* 4 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* flush the file to be sure that we have no problems flushing
* pinned entries
@@ -4524,6 +4579,9 @@ smoke_check_3(void)
}
}
+ /* 5 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
min_idx = 0;
max_idx = ((virt_num_data_entries / 10) /
@@ -4558,6 +4616,9 @@ smoke_check_3(void)
}
}
+ /* 6 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
min_idx = 0;
max_idx = ((virt_num_data_entries / 10) /
((file_mpi_rank + 3) * (file_mpi_rank + 3))) - 1;
@@ -4574,6 +4635,9 @@ smoke_check_3(void)
min_idx, max_idx, 0, 100);
}
+ /* 7 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* we can't rename pinned entries, so release any local pins now. */
local_unpin_all_entries(cache_ptr, file_ptr, FALSE);
@@ -4592,6 +4656,9 @@ smoke_check_3(void)
min_count, max_count);
}
+ /* 8 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* ...and then rename them back. */
for ( i = (virt_num_data_entries / 2) - 1; i >= 0; i-- )
{
@@ -4604,6 +4671,9 @@ smoke_check_3(void)
min_count, max_count);
}
+ /* 9 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* finally, do some dirty lock/unlocks while we give the cache
* a chance t reduce its size.
*/
@@ -4627,6 +4697,9 @@ smoke_check_3(void)
}
}
+ /* 10 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* release any local pins before we take down the cache. */
local_unpin_all_entries(cache_ptr, file_ptr, FALSE);
@@ -4642,6 +4715,9 @@ smoke_check_3(void)
}
}
+ /* 11 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
+
/* verify that all instances of datum are back where the started
* and are clean.
*/
@@ -4676,6 +4752,9 @@ smoke_check_3(void)
}
}
}
+
+ /* 12 */
+ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
}
max_nerrors = get_max_nerrors();
@@ -5043,6 +5122,8 @@ smoke_check_5(void)
{
const char * fcn_name = "smoke_check_5()";
hbool_t success = TRUE;
+ hbool_t verbose = FALSE;
+ int cp = 0;
int i;
int max_nerrors;
hid_t fid = -1;
@@ -5055,12 +5136,20 @@ smoke_check_5(void)
TESTING("smoke check #5");
}
+ /* 0 */
+ if ( verbose ) { HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++); }
+
nerrors = 0;
init_data();
reset_stats();
if ( world_mpi_rank == world_server_mpi_rank ) {
+ /* 1 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
if ( ! server_main() ) {
/* some error occured in the server -- report failure */
@@ -5070,9 +5159,20 @@ smoke_check_5(void)
world_mpi_rank, fcn_name);
}
}
+
+ /* 2 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
}
else /* run the clients */
{
+
+ /* 1 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
nerrors++;
@@ -5084,11 +5184,21 @@ smoke_check_5(void)
}
}
+ /* 2 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
for ( i = 0; i < (virt_num_data_entries / 2); i++ )
{
insert_entry(cache_ptr, file_ptr, i, H5AC__NO_FLAGS_SET);
}
+ /* 3 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
/* flush the file so we can lock known clean entries. */
if ( H5Fflush(fid, H5F_SCOPE_GLOBAL) < 0 ) {
nerrors++;
@@ -5098,6 +5208,11 @@ smoke_check_5(void)
}
}
+ /* 4 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
for ( i = 0; i < (virt_num_data_entries / 4); i++ )
{
lock_entry(cache_ptr, file_ptr, i);
@@ -5121,6 +5236,11 @@ smoke_check_5(void)
}
}
+ /* 5 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
for ( i = (virt_num_data_entries / 2) - 1;
i >= (virt_num_data_entries / 4);
i-- )
@@ -5154,6 +5274,11 @@ smoke_check_5(void)
unpin_entry(cache_ptr, file_ptr, i, TRUE, FALSE, FALSE);
}
+ /* 6 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
if ( fid >= 0 ) {
if ( ! take_down_cache(fid) ) {
@@ -5166,6 +5291,11 @@ smoke_check_5(void)
}
}
+ /* 7 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
/* verify that all instance of datum are back where the started
* and are clean.
*/
@@ -5176,6 +5306,11 @@ smoke_check_5(void)
HDassert( ! (data[i].dirty) );
}
+ /* 8 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
+
/* compose the done message */
mssg.req = DONE_REQ_CODE;
mssg.src = world_mpi_rank;
@@ -5199,6 +5334,11 @@ smoke_check_5(void)
}
}
}
+
+ /* 9 */
+ if ( verbose ) {
+ HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
+ }
}
max_nerrors = get_max_nerrors();