diff options
-rw-r--r-- | src/H5AC.c | 12 | ||||
-rw-r--r-- | src/H5F.c | 30 | ||||
-rw-r--r-- | src/H5FD.c | 55 | ||||
-rw-r--r-- | src/H5FPclient.c | 173 | ||||
-rw-r--r-- | src/H5FPprivate.h | 6 | ||||
-rw-r--r-- | src/H5FPserver.c | 598 |
6 files changed, 814 insertions, 60 deletions
@@ -1283,17 +1283,27 @@ H5AC_protect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, if (H5FP_request_lock(H5FD_fphdf5_file_id(lf), addr, rw == H5AC_WRITE ? H5FP_LOCK_WRITE : H5FP_LOCK_READ, TRUE, &req_id, &status) < 0) { +#if 0 + HDfprintf(stdout, "H5AC_protect: Lock failed.\n"); /* * FIXME: Check the status variable. If the lock is got * by some other process, we can loop and wait or bail * out of this function */ -HDfprintf(stderr, "Couldn't get lock for metadata at address %a\n", addr); + HDfprintf(stderr, "Couldn't get lock for metadata at address %a\n", addr); +#endif /* 0 */ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTLOCK, NULL, "can't lock data on SAP!") } /* Load a thing from the SAP. */ if (NULL == (thing = type->load(f, dxpl_id, addr, udata1, udata2))) { +#if 0 + HDfprintf(stdout, + "%s: Load failed. addr = %a, type->id = %d.\n", + "H5AC_protect", + addr, + (int)(type->id)); +#endif /* 0 */ HCOMMON_ERROR(H5E_CACHE, H5E_CANTLOAD, "unable to load object") if (H5FP_request_release_lock(H5FD_fphdf5_file_id(lf), addr, @@ -1957,16 +1957,32 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d if (H5F_read_superblock(file, dxpl_id, &root_ent, super_info.addr, buf, (size_t)super_info.size) < 0) HGOTO_ERROR(H5E_FILE, H5E_READERROR, NULL, "unable to read superblock") + } + + /* The following barrier ensures that all set eoa operations + * associated with creating the superblock are complete before + * we attempt any allocations. + * JRM - 4/13/04 + */ + if ( (mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS ) + { + HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc) + } + if (!H5FD_fphdf5_is_captain(lf)) { if (H5G_mkroot(file, dxpl_id, &root_ent) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "unable to create/open root group") - } /* end if */ + } /* All clients free the buffer used for broadcasting the superblock */ buf = H5MM_xfree (buf); } /* end if */ #endif /* H5_HAVE_FPHDF5 */ } else if (1 == shared->nrefs) { +#ifdef H5_HAVE_FPHDF5 + int mrc; /*MPI return code */ +#endif /* H5_HAVE_FPHDF5 */ + /* Read the superblock if it hasn't been read before. */ if (HADDR_UNDEF == (shared->super_addr = H5F_locate_signature(lf,dxpl_id))) HGOTO_ERROR(H5E_FILE, H5E_NOTHDF5, NULL, "unable to find file signature") @@ -1974,6 +1990,18 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d if (H5F_read_superblock(file, dxpl_id, &root_ent, shared->super_addr, NULL, 0) < 0) HGOTO_ERROR(H5E_FILE, H5E_READERROR, NULL, "unable to read superblock") +#ifdef H5_HAVE_FPHDF5 + if (H5FD_is_fphdf5_driver(lf)) { + /* reading the superblock generates lots of set_eoa calls. To avoid + * race conditions with allocations, make sure that everyone is done + * reading the superblock before we proceed. + */ + if ( (mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS ) { + HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc) + } + } +#endif /* H5_HAVE_FPHDF5 */ + /* Make sure we can open the root group */ if (H5G_mkroot(file, dxpl_id, &root_ent) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to read root group") @@ -1518,6 +1518,10 @@ done: * Bill Wendling, 2003/02/19 * Added support for FPHDF5. * + * John Mainzer, 2004/04/13 + * Moved much of the FPHDF5 specific code into H5FP_client_alloc(), + * and re-worked it to get rid of a race condition on the eoa. + * *------------------------------------------------------------------------- */ haddr_t @@ -1540,51 +1544,16 @@ H5FD_alloc(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size) * is the SAP executing this code, then skip the send to the SAP and * try to do the actual allocations. */ - if (H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file)) { - unsigned req_id = 0; - unsigned capt_only = 0; - H5FP_status_t status = H5FP_STATUS_OK; - H5P_genplist_t *plist; - H5FP_alloc_t fp_alloc; - - /* Get the data xfer property list */ - if ((plist = H5I_object(dxpl_id)) == NULL) - HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, HADDR_UNDEF, "not a dataset transfer list") - - if (H5P_exist_plist(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY) > 0) - if (H5P_get(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY, &capt_only) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, HADDR_UNDEF, "can't retrieve FPHDF5 property") + if ( H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file) ) { + haddr_t addr; - HDmemset(&fp_alloc, 0, sizeof(fp_alloc)); - - /* - * If the captain is the only one who should allocate resources, - * then do just that... - */ - if (!capt_only || H5FD_fphdf5_is_captain(file)) { - /* Send the request to the SAP */ - if (H5FP_request_allocate(file, type, size, &fp_alloc.addr, - &fp_alloc.eoa, &req_id, &status) != SUCCEED) - /* FIXME: Should we check the "status" variable here? */ - HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF, - "server couldn't allocate from file") - } - - if (capt_only) { - int mrc; - - if ((mrc = MPI_Bcast(&fp_alloc, 1, H5FP_alloc, - (int)H5FP_capt_barrier_rank, - H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS) - HMPI_GOTO_ERROR(HADDR_UNDEF, "MPI_Bcast failed", mrc); + if ( (addr = H5FP_client_alloc(file, type, dxpl_id, size)) + == HADDR_UNDEF) { + HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF, + "allocation failed.") + } else { + HGOTO_DONE(addr) } - - - /* Set the EOA for all processes. This doesn't fail. */ - file->cls->set_eoa(file, fp_alloc.eoa); - - /* We've succeeded -- return the value */ - HGOTO_DONE(fp_alloc.addr) } #endif /* H5_HAVE_FPHDF5 */ diff --git a/src/H5FPclient.c b/src/H5FPclient.c index 434fa06..8e52056 100644 --- a/src/H5FPclient.c +++ b/src/H5FPclient.c @@ -45,6 +45,7 @@ static int interface_initialize_g = 0; static unsigned H5FP_gen_request_id(void); static herr_t H5FP_dump_to_file(H5FD_t *file, hid_t dxpl_id); + /* *===----------------------------------------------------------------------=== * Public Library (non-API) Functions @@ -332,7 +333,27 @@ H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id, HDmemset(*buf, '\0', size); HDmemset(&mpi_status, 0, sizeof(mpi_status)); - if (size < sap_read.md_size) { + /* the following code is a bit odd and doubtless needs a bit + * of explanation. I certainly stumbled over it the first + * time I read it. + * + * For reasons unknown, read requests sent to the SAP only + * include a base address, not a length. Thus the SAP sends + * along the largest contiguous chunk it has starting at the + * specified address. + * + * If the chunk is bigger than we want, we just copy over what + * we want, and discard the rest. + * + * If it is just the right size, we receive it in the provided + * buffer. + * + * if it is too small to fulfil our request, we scream and die. + * + * JRM - 4/13/04 + */ + if (size < sap_read.md_size) + { char *mdata; if (H5FP_read_metadata(&mdata, (int)sap_read.md_size, (int)H5FP_sap_rank) == FAIL) { @@ -347,8 +368,11 @@ HDfprintf(stderr, "Metadata Read Failed!!!!\n"); H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc); } else { -HDfprintf(stderr, "Buffer not big enough to hold metadata!!!!\n"); -assert(0); + HDfprintf(stdout, + "H5FP_request_read_metadata: size = %d > md_size = %d.\n", + (int)size, (int)(sap_read.md_size)); + HDfprintf(stdout, "Mssg received from SAP is too small!!!!\n"); + assert(0); } break; @@ -628,6 +652,110 @@ done: FUNC_LEAVE_NOAPI(ret_value); } + +/* + * Function: H5FP_client_alloc + * Purpose: Handle the client side of an allocation in the FP case. + * In essence, this is simply a matter of referring the + * request to the SAP, and then returning the reply. + * + * A modified version of this code used to live in H5FD_alloc(), + * but I move it here to encapsulate it and generally tidy up. + * + * One can argue that we should all be done in an alloc + * routine in H5FDfdhdf5.c, but this invlves a smaller + * change to the code, and thus a smaller loss if I missed + * a major gotcha. If things go well, and we don't heave + * the current implementation of FP, I'll probably go that + * route eventually. + * Return: Success: The format address of the new file memory. + * Failure: The undefined address HADDR_UNDEF + * Programmer: JRM - 4/7/04 + * Modifications: + */ +haddr_t +H5FP_client_alloc(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size) +{ + haddr_t ret_value = HADDR_UNDEF; + unsigned req_id = 0; + unsigned capt_only = 0; + H5FP_status_t status = H5FP_STATUS_OK; + H5P_genplist_t *plist; + H5FP_alloc_t fp_alloc; + + FUNC_ENTER_NOAPI(H5FP_client_alloc, HADDR_UNDEF) + + /* check args */ + HDassert(file); + HDassert(file->cls); + HDassert(type >= 0 && type < H5FD_MEM_NTYPES); + HDassert(size > 0); + + /* verify that we are running FP and we are not the SAP. */ + HDassert(H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file)); + + /* Get the data xfer property list */ + if ( (plist = H5I_object(dxpl_id)) == NULL ) { + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, HADDR_UNDEF, "not a dataset transfer list") + } + + if ( H5P_exist_plist(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY) > 0 ) { + if ( H5P_get(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY, &capt_only) < 0 ) { + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, HADDR_UNDEF, "can't retrieve FPHDF5 property") + } + } + + HDmemset(&fp_alloc, 0, sizeof(fp_alloc)); + + /* + * If the captain is the only one who should allocate resources, + * then do just that... + */ + if ( !capt_only || H5FD_fphdf5_is_captain(file) ) { + /* Send the request to the SAP */ + if ( H5FP_request_allocate(file, type, size, &fp_alloc.addr, + &fp_alloc.eoa, &req_id, &status) + != SUCCEED ) { + HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF, + "server couldn't allocate from file") + } + } + + /* It should be impossible for this assertion to fail, but then + * that is what assertions are for. + */ + HDassert(status == H5FP_STATUS_OK); + + if ( capt_only ) { + int mrc; + + if ( (mrc = MPI_Bcast(&fp_alloc, 1, H5FP_alloc, + (int)H5FP_capt_barrier_rank, + H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS ) { + HMPI_GOTO_ERROR(HADDR_UNDEF, "MPI_Bcast failed", mrc); + } + } + + /* we used to send the eoa to the sap here, but that is silly, + * as the sap already knows, and it is possible that another + * interleaving allocation will result in a corrupted eoa. + * + * JRM - 4/7/04 + */ + + /* We've succeeded -- return the value */ + HGOTO_DONE(fp_alloc.addr) + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FP_client_alloc() */ + + +/* This function is now called only by H5FP_client_alloc() above. + * Should we make it a private function only accessible from this + * file? JRM - 4/8/04 + */ /* * Function: H5FP_request_allocate * Purpose: Request an allocation of space from the SAP. @@ -739,8 +867,17 @@ H5FP_request_free(H5FD_t *file, H5FD_mem_t mem_type, haddr_t addr, hsize_t size, if (sap_alloc.status != H5FP_STATUS_OK) HGOTO_ERROR(H5E_RESOURCE, H5E_CANTCHANGE, FAIL, "can't free space on server"); +#if 0 /* JRM */ + /* the set_eoa call just sends the eoa we received from the SAP back + * -- with obvious race condition problems if there are interleaving + * calls. Thus I am commenting this call out for now, and will delete + * it in time if I can't find a reason for it. + * + * JRM -- 4/7/04 + */ /* Set the EOA for all processes. This call doesn't fail. */ file->cls->set_eoa(file, sap_alloc.eoa); +#endif /* JRM */ *status = H5FP_STATUS_OK; done: @@ -834,6 +971,24 @@ H5FP_request_set_eoa(H5FD_t *file, haddr_t eoa, unsigned *req_id, H5FP_status_t req.proc_rank = H5FD_mpi_get_rank(file); req.addr = eoa; +#if 0 + /* This is useful debugging code -- lets keep for a while. + * JRM -- 4/13/04 + */ + /* dump stack each time we set the eoa */ + { + int mpi_rank; + + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + HDfprintf(stdout, + "%d: %s: setting eoa: last eoa = %a, new eoa = %a.\n", + mpi_rank, "H5FP_request_set_eoa", last_eoa_received, eoa); + H5FS_print(stdout); + + } +#endif + if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank, H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc); @@ -903,8 +1058,20 @@ H5FP_request_update_eoma_eosda(H5FD_t *file, unsigned *req_id, H5FP_status_t *st H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed!", mrc); +#if 0 + /* The following set_eoa just parrots back to the SAP the eoa + * we just received from it. While I don't think it is a problem + * in this case, there are obvious potentials for race conditions, + * and I don't see that it does anything useful. + * + * Thus I am commenting it out for now. I'll delete it completely + * as soon as I am sure that it serves no purpose whatsoever. + * + * JRM - 4/8/04 + */ /* Set the EOA for all processes. This doesn't fail. */ file->cls->set_eoa(file, sap_eoa.eoa); +#endif *status = H5FP_STATUS_OK; done: diff --git a/src/H5FPprivate.h b/src/H5FPprivate.h index aa85969..2c2f8b1 100644 --- a/src/H5FPprivate.h +++ b/src/H5FPprivate.h @@ -316,10 +316,16 @@ extern herr_t H5FP_request_flush_metadata(H5FD_t *file, unsigned file_id, H5FP_status_t *status); extern herr_t H5FP_request_close(H5FD_t *file, unsigned sap_file_id, unsigned *req_id, H5FP_status_t *status); + +/* the following function should probably become a private function + * in H5FPclient.c if H5FP_client_alloc() does the job. -- JRM + */ extern herr_t H5FP_request_allocate(H5FD_t *file, H5FD_mem_t mem_type, hsize_t size, haddr_t *addr, haddr_t *eoa, unsigned *req_id, H5FP_status_t *status); +extern haddr_t H5FP_client_alloc(H5FD_t *file, H5FD_mem_t type, + hid_t dxpl_id, hsize_t size); extern herr_t H5FP_request_free(H5FD_t *file, H5FD_mem_t mem_type, haddr_t addr, hsize_t size, unsigned *req_id, H5FP_status_t *status); diff --git a/src/H5FPserver.c b/src/H5FPserver.c index 1e2ca62..64535fd 100644 --- a/src/H5FPserver.c +++ b/src/H5FPserver.c @@ -454,6 +454,12 @@ H5FP_remove_object_lock_from_list(H5FP_file_info *info, * Return: <0, 0, or >0 * Programmer: Bill Wendling, 27. August, 2002 * Modifications: + * Altered the function to use the H5F_addr_cmp() macro + * from H5Fprivate. This has the effect of reversing + * the direction of the comparison. This in turn + * should make the next and less tree primitives + * behave as expected. + * JRM - 3/22/04 */ static int H5FP_file_mod_cmp(H5FP_mdata_mod *k1, @@ -463,7 +469,7 @@ H5FP_file_mod_cmp(H5FP_mdata_mod *k1, FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FP_file_mod_cmp); assert(k1); assert(k2); - FUNC_LEAVE_NOAPI(k2->addr - k1->addr); + FUNC_LEAVE_NOAPI(H5F_addr_cmp((k1->addr), (k2->addr))); } /* @@ -517,6 +523,410 @@ done: } /* + * Function: H5FP_merge_mod_node_with_next + * + * Purpose: Given a node in a mod tree which overlaps with the next + * node in the tree, merge the two. Where the two nodes + * overlap, use the data from the supplied node. + * + * WARNING!!! + * + * This function calls H5TB_rem(), which may not delete the + * node specified in its parameter list -- if the target node + * is internal, it may swap data with a leaf node and delete + * the leaf instead. + * + * This implies that any pointer into the supplied tree may + * be invalid after this functions returns. Thus the calling + * function must re-aquire the address of *node_ptr (and any + * other nodes in *tree_ptr) after this function returns if + * it needs to do anything further with the node. + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: JRM - 3/18/04 + * + * Modifications: + * + * None. + */ +static herr_t +H5FP_merge_mod_node_with_next(H5TB_TREE *tree_ptr, H5TB_NODE *node_ptr) +{ + int i; + int j; + int offset; + herr_t ret_value; + H5TB_NODE *next_node_ptr; + H5FP_mdata_mod *mod_ptr; + H5FP_mdata_mod *next_mod_ptr; + H5FP_mdata_mod *key_ptr; + unsigned combined_md_size; + char *combined_metadata_ptr; + + FUNC_ENTER_NOAPI_NOINIT(H5FP_merge_mod_node_with_next); + + ret_value = SUCCEED; + + /* check parameters & do some initializations in passing */ + if ( ( tree_ptr == NULL ) || + ( node_ptr == NULL ) || + ( (mod_ptr = (H5FP_mdata_mod *)(node_ptr->data)) == NULL ) || + ( (next_node_ptr = H5TB_next(node_ptr)) == NULL ) || + ( (next_mod_ptr = next_node_ptr->data) == NULL ) || + ( mod_ptr->addr >= next_mod_ptr->addr ) || + ( (mod_ptr->addr + mod_ptr->md_size) <= next_mod_ptr->addr ) ) { + HGOTO_ERROR(H5E_FPHDF5, H5E_BADVALUE, FAIL, + "One or more bad params detected on entry."); + } + + if ( (mod_ptr->addr + mod_ptr->md_size) < + (next_mod_ptr->addr + next_mod_ptr->md_size) ) { + /* The next node address range is not completely subsumed in + * that of the current node. Must allocate a new buffer, and + * copy over the contents of the two buffers. Where the buffers + * overlap, give precidence to the data from *node_ptr + */ + combined_md_size = (next_mod_ptr->addr + next_mod_ptr->md_size) - + (mod_ptr->addr); + + combined_metadata_ptr = + (char *)H5MM_malloc((size_t)(combined_md_size + 1)); + + if ( combined_metadata_ptr == NULL ) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "can't allocate buffer for combined node."); + } + + i = 0; /* this is the index into the combined buffer */ + + for ( j = 0; j < mod_ptr->md_size; j++ ) { + combined_metadata_ptr[i++] = (mod_ptr->metadata)[j]; + } + + offset = (int)((mod_ptr->addr + mod_ptr->md_size) - next_mod_ptr->addr); + + for ( j = offset; j < next_mod_ptr->md_size; j++ ) { + combined_metadata_ptr[i++] = (next_mod_ptr->metadata)[j]; + } + + HDassert(i == combined_md_size); + + combined_metadata_ptr[i] = (char)0; + + HDfree(mod_ptr->metadata); + mod_ptr->metadata = combined_metadata_ptr; + mod_ptr->md_size = combined_md_size; + } + + /* We have copied metadata from the next node into the current node + * if this was necessary. All that remains is to delete the next + * node from the tree and free it. + */ + + H5TB_rem(&(tree_ptr->root), next_node_ptr, (void **)(&key_ptr)); + + /* WARNING!!! + * + * node_ptr or any other pointer to a node in *tree_ptr may be invalid + * at this point. Find the associated data in the tree again if you + * have any further need of it. + */ + + HDassert(key_ptr == next_mod_ptr); + H5FP_free_mod_node(next_mod_ptr); + +done: + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FP_merge_mod_node_with_next() */ + +/* + * Function: H5FP_merge_mod_node_with_prev + * + * Purpose: Given a node in a mod tree which overlaps with the previous + * node in the tree, merge the two. Where the two nodes + * overlap, use the data from the supplied node. + * + * WARNING!!! + * + * This function calls H5TB_rem() to delete node_ptr from + * the tree pointed to by tree_ptr. H5TB_rem() may not delete + * the node specified in its parameter list -- if the target + * node is internal, it may swap data with a leaf node and + * delete the leaf instead. + * + * This implies that any pointer into the supplied tree may + * be invalid after this functions returns. Thus the calling + * function must re-aquire the address of any node in *tree_ptr + * after this function returns if it needs to do anything + * further with the node in question. + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: JRM - 3/19/04 + * + * Modifications: + * + * None. + */ +static herr_t +H5FP_merge_mod_node_with_prev(H5TB_TREE *tree_ptr, H5TB_NODE *node_ptr) +{ + int i; + int j; + int limit; + herr_t ret_value; + H5TB_NODE *prev_node_ptr; + H5FP_mdata_mod *mod_ptr; + H5FP_mdata_mod *prev_mod_ptr; + H5FP_mdata_mod *key_ptr; + unsigned combined_md_size; + char *combined_metadata_ptr; + + FUNC_ENTER_NOAPI_NOINIT(H5FP_merge_mod_node_with_prev); + + ret_value = SUCCEED; + + /* check parameters & do some initializations in passing */ + if ( ( tree_ptr == NULL ) || + ( node_ptr == NULL ) || + ( (mod_ptr = (H5FP_mdata_mod *)(node_ptr->data)) == NULL ) || + ( (prev_node_ptr = H5TB_prev(node_ptr)) == NULL ) || + ( (prev_mod_ptr = (H5FP_mdata_mod *)(prev_node_ptr->data)) == NULL ) || + ( mod_ptr->addr <= prev_mod_ptr->addr ) || + ( (prev_mod_ptr->addr + prev_mod_ptr->md_size) <= mod_ptr->addr ) ) { + HGOTO_ERROR(H5E_FPHDF5, H5E_BADVALUE, FAIL, + "One or more bad params detected on entry."); + } + + if ( (prev_mod_ptr->addr + prev_mod_ptr->md_size) < + (mod_ptr->addr + mod_ptr->md_size) ) { + /* The node address range is not completely subsumed in + * that of the previous node. Must allocate a new buffer, and + * copy over the contents of the two buffers. Where the buffers + * overlap, give precidence to the data from *node_ptr + */ + combined_md_size = (mod_ptr->addr + mod_ptr->md_size) - + (prev_mod_ptr->addr); + + combined_metadata_ptr = + (char *)H5MM_malloc((size_t)(combined_md_size + 1)); + + if ( combined_metadata_ptr == NULL ) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "can't allocate buffer for combined node."); + } + + i = 0; /* this is the index into the combined buffer */ + + limit = (int)(mod_ptr->addr - prev_mod_ptr->addr); + + HDassert(limit > 0 ); + + for ( j = 0; j < limit; j++ ) { + combined_metadata_ptr[i++] = (prev_mod_ptr->metadata)[j]; + } + + for ( j = 0; j < (int)(mod_ptr->md_size); j++ ) { + combined_metadata_ptr[i++] = (mod_ptr->metadata)[j]; + } + + HDassert(i == combined_md_size); + + combined_metadata_ptr[i] = (char)0; + + HDfree(prev_mod_ptr->metadata); + prev_mod_ptr->metadata = combined_metadata_ptr; + prev_mod_ptr->md_size = combined_md_size; + } else { /* supplied node is completely inside previous node */ + /* no need to allocate a new buffer. Just copy data from + * mod_ptr->metadata to the appropriate locations in + * prev_mod_ptr->metadata. + */ + + i = (int)(mod_ptr->addr - prev_mod_ptr->addr); + + for ( j = 0; j < (int)(mod_ptr->md_size); j++ ) { + (prev_mod_ptr->metadata)[i++] = (mod_ptr->metadata)[j]; + } + + HDassert(i <= prev_mod_ptr->md_size); + } + + /* We have copied metadata from the current node into the previous + * node. All that remains is to delete the current node from the + * tree and free it. + */ + + H5TB_rem(&(tree_ptr->root), node_ptr, (void **)(&key_ptr)); + + /* WARNING!!! + * + * Any pointer to a node in *tree_ptr may be invalid now as a result + * of the above call to H5TB_rem(). Find the associated data in the + * tree again if you have any further need of it. + */ + + HDassert(key_ptr == mod_ptr); + H5FP_free_mod_node(mod_ptr); + +done: + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FP_merge_mod_node_with_prev() */ + +/* + * Function: H5FP_mod_node_overlaps_with_next + * + * Purpose: Given a node in a mod tree, see if there is an overlap + * between the address range of the supplied node, and that + * of the next node in the tree (if any). + * + * Return: TRUE if there is an overlap, and FALSE if there + * isn't. + * + * Programmer: JRM - 3/18/04 + * + * Modifications: + * + * None. + */ +static hbool_t +H5FP_mod_node_overlaps_with_next(H5TB_NODE *node_ptr) +{ + hbool_t ret_value; + H5TB_NODE *next_node_ptr; + + FUNC_ENTER_NOAPI_NOINIT(H5FP_mod_node_overlaps_with_next); + + ret_value = FALSE; + + HDassert(node_ptr != NULL); + + next_node_ptr = H5TB_next(node_ptr); + + if ( next_node_ptr != NULL ) { + if ( ( ((H5FP_mdata_mod *)(node_ptr->data))->addr > 100000 ) || + ( (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size) > 1024 ) ) { + HDfprintf(stdout, "%s: addr = %a, size = %d, mem_type = %d.\n", + "H5FP_mod_node_overlaps_with_next(2)", + (haddr_t)(((H5FP_mdata_mod *)(node_ptr->data))->addr), + (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size), + (int)(((H5FP_mdata_mod *)(node_ptr->data))->mem_type)); + } + + if ( (((H5FP_mdata_mod *)(node_ptr->data))->addr) + >= + (((H5FP_mdata_mod *)(next_node_ptr->data))->addr) + ) { + HDfprintf(stdout, "%s: addr,len = %a,%d, next_addr,len = %a,%d.\n", + "H5FP_mod_node_overlaps_with_next", + (((H5FP_mdata_mod *)(node_ptr->data))->addr), + (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size), + (((H5FP_mdata_mod *)(next_node_ptr->data))->addr), + (int)(((H5FP_mdata_mod *)(next_node_ptr->data))->md_size)); + + HDassert((((H5FP_mdata_mod *)(node_ptr->data))->addr) + < + (((H5FP_mdata_mod *)(next_node_ptr->data))->addr) + ); + } + if ( ( (((H5FP_mdata_mod *)(node_ptr->data))->addr) + + + (((H5FP_mdata_mod *)(node_ptr->data))->md_size) + ) + > + (((H5FP_mdata_mod *)(next_node_ptr->data))->addr) + ) { +#if 0 + /* This is useful debugging code -- keep it around for + * a while. JRM -- 4/13/03 + */ + HDfprintf(stdout, + "H5FP_mod_node_overlaps_with_next: addr = %a, next_addr = %a.\n", + (((H5FP_mdata_mod *)(node_ptr->data))->addr), + (((H5FP_mdata_mod *)(next_node_ptr->data))->addr)); +#endif + ret_value = TRUE; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FP_mod_node_overlaps_with_next() */ + +/* + * Function: H5FP_mod_node_overlaps_with_prev + * + * Purpose: Givena node in a mod tree, see if there is an overlap + * between the address range of the supplied node, and that + * of the previous node in the tree (if any). + * + * Return: TRUE if there is an overlap, and FALSE if there + * isn't. + * + * Programmer: JRM - 3/18/04 + * + * Modifications: + * + * None. + */ +static hbool_t +H5FP_mod_node_overlaps_with_prev(H5TB_NODE *node_ptr) +{ + hbool_t ret_value; + H5TB_NODE *prev_node_ptr; + + FUNC_ENTER_NOAPI_NOINIT(H5FP_mod_node_overlaps_with_prev); + + ret_value = FALSE; + + HDassert(node_ptr != NULL); + + prev_node_ptr = H5TB_prev(node_ptr); + + + if ( prev_node_ptr != NULL ) + { + HDassert((((H5FP_mdata_mod *)(node_ptr->data))->addr) + > + (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr) + ); + + if ( ( (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr) + + + (((H5FP_mdata_mod *)(prev_node_ptr->data))->md_size) + ) + > + (((H5FP_mdata_mod *)(node_ptr->data))->addr) + ) { +#if 0 + /* This is useful debugging code -- keep it around for + * a while. JRM - 4/13/04 + */ + HDfprintf(stdout, + "H5FP_mod_node_overlaps_with_prev: addr = %a, prev_addr = %a.\n", + (((H5FP_mdata_mod *)(node_ptr->data))->addr), + (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr)); +#endif + ret_value = TRUE; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FP_mod_node_overlaps_with_prev() */ + +/* * Function: H5FP_add_file_mod_to_list * Purpose: Add a metadata write to a file ID. If the metadata is * already in the cache, then we just replace it with the @@ -526,12 +936,17 @@ done: * Failure: FAIL * Programmer: Bill Wendling, 02. August, 2002 * Modifications: + * Re-worked code to merge overlapping metadata changes, + * and to avoid discarding metadata if the supplied metadata + * is smaller than that already in the mod list. + * JRM -- 3/29/04 */ static herr_t H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type, haddr_t addr, unsigned md_size, char *metadata) { + int i; H5FP_mdata_mod *fm, mod; H5TB_NODE *node; herr_t ret_value = FAIL; @@ -542,7 +957,14 @@ H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type, assert(info); mod.addr = addr; /* This is the key field for the TBBT */ - +#if 0 + /* This is useful debugging code -- keep it around for a + * while. JRM -- 4/13/04 + */ + HDfprintf(stdout, + "H5FP_add_file_mod_to_list: Adding chunk at %a of length %d.\n", + addr, (int)md_size); +#endif if ((node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL)) != NULL) { /* * The metadata is in the cache already. All we have to do is @@ -550,18 +972,89 @@ H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type, * The only things to change is the metadata and its size. */ fm = (H5FP_mdata_mod *)node->data; - HDfree(fm->metadata); - fm->metadata = metadata; - fm->md_size = md_size; + + if ( fm->md_size > md_size ) { + for ( i = 0; i < md_size; i++ ) + { + (fm->metadata)[i] = metadata[i]; + } + HDfree(metadata); + } else if ( fm->md_size < md_size ) { + HDfree(fm->metadata); + fm->metadata = metadata; + fm->md_size = md_size; + + while ( H5FP_mod_node_overlaps_with_next(node) ) { + if ( H5FP_merge_mod_node_with_next(info->mod_tree, node) + == FAIL ) { + /* Need to define better errors here. -- JRM */ + HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL, + "Can't merge with next."); + } else { + (info->num_mods)--; /* since we just merged */ + + /* H5FP_merge_mod_node_with_next() contains a call + * to H5TB_rem(), which may clobber node. Hence we + * must look it up again before proceeding. + */ + node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL); + HDassert(node != NULL); + HDassert(node->data == fm); + HDassert(node->key == fm); + } + } + } else { /* fm->md_size == md_size */ + HDfree(fm->metadata); + fm->metadata = metadata; + } + HGOTO_DONE(SUCCEED); } - if ((fm = H5FP_new_file_mod_node(mem_type, addr, md_size, metadata)) != NULL) { - if (!H5TB_dins(info->mod_tree, (void *)fm, NULL)) + if ( (fm = H5FP_new_file_mod_node(mem_type, addr, md_size, metadata)) + != NULL) { + if ( (node = H5TB_dins(info->mod_tree, (void *)fm, NULL)) == NULL ) { HGOTO_ERROR(H5E_FPHDF5, H5E_CANTINSERT, FAIL, "can't insert modification into tree"); + } + + (info->num_mods)++; + + /* merge with next as required */ + while ( H5FP_mod_node_overlaps_with_next(node) ) { + if ( H5FP_merge_mod_node_with_next(info->mod_tree, node) == FAIL ) { + /* Need to define better errors here. -- JRM */ + HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL, + "Can't merge new node with next."); + } else { + (info->num_mods)--; /* since we just merged */ + + /* H5FP_merge_mod_node_with_next() contains a call + * to H5TB_rem(), which may clobber node. Hence we + * must look it up again before proceeding. + */ + node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL); + HDassert(node != NULL); + HDassert(node->data == fm); + HDassert(node->key == fm); + } + } + + /* if the tree was valid to begin with, we must merge with at + * most one previous node. + */ + if ( H5FP_mod_node_overlaps_with_prev(node) ) { + if ( H5FP_merge_mod_node_with_prev(info->mod_tree, node) == FAIL ) { + /* Need to define better errors here. -- JRM */ + HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL, + "Can't merge new node with prev."); + } + /* H5FP_merge_mod_node_with_prev() calls H5TB_rem() to delete + * node after it merges with the previous node. Thus node is + * invalid at this point. + */ + } - ++info->num_mods; HGOTO_DONE(SUCCEED); } @@ -1274,6 +1767,12 @@ H5FP_sap_handle_read_request(H5FP_request_t *req) int mrc; FUNC_ENTER_NOAPI_NOINIT(H5FP_sap_handle_read_request); +#if 0 + /* More useful debugging code to keep for a time. JRM - 4/13/04 */ + HDfprintf(stdout, + "H5FP_sap_handle_read_request: req->addr = %a.\n", + req->addr); +#endif r.req_id = req->req_id; r.file_id = req->file_id; @@ -1356,7 +1855,12 @@ H5FP_sap_handle_write_request(H5FP_request_t *req, char *mdata, unsigned md_size herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI_NOINIT(H5FP_sap_handle_write_request); - +#if 0 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, + "H5FP_sap_handle_write_request: addr = %a, md_size = %d.\n", + (haddr_t)(req->addr), (int)md_size); +#endif if ((info = H5FP_find_file_info(req->file_id)) != NULL) { if (info->num_mods >= H5FP_MDATA_CACHE_HIGHWATER_MARK) { /* @@ -1544,14 +2048,26 @@ H5FP_sap_handle_alloc_request(H5FP_request_t *req) sap_alloc.eoa = HADDR_UNDEF; sap_alloc.status = H5FP_STATUS_CANT_ALLOC; } - +#if 0 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, + "%s: req_size = %d, req_type = %d, addr = %a, eoa = %a, status = %d, rp_rank = %d.\n", + "H5FP_sap_handle_alloc_request", + (int)(req->meta_block_size), + (int)(req->mem_type), + sap_alloc.addr, + sap_alloc.eoa, + (int)(sap_alloc.status), + (int)(req->proc_rank)); +#endif done: if ((mrc = MPI_Send(&sap_alloc, 1, H5FP_alloc, (int)req->proc_rank, H5FP_TAG_ALLOC, H5FP_SAP_COMM)) != MPI_SUCCESS) HMPI_DONE_ERROR(FAIL, "MPI_Send failed", mrc); FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5FP_sap_handle_alloc_request() */ /* * Function: H5FP_sap_handle_free_request @@ -1577,6 +2093,17 @@ H5FP_sap_handle_free_request(H5FP_request_t *req) sap_alloc.status = H5FP_STATUS_OK; sap_alloc.mem_type = req->mem_type; +#if 0 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, + "%s: addr = %a, block_size = %a, mem_type = %d, rp_rank = %d.\n", + "H5FP_sap_handle_free_request", + req->addr, + req->meta_block_size, + (int)(req->mem_type), + (int)(req->proc_rank)); +#endif + if ((info = H5FP_find_file_info(req->file_id)) != NULL) { if (H5FD_free((H5FD_t*)&info->file, req->mem_type, H5P_DEFAULT, req->addr, req->meta_block_size) != SUCCEED) { @@ -1599,7 +2126,8 @@ done: HMPI_DONE_ERROR(FAIL, "MPI_Send failed", mrc); FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5FP_sap_handle_free_request() */ /* * Function: H5FP_sap_handle_get_eoa_request @@ -1623,10 +2151,25 @@ H5FP_sap_handle_get_eoa_request(H5FP_request_t *req) sap_eoa.file_id = req->file_id; if ((info = H5FP_find_file_info(req->file_id)) != NULL) { + +#if 0 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, "%s: eoa = %a, rp_rank = %d.\n", + "H5FP_sap_handle_get_eoa_request", + ((H5FD_fphdf5_t*)&info->file)->eoa, + (int)(req->proc_rank)); +#endif + /* Get the EOA. */ sap_eoa.eoa = ((H5FD_fphdf5_t*)&info->file)->eoa; sap_eoa.status = H5FP_STATUS_OK; } else { +#if 1 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, "%s: function failed. rp_rank = %d.\n", + "H5FP_sap_handle_get_eoa_request", + (int)(req->proc_rank)); +#endif sap_eoa.eoa = HADDR_UNDEF; sap_eoa.status = H5FP_STATUS_CANT_ALLOC; ret_value = FAIL; @@ -1660,10 +2203,41 @@ H5FP_sap_handle_set_eoa_request(H5FP_request_t *req) FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FP_sap_handle_set_eoa_request); if ((info = H5FP_find_file_info(req->file_id)) != NULL) { + +#if 0 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + if ( req->addr < ((H5FD_fphdf5_t*)&info->file)->eoa ) { + HDfprintf(stdout, + "%s: old eoa = %a, new eoa = %a, rp_rank = %d. %s\n", + "H5FP_sap_handle_set_eoa_request", + ((H5FD_fphdf5_t*)&info->file)->eoa, + req->addr, + (int)(req->proc_rank), + "<---- eoa reduced!!! -------"); + } +#if 0 + else { + HDfprintf(stdout, + "%s: old eoa = %a, new eoa = %a, rp_rank = %d.\n", + "H5FP_sap_handle_set_eoa_request", + ((H5FD_fphdf5_t*)&info->file)->eoa, + req->addr, + (int)(req->proc_rank)); + } +#endif +#endif + /* Get the EOA. */ ((H5FD_fphdf5_t*)&info->file)->eoa = req->addr; exit_state = H5FP_STATUS_OK; } else { +#if 1 + /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */ + HDfprintf(stdout, + "%s: Function failed -- Couldn't get info. new eoa = %a.\n", + "H5FP_sap_handle_set_eoa_request", + req->addr); +#endif exit_state = H5FP_STATUS_CANT_ALLOC; ret_value = FAIL; } |