summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/H5AC.c12
-rw-r--r--src/H5F.c30
-rw-r--r--src/H5FD.c55
-rw-r--r--src/H5FPclient.c173
-rw-r--r--src/H5FPprivate.h6
-rw-r--r--src/H5FPserver.c598
6 files changed, 814 insertions, 60 deletions
diff --git a/src/H5AC.c b/src/H5AC.c
index fb0bafb..494bf98 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -1283,17 +1283,27 @@ H5AC_protect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
if (H5FP_request_lock(H5FD_fphdf5_file_id(lf), addr,
rw == H5AC_WRITE ? H5FP_LOCK_WRITE : H5FP_LOCK_READ,
TRUE, &req_id, &status) < 0) {
+#if 0
+ HDfprintf(stdout, "H5AC_protect: Lock failed.\n");
/*
* FIXME: Check the status variable. If the lock is got
* by some other process, we can loop and wait or bail
* out of this function
*/
-HDfprintf(stderr, "Couldn't get lock for metadata at address %a\n", addr);
+ HDfprintf(stderr, "Couldn't get lock for metadata at address %a\n", addr);
+#endif /* 0 */
HGOTO_ERROR(H5E_FPHDF5, H5E_CANTLOCK, NULL, "can't lock data on SAP!")
}
/* Load a thing from the SAP. */
if (NULL == (thing = type->load(f, dxpl_id, addr, udata1, udata2))) {
+#if 0
+ HDfprintf(stdout,
+ "%s: Load failed. addr = %a, type->id = %d.\n",
+ "H5AC_protect",
+ addr,
+ (int)(type->id));
+#endif /* 0 */
HCOMMON_ERROR(H5E_CACHE, H5E_CANTLOAD, "unable to load object")
if (H5FP_request_release_lock(H5FD_fphdf5_file_id(lf), addr,
diff --git a/src/H5F.c b/src/H5F.c
index 007b2bf..63ebdde 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -1957,16 +1957,32 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d
if (H5F_read_superblock(file, dxpl_id, &root_ent, super_info.addr,
buf, (size_t)super_info.size) < 0)
HGOTO_ERROR(H5E_FILE, H5E_READERROR, NULL, "unable to read superblock")
+ }
+
+ /* The following barrier ensures that all set eoa operations
+ * associated with creating the superblock are complete before
+ * we attempt any allocations.
+ * JRM - 4/13/04
+ */
+ if ( (mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS )
+ {
+ HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc)
+ }
+ if (!H5FD_fphdf5_is_captain(lf)) {
if (H5G_mkroot(file, dxpl_id, &root_ent) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "unable to create/open root group")
- } /* end if */
+ }
/* All clients free the buffer used for broadcasting the superblock */
buf = H5MM_xfree (buf);
} /* end if */
#endif /* H5_HAVE_FPHDF5 */
} else if (1 == shared->nrefs) {
+#ifdef H5_HAVE_FPHDF5
+ int mrc; /*MPI return code */
+#endif /* H5_HAVE_FPHDF5 */
+
/* Read the superblock if it hasn't been read before. */
if (HADDR_UNDEF == (shared->super_addr = H5F_locate_signature(lf,dxpl_id)))
HGOTO_ERROR(H5E_FILE, H5E_NOTHDF5, NULL, "unable to find file signature")
@@ -1974,6 +1990,18 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d
if (H5F_read_superblock(file, dxpl_id, &root_ent, shared->super_addr, NULL, 0) < 0)
HGOTO_ERROR(H5E_FILE, H5E_READERROR, NULL, "unable to read superblock")
+#ifdef H5_HAVE_FPHDF5
+ if (H5FD_is_fphdf5_driver(lf)) {
+ /* reading the superblock generates lots of set_eoa calls. To avoid
+ * race conditions with allocations, make sure that everyone is done
+ * reading the superblock before we proceed.
+ */
+ if ( (mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS ) {
+ HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc)
+ }
+ }
+#endif /* H5_HAVE_FPHDF5 */
+
/* Make sure we can open the root group */
if (H5G_mkroot(file, dxpl_id, &root_ent) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to read root group")
diff --git a/src/H5FD.c b/src/H5FD.c
index 2ab1666..d7ac465 100644
--- a/src/H5FD.c
+++ b/src/H5FD.c
@@ -1518,6 +1518,10 @@ done:
* Bill Wendling, 2003/02/19
* Added support for FPHDF5.
*
+ * John Mainzer, 2004/04/13
+ * Moved much of the FPHDF5 specific code into H5FP_client_alloc(),
+ * and re-worked it to get rid of a race condition on the eoa.
+ *
*-------------------------------------------------------------------------
*/
haddr_t
@@ -1540,51 +1544,16 @@ H5FD_alloc(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size)
* is the SAP executing this code, then skip the send to the SAP and
* try to do the actual allocations.
*/
- if (H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file)) {
- unsigned req_id = 0;
- unsigned capt_only = 0;
- H5FP_status_t status = H5FP_STATUS_OK;
- H5P_genplist_t *plist;
- H5FP_alloc_t fp_alloc;
-
- /* Get the data xfer property list */
- if ((plist = H5I_object(dxpl_id)) == NULL)
- HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, HADDR_UNDEF, "not a dataset transfer list")
-
- if (H5P_exist_plist(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY) > 0)
- if (H5P_get(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY, &capt_only) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, HADDR_UNDEF, "can't retrieve FPHDF5 property")
+ if ( H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file) ) {
+ haddr_t addr;
- HDmemset(&fp_alloc, 0, sizeof(fp_alloc));
-
- /*
- * If the captain is the only one who should allocate resources,
- * then do just that...
- */
- if (!capt_only || H5FD_fphdf5_is_captain(file)) {
- /* Send the request to the SAP */
- if (H5FP_request_allocate(file, type, size, &fp_alloc.addr,
- &fp_alloc.eoa, &req_id, &status) != SUCCEED)
- /* FIXME: Should we check the "status" variable here? */
- HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF,
- "server couldn't allocate from file")
- }
-
- if (capt_only) {
- int mrc;
-
- if ((mrc = MPI_Bcast(&fp_alloc, 1, H5FP_alloc,
- (int)H5FP_capt_barrier_rank,
- H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
- HMPI_GOTO_ERROR(HADDR_UNDEF, "MPI_Bcast failed", mrc);
+ if ( (addr = H5FP_client_alloc(file, type, dxpl_id, size))
+ == HADDR_UNDEF) {
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF,
+ "allocation failed.")
+ } else {
+ HGOTO_DONE(addr)
}
-
-
- /* Set the EOA for all processes. This doesn't fail. */
- file->cls->set_eoa(file, fp_alloc.eoa);
-
- /* We've succeeded -- return the value */
- HGOTO_DONE(fp_alloc.addr)
}
#endif /* H5_HAVE_FPHDF5 */
diff --git a/src/H5FPclient.c b/src/H5FPclient.c
index 434fa06..8e52056 100644
--- a/src/H5FPclient.c
+++ b/src/H5FPclient.c
@@ -45,6 +45,7 @@ static int interface_initialize_g = 0;
static unsigned H5FP_gen_request_id(void);
static herr_t H5FP_dump_to_file(H5FD_t *file, hid_t dxpl_id);
+
/*
*===----------------------------------------------------------------------===
* Public Library (non-API) Functions
@@ -332,7 +333,27 @@ H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id,
HDmemset(*buf, '\0', size);
HDmemset(&mpi_status, 0, sizeof(mpi_status));
- if (size < sap_read.md_size) {
+ /* the following code is a bit odd and doubtless needs a bit
+ * of explanation. I certainly stumbled over it the first
+ * time I read it.
+ *
+ * For reasons unknown, read requests sent to the SAP only
+ * include a base address, not a length. Thus the SAP sends
+ * along the largest contiguous chunk it has starting at the
+ * specified address.
+ *
+ * If the chunk is bigger than we want, we just copy over what
+ * we want, and discard the rest.
+ *
+ * If it is just the right size, we receive it in the provided
+ * buffer.
+ *
+ * if it is too small to fulfil our request, we scream and die.
+ *
+ * JRM - 4/13/04
+ */
+ if (size < sap_read.md_size)
+ {
char *mdata;
if (H5FP_read_metadata(&mdata, (int)sap_read.md_size, (int)H5FP_sap_rank) == FAIL) {
@@ -347,8 +368,11 @@ HDfprintf(stderr, "Metadata Read Failed!!!!\n");
H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc);
} else {
-HDfprintf(stderr, "Buffer not big enough to hold metadata!!!!\n");
-assert(0);
+ HDfprintf(stdout,
+ "H5FP_request_read_metadata: size = %d > md_size = %d.\n",
+ (int)size, (int)(sap_read.md_size));
+ HDfprintf(stdout, "Mssg received from SAP is too small!!!!\n");
+ assert(0);
}
break;
@@ -628,6 +652,110 @@ done:
FUNC_LEAVE_NOAPI(ret_value);
}
+
+/*
+ * Function: H5FP_client_alloc
+ * Purpose: Handle the client side of an allocation in the FP case.
+ * In essence, this is simply a matter of referring the
+ * request to the SAP, and then returning the reply.
+ *
+ * A modified version of this code used to live in H5FD_alloc(),
+ * but I move it here to encapsulate it and generally tidy up.
+ *
+ * One can argue that we should all be done in an alloc
+ * routine in H5FDfdhdf5.c, but this invlves a smaller
+ * change to the code, and thus a smaller loss if I missed
+ * a major gotcha. If things go well, and we don't heave
+ * the current implementation of FP, I'll probably go that
+ * route eventually.
+ * Return: Success: The format address of the new file memory.
+ * Failure: The undefined address HADDR_UNDEF
+ * Programmer: JRM - 4/7/04
+ * Modifications:
+ */
+haddr_t
+H5FP_client_alloc(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size)
+{
+ haddr_t ret_value = HADDR_UNDEF;
+ unsigned req_id = 0;
+ unsigned capt_only = 0;
+ H5FP_status_t status = H5FP_STATUS_OK;
+ H5P_genplist_t *plist;
+ H5FP_alloc_t fp_alloc;
+
+ FUNC_ENTER_NOAPI(H5FP_client_alloc, HADDR_UNDEF)
+
+ /* check args */
+ HDassert(file);
+ HDassert(file->cls);
+ HDassert(type >= 0 && type < H5FD_MEM_NTYPES);
+ HDassert(size > 0);
+
+ /* verify that we are running FP and we are not the SAP. */
+ HDassert(H5FD_is_fphdf5_driver(file) && !H5FD_fphdf5_is_sap(file));
+
+ /* Get the data xfer property list */
+ if ( (plist = H5I_object(dxpl_id)) == NULL ) {
+ HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, HADDR_UNDEF, "not a dataset transfer list")
+ }
+
+ if ( H5P_exist_plist(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY) > 0 ) {
+ if ( H5P_get(plist, H5FD_FPHDF5_CAPTN_ALLOC_ONLY, &capt_only) < 0 ) {
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, HADDR_UNDEF, "can't retrieve FPHDF5 property")
+ }
+ }
+
+ HDmemset(&fp_alloc, 0, sizeof(fp_alloc));
+
+ /*
+ * If the captain is the only one who should allocate resources,
+ * then do just that...
+ */
+ if ( !capt_only || H5FD_fphdf5_is_captain(file) ) {
+ /* Send the request to the SAP */
+ if ( H5FP_request_allocate(file, type, size, &fp_alloc.addr,
+ &fp_alloc.eoa, &req_id, &status)
+ != SUCCEED ) {
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTALLOC, HADDR_UNDEF,
+ "server couldn't allocate from file")
+ }
+ }
+
+ /* It should be impossible for this assertion to fail, but then
+ * that is what assertions are for.
+ */
+ HDassert(status == H5FP_STATUS_OK);
+
+ if ( capt_only ) {
+ int mrc;
+
+ if ( (mrc = MPI_Bcast(&fp_alloc, 1, H5FP_alloc,
+ (int)H5FP_capt_barrier_rank,
+ H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS ) {
+ HMPI_GOTO_ERROR(HADDR_UNDEF, "MPI_Bcast failed", mrc);
+ }
+ }
+
+ /* we used to send the eoa to the sap here, but that is silly,
+ * as the sap already knows, and it is possible that another
+ * interleaving allocation will result in a corrupted eoa.
+ *
+ * JRM - 4/7/04
+ */
+
+ /* We've succeeded -- return the value */
+ HGOTO_DONE(fp_alloc.addr)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FP_client_alloc() */
+
+
+/* This function is now called only by H5FP_client_alloc() above.
+ * Should we make it a private function only accessible from this
+ * file? JRM - 4/8/04
+ */
/*
* Function: H5FP_request_allocate
* Purpose: Request an allocation of space from the SAP.
@@ -739,8 +867,17 @@ H5FP_request_free(H5FD_t *file, H5FD_mem_t mem_type, haddr_t addr, hsize_t size,
if (sap_alloc.status != H5FP_STATUS_OK)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTCHANGE, FAIL, "can't free space on server");
+#if 0 /* JRM */
+ /* the set_eoa call just sends the eoa we received from the SAP back
+ * -- with obvious race condition problems if there are interleaving
+ * calls. Thus I am commenting this call out for now, and will delete
+ * it in time if I can't find a reason for it.
+ *
+ * JRM -- 4/7/04
+ */
/* Set the EOA for all processes. This call doesn't fail. */
file->cls->set_eoa(file, sap_alloc.eoa);
+#endif /* JRM */
*status = H5FP_STATUS_OK;
done:
@@ -834,6 +971,24 @@ H5FP_request_set_eoa(H5FD_t *file, haddr_t eoa, unsigned *req_id, H5FP_status_t
req.proc_rank = H5FD_mpi_get_rank(file);
req.addr = eoa;
+#if 0
+ /* This is useful debugging code -- lets keep for a while.
+ * JRM -- 4/13/04
+ */
+ /* dump stack each time we set the eoa */
+ {
+ int mpi_rank;
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+ HDfprintf(stdout,
+ "%d: %s: setting eoa: last eoa = %a, new eoa = %a.\n",
+ mpi_rank, "H5FP_request_set_eoa", last_eoa_received, eoa);
+ H5FS_print(stdout);
+
+ }
+#endif
+
if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank,
H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc);
@@ -903,8 +1058,20 @@ H5FP_request_update_eoma_eosda(H5FD_t *file, unsigned *req_id, H5FP_status_t *st
H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed!", mrc);
+#if 0
+ /* The following set_eoa just parrots back to the SAP the eoa
+ * we just received from it. While I don't think it is a problem
+ * in this case, there are obvious potentials for race conditions,
+ * and I don't see that it does anything useful.
+ *
+ * Thus I am commenting it out for now. I'll delete it completely
+ * as soon as I am sure that it serves no purpose whatsoever.
+ *
+ * JRM - 4/8/04
+ */
/* Set the EOA for all processes. This doesn't fail. */
file->cls->set_eoa(file, sap_eoa.eoa);
+#endif
*status = H5FP_STATUS_OK;
done:
diff --git a/src/H5FPprivate.h b/src/H5FPprivate.h
index aa85969..2c2f8b1 100644
--- a/src/H5FPprivate.h
+++ b/src/H5FPprivate.h
@@ -316,10 +316,16 @@ extern herr_t H5FP_request_flush_metadata(H5FD_t *file, unsigned file_id,
H5FP_status_t *status);
extern herr_t H5FP_request_close(H5FD_t *file, unsigned sap_file_id,
unsigned *req_id, H5FP_status_t *status);
+
+/* the following function should probably become a private function
+ * in H5FPclient.c if H5FP_client_alloc() does the job. -- JRM
+ */
extern herr_t H5FP_request_allocate(H5FD_t *file, H5FD_mem_t mem_type,
hsize_t size, haddr_t *addr,
haddr_t *eoa, unsigned *req_id,
H5FP_status_t *status);
+extern haddr_t H5FP_client_alloc(H5FD_t *file, H5FD_mem_t type,
+ hid_t dxpl_id, hsize_t size);
extern herr_t H5FP_request_free(H5FD_t *file, H5FD_mem_t mem_type,
haddr_t addr, hsize_t size,
unsigned *req_id, H5FP_status_t *status);
diff --git a/src/H5FPserver.c b/src/H5FPserver.c
index 1e2ca62..64535fd 100644
--- a/src/H5FPserver.c
+++ b/src/H5FPserver.c
@@ -454,6 +454,12 @@ H5FP_remove_object_lock_from_list(H5FP_file_info *info,
* Return: <0, 0, or >0
* Programmer: Bill Wendling, 27. August, 2002
* Modifications:
+ * Altered the function to use the H5F_addr_cmp() macro
+ * from H5Fprivate. This has the effect of reversing
+ * the direction of the comparison. This in turn
+ * should make the next and less tree primitives
+ * behave as expected.
+ * JRM - 3/22/04
*/
static int
H5FP_file_mod_cmp(H5FP_mdata_mod *k1,
@@ -463,7 +469,7 @@ H5FP_file_mod_cmp(H5FP_mdata_mod *k1,
FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FP_file_mod_cmp);
assert(k1);
assert(k2);
- FUNC_LEAVE_NOAPI(k2->addr - k1->addr);
+ FUNC_LEAVE_NOAPI(H5F_addr_cmp((k1->addr), (k2->addr)));
}
/*
@@ -517,6 +523,410 @@ done:
}
/*
+ * Function: H5FP_merge_mod_node_with_next
+ *
+ * Purpose: Given a node in a mod tree which overlaps with the next
+ * node in the tree, merge the two. Where the two nodes
+ * overlap, use the data from the supplied node.
+ *
+ * WARNING!!!
+ *
+ * This function calls H5TB_rem(), which may not delete the
+ * node specified in its parameter list -- if the target node
+ * is internal, it may swap data with a leaf node and delete
+ * the leaf instead.
+ *
+ * This implies that any pointer into the supplied tree may
+ * be invalid after this functions returns. Thus the calling
+ * function must re-aquire the address of *node_ptr (and any
+ * other nodes in *tree_ptr) after this function returns if
+ * it needs to do anything further with the node.
+ *
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ *
+ * Programmer: JRM - 3/18/04
+ *
+ * Modifications:
+ *
+ * None.
+ */
+static herr_t
+H5FP_merge_mod_node_with_next(H5TB_TREE *tree_ptr, H5TB_NODE *node_ptr)
+{
+ int i;
+ int j;
+ int offset;
+ herr_t ret_value;
+ H5TB_NODE *next_node_ptr;
+ H5FP_mdata_mod *mod_ptr;
+ H5FP_mdata_mod *next_mod_ptr;
+ H5FP_mdata_mod *key_ptr;
+ unsigned combined_md_size;
+ char *combined_metadata_ptr;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5FP_merge_mod_node_with_next);
+
+ ret_value = SUCCEED;
+
+ /* check parameters & do some initializations in passing */
+ if ( ( tree_ptr == NULL ) ||
+ ( node_ptr == NULL ) ||
+ ( (mod_ptr = (H5FP_mdata_mod *)(node_ptr->data)) == NULL ) ||
+ ( (next_node_ptr = H5TB_next(node_ptr)) == NULL ) ||
+ ( (next_mod_ptr = next_node_ptr->data) == NULL ) ||
+ ( mod_ptr->addr >= next_mod_ptr->addr ) ||
+ ( (mod_ptr->addr + mod_ptr->md_size) <= next_mod_ptr->addr ) ) {
+ HGOTO_ERROR(H5E_FPHDF5, H5E_BADVALUE, FAIL,
+ "One or more bad params detected on entry.");
+ }
+
+ if ( (mod_ptr->addr + mod_ptr->md_size) <
+ (next_mod_ptr->addr + next_mod_ptr->md_size) ) {
+ /* The next node address range is not completely subsumed in
+ * that of the current node. Must allocate a new buffer, and
+ * copy over the contents of the two buffers. Where the buffers
+ * overlap, give precidence to the data from *node_ptr
+ */
+ combined_md_size = (next_mod_ptr->addr + next_mod_ptr->md_size) -
+ (mod_ptr->addr);
+
+ combined_metadata_ptr =
+ (char *)H5MM_malloc((size_t)(combined_md_size + 1));
+
+ if ( combined_metadata_ptr == NULL ) {
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+ "can't allocate buffer for combined node.");
+ }
+
+ i = 0; /* this is the index into the combined buffer */
+
+ for ( j = 0; j < mod_ptr->md_size; j++ ) {
+ combined_metadata_ptr[i++] = (mod_ptr->metadata)[j];
+ }
+
+ offset = (int)((mod_ptr->addr + mod_ptr->md_size) - next_mod_ptr->addr);
+
+ for ( j = offset; j < next_mod_ptr->md_size; j++ ) {
+ combined_metadata_ptr[i++] = (next_mod_ptr->metadata)[j];
+ }
+
+ HDassert(i == combined_md_size);
+
+ combined_metadata_ptr[i] = (char)0;
+
+ HDfree(mod_ptr->metadata);
+ mod_ptr->metadata = combined_metadata_ptr;
+ mod_ptr->md_size = combined_md_size;
+ }
+
+ /* We have copied metadata from the next node into the current node
+ * if this was necessary. All that remains is to delete the next
+ * node from the tree and free it.
+ */
+
+ H5TB_rem(&(tree_ptr->root), next_node_ptr, (void **)(&key_ptr));
+
+ /* WARNING!!!
+ *
+ * node_ptr or any other pointer to a node in *tree_ptr may be invalid
+ * at this point. Find the associated data in the tree again if you
+ * have any further need of it.
+ */
+
+ HDassert(key_ptr == next_mod_ptr);
+ H5FP_free_mod_node(next_mod_ptr);
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FP_merge_mod_node_with_next() */
+
+/*
+ * Function: H5FP_merge_mod_node_with_prev
+ *
+ * Purpose: Given a node in a mod tree which overlaps with the previous
+ * node in the tree, merge the two. Where the two nodes
+ * overlap, use the data from the supplied node.
+ *
+ * WARNING!!!
+ *
+ * This function calls H5TB_rem() to delete node_ptr from
+ * the tree pointed to by tree_ptr. H5TB_rem() may not delete
+ * the node specified in its parameter list -- if the target
+ * node is internal, it may swap data with a leaf node and
+ * delete the leaf instead.
+ *
+ * This implies that any pointer into the supplied tree may
+ * be invalid after this functions returns. Thus the calling
+ * function must re-aquire the address of any node in *tree_ptr
+ * after this function returns if it needs to do anything
+ * further with the node in question.
+ *
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ *
+ * Programmer: JRM - 3/19/04
+ *
+ * Modifications:
+ *
+ * None.
+ */
+static herr_t
+H5FP_merge_mod_node_with_prev(H5TB_TREE *tree_ptr, H5TB_NODE *node_ptr)
+{
+ int i;
+ int j;
+ int limit;
+ herr_t ret_value;
+ H5TB_NODE *prev_node_ptr;
+ H5FP_mdata_mod *mod_ptr;
+ H5FP_mdata_mod *prev_mod_ptr;
+ H5FP_mdata_mod *key_ptr;
+ unsigned combined_md_size;
+ char *combined_metadata_ptr;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5FP_merge_mod_node_with_prev);
+
+ ret_value = SUCCEED;
+
+ /* check parameters & do some initializations in passing */
+ if ( ( tree_ptr == NULL ) ||
+ ( node_ptr == NULL ) ||
+ ( (mod_ptr = (H5FP_mdata_mod *)(node_ptr->data)) == NULL ) ||
+ ( (prev_node_ptr = H5TB_prev(node_ptr)) == NULL ) ||
+ ( (prev_mod_ptr = (H5FP_mdata_mod *)(prev_node_ptr->data)) == NULL ) ||
+ ( mod_ptr->addr <= prev_mod_ptr->addr ) ||
+ ( (prev_mod_ptr->addr + prev_mod_ptr->md_size) <= mod_ptr->addr ) ) {
+ HGOTO_ERROR(H5E_FPHDF5, H5E_BADVALUE, FAIL,
+ "One or more bad params detected on entry.");
+ }
+
+ if ( (prev_mod_ptr->addr + prev_mod_ptr->md_size) <
+ (mod_ptr->addr + mod_ptr->md_size) ) {
+ /* The node address range is not completely subsumed in
+ * that of the previous node. Must allocate a new buffer, and
+ * copy over the contents of the two buffers. Where the buffers
+ * overlap, give precidence to the data from *node_ptr
+ */
+ combined_md_size = (mod_ptr->addr + mod_ptr->md_size) -
+ (prev_mod_ptr->addr);
+
+ combined_metadata_ptr =
+ (char *)H5MM_malloc((size_t)(combined_md_size + 1));
+
+ if ( combined_metadata_ptr == NULL ) {
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+ "can't allocate buffer for combined node.");
+ }
+
+ i = 0; /* this is the index into the combined buffer */
+
+ limit = (int)(mod_ptr->addr - prev_mod_ptr->addr);
+
+ HDassert(limit > 0 );
+
+ for ( j = 0; j < limit; j++ ) {
+ combined_metadata_ptr[i++] = (prev_mod_ptr->metadata)[j];
+ }
+
+ for ( j = 0; j < (int)(mod_ptr->md_size); j++ ) {
+ combined_metadata_ptr[i++] = (mod_ptr->metadata)[j];
+ }
+
+ HDassert(i == combined_md_size);
+
+ combined_metadata_ptr[i] = (char)0;
+
+ HDfree(prev_mod_ptr->metadata);
+ prev_mod_ptr->metadata = combined_metadata_ptr;
+ prev_mod_ptr->md_size = combined_md_size;
+ } else { /* supplied node is completely inside previous node */
+ /* no need to allocate a new buffer. Just copy data from
+ * mod_ptr->metadata to the appropriate locations in
+ * prev_mod_ptr->metadata.
+ */
+
+ i = (int)(mod_ptr->addr - prev_mod_ptr->addr);
+
+ for ( j = 0; j < (int)(mod_ptr->md_size); j++ ) {
+ (prev_mod_ptr->metadata)[i++] = (mod_ptr->metadata)[j];
+ }
+
+ HDassert(i <= prev_mod_ptr->md_size);
+ }
+
+ /* We have copied metadata from the current node into the previous
+ * node. All that remains is to delete the current node from the
+ * tree and free it.
+ */
+
+ H5TB_rem(&(tree_ptr->root), node_ptr, (void **)(&key_ptr));
+
+ /* WARNING!!!
+ *
+ * Any pointer to a node in *tree_ptr may be invalid now as a result
+ * of the above call to H5TB_rem(). Find the associated data in the
+ * tree again if you have any further need of it.
+ */
+
+ HDassert(key_ptr == mod_ptr);
+ H5FP_free_mod_node(mod_ptr);
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FP_merge_mod_node_with_prev() */
+
+/*
+ * Function: H5FP_mod_node_overlaps_with_next
+ *
+ * Purpose: Given a node in a mod tree, see if there is an overlap
+ * between the address range of the supplied node, and that
+ * of the next node in the tree (if any).
+ *
+ * Return: TRUE if there is an overlap, and FALSE if there
+ * isn't.
+ *
+ * Programmer: JRM - 3/18/04
+ *
+ * Modifications:
+ *
+ * None.
+ */
+static hbool_t
+H5FP_mod_node_overlaps_with_next(H5TB_NODE *node_ptr)
+{
+ hbool_t ret_value;
+ H5TB_NODE *next_node_ptr;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5FP_mod_node_overlaps_with_next);
+
+ ret_value = FALSE;
+
+ HDassert(node_ptr != NULL);
+
+ next_node_ptr = H5TB_next(node_ptr);
+
+ if ( next_node_ptr != NULL ) {
+ if ( ( ((H5FP_mdata_mod *)(node_ptr->data))->addr > 100000 ) ||
+ ( (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size) > 1024 ) ) {
+ HDfprintf(stdout, "%s: addr = %a, size = %d, mem_type = %d.\n",
+ "H5FP_mod_node_overlaps_with_next(2)",
+ (haddr_t)(((H5FP_mdata_mod *)(node_ptr->data))->addr),
+ (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size),
+ (int)(((H5FP_mdata_mod *)(node_ptr->data))->mem_type));
+ }
+
+ if ( (((H5FP_mdata_mod *)(node_ptr->data))->addr)
+ >=
+ (((H5FP_mdata_mod *)(next_node_ptr->data))->addr)
+ ) {
+ HDfprintf(stdout, "%s: addr,len = %a,%d, next_addr,len = %a,%d.\n",
+ "H5FP_mod_node_overlaps_with_next",
+ (((H5FP_mdata_mod *)(node_ptr->data))->addr),
+ (int)(((H5FP_mdata_mod *)(node_ptr->data))->md_size),
+ (((H5FP_mdata_mod *)(next_node_ptr->data))->addr),
+ (int)(((H5FP_mdata_mod *)(next_node_ptr->data))->md_size));
+
+ HDassert((((H5FP_mdata_mod *)(node_ptr->data))->addr)
+ <
+ (((H5FP_mdata_mod *)(next_node_ptr->data))->addr)
+ );
+ }
+ if ( ( (((H5FP_mdata_mod *)(node_ptr->data))->addr)
+ +
+ (((H5FP_mdata_mod *)(node_ptr->data))->md_size)
+ )
+ >
+ (((H5FP_mdata_mod *)(next_node_ptr->data))->addr)
+ ) {
+#if 0
+ /* This is useful debugging code -- keep it around for
+ * a while. JRM -- 4/13/03
+ */
+ HDfprintf(stdout,
+ "H5FP_mod_node_overlaps_with_next: addr = %a, next_addr = %a.\n",
+ (((H5FP_mdata_mod *)(node_ptr->data))->addr),
+ (((H5FP_mdata_mod *)(next_node_ptr->data))->addr));
+#endif
+ ret_value = TRUE;
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FP_mod_node_overlaps_with_next() */
+
+/*
+ * Function: H5FP_mod_node_overlaps_with_prev
+ *
+ * Purpose: Givena node in a mod tree, see if there is an overlap
+ * between the address range of the supplied node, and that
+ * of the previous node in the tree (if any).
+ *
+ * Return: TRUE if there is an overlap, and FALSE if there
+ * isn't.
+ *
+ * Programmer: JRM - 3/18/04
+ *
+ * Modifications:
+ *
+ * None.
+ */
+static hbool_t
+H5FP_mod_node_overlaps_with_prev(H5TB_NODE *node_ptr)
+{
+ hbool_t ret_value;
+ H5TB_NODE *prev_node_ptr;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5FP_mod_node_overlaps_with_prev);
+
+ ret_value = FALSE;
+
+ HDassert(node_ptr != NULL);
+
+ prev_node_ptr = H5TB_prev(node_ptr);
+
+
+ if ( prev_node_ptr != NULL )
+ {
+ HDassert((((H5FP_mdata_mod *)(node_ptr->data))->addr)
+ >
+ (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr)
+ );
+
+ if ( ( (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr)
+ +
+ (((H5FP_mdata_mod *)(prev_node_ptr->data))->md_size)
+ )
+ >
+ (((H5FP_mdata_mod *)(node_ptr->data))->addr)
+ ) {
+#if 0
+ /* This is useful debugging code -- keep it around for
+ * a while. JRM - 4/13/04
+ */
+ HDfprintf(stdout,
+ "H5FP_mod_node_overlaps_with_prev: addr = %a, prev_addr = %a.\n",
+ (((H5FP_mdata_mod *)(node_ptr->data))->addr),
+ (((H5FP_mdata_mod *)(prev_node_ptr->data))->addr));
+#endif
+ ret_value = TRUE;
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FP_mod_node_overlaps_with_prev() */
+
+/*
* Function: H5FP_add_file_mod_to_list
* Purpose: Add a metadata write to a file ID. If the metadata is
* already in the cache, then we just replace it with the
@@ -526,12 +936,17 @@ done:
* Failure: FAIL
* Programmer: Bill Wendling, 02. August, 2002
* Modifications:
+ * Re-worked code to merge overlapping metadata changes,
+ * and to avoid discarding metadata if the supplied metadata
+ * is smaller than that already in the mod list.
+ * JRM -- 3/29/04
*/
static herr_t
H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type,
haddr_t addr, unsigned md_size,
char *metadata)
{
+ int i;
H5FP_mdata_mod *fm, mod;
H5TB_NODE *node;
herr_t ret_value = FAIL;
@@ -542,7 +957,14 @@ H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type,
assert(info);
mod.addr = addr; /* This is the key field for the TBBT */
-
+#if 0
+ /* This is useful debugging code -- keep it around for a
+ * while. JRM -- 4/13/04
+ */
+ HDfprintf(stdout,
+ "H5FP_add_file_mod_to_list: Adding chunk at %a of length %d.\n",
+ addr, (int)md_size);
+#endif
if ((node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL)) != NULL) {
/*
* The metadata is in the cache already. All we have to do is
@@ -550,18 +972,89 @@ H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type,
* The only things to change is the metadata and its size.
*/
fm = (H5FP_mdata_mod *)node->data;
- HDfree(fm->metadata);
- fm->metadata = metadata;
- fm->md_size = md_size;
+
+ if ( fm->md_size > md_size ) {
+ for ( i = 0; i < md_size; i++ )
+ {
+ (fm->metadata)[i] = metadata[i];
+ }
+ HDfree(metadata);
+ } else if ( fm->md_size < md_size ) {
+ HDfree(fm->metadata);
+ fm->metadata = metadata;
+ fm->md_size = md_size;
+
+ while ( H5FP_mod_node_overlaps_with_next(node) ) {
+ if ( H5FP_merge_mod_node_with_next(info->mod_tree, node)
+ == FAIL ) {
+ /* Need to define better errors here. -- JRM */
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL,
+ "Can't merge with next.");
+ } else {
+ (info->num_mods)--; /* since we just merged */
+
+ /* H5FP_merge_mod_node_with_next() contains a call
+ * to H5TB_rem(), which may clobber node. Hence we
+ * must look it up again before proceeding.
+ */
+ node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL);
+ HDassert(node != NULL);
+ HDassert(node->data == fm);
+ HDassert(node->key == fm);
+ }
+ }
+ } else { /* fm->md_size == md_size */
+ HDfree(fm->metadata);
+ fm->metadata = metadata;
+ }
+
HGOTO_DONE(SUCCEED);
}
- if ((fm = H5FP_new_file_mod_node(mem_type, addr, md_size, metadata)) != NULL) {
- if (!H5TB_dins(info->mod_tree, (void *)fm, NULL))
+ if ( (fm = H5FP_new_file_mod_node(mem_type, addr, md_size, metadata))
+ != NULL) {
+ if ( (node = H5TB_dins(info->mod_tree, (void *)fm, NULL)) == NULL ) {
HGOTO_ERROR(H5E_FPHDF5, H5E_CANTINSERT, FAIL,
"can't insert modification into tree");
+ }
+
+ (info->num_mods)++;
+
+ /* merge with next as required */
+ while ( H5FP_mod_node_overlaps_with_next(node) ) {
+ if ( H5FP_merge_mod_node_with_next(info->mod_tree, node) == FAIL ) {
+ /* Need to define better errors here. -- JRM */
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL,
+ "Can't merge new node with next.");
+ } else {
+ (info->num_mods)--; /* since we just merged */
+
+ /* H5FP_merge_mod_node_with_next() contains a call
+ * to H5TB_rem(), which may clobber node. Hence we
+ * must look it up again before proceeding.
+ */
+ node = H5TB_dfind(info->mod_tree, (void *)&mod, NULL);
+ HDassert(node != NULL);
+ HDassert(node->data == fm);
+ HDassert(node->key == fm);
+ }
+ }
+
+ /* if the tree was valid to begin with, we must merge with at
+ * most one previous node.
+ */
+ if ( H5FP_mod_node_overlaps_with_prev(node) ) {
+ if ( H5FP_merge_mod_node_with_prev(info->mod_tree, node) == FAIL ) {
+ /* Need to define better errors here. -- JRM */
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTCHANGE, FAIL,
+ "Can't merge new node with prev.");
+ }
+ /* H5FP_merge_mod_node_with_prev() calls H5TB_rem() to delete
+ * node after it merges with the previous node. Thus node is
+ * invalid at this point.
+ */
+ }
- ++info->num_mods;
HGOTO_DONE(SUCCEED);
}
@@ -1274,6 +1767,12 @@ H5FP_sap_handle_read_request(H5FP_request_t *req)
int mrc;
FUNC_ENTER_NOAPI_NOINIT(H5FP_sap_handle_read_request);
+#if 0
+ /* More useful debugging code to keep for a time. JRM - 4/13/04 */
+ HDfprintf(stdout,
+ "H5FP_sap_handle_read_request: req->addr = %a.\n",
+ req->addr);
+#endif
r.req_id = req->req_id;
r.file_id = req->file_id;
@@ -1356,7 +1855,12 @@ H5FP_sap_handle_write_request(H5FP_request_t *req, char *mdata, unsigned md_size
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT(H5FP_sap_handle_write_request);
-
+#if 0
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout,
+ "H5FP_sap_handle_write_request: addr = %a, md_size = %d.\n",
+ (haddr_t)(req->addr), (int)md_size);
+#endif
if ((info = H5FP_find_file_info(req->file_id)) != NULL) {
if (info->num_mods >= H5FP_MDATA_CACHE_HIGHWATER_MARK) {
/*
@@ -1544,14 +2048,26 @@ H5FP_sap_handle_alloc_request(H5FP_request_t *req)
sap_alloc.eoa = HADDR_UNDEF;
sap_alloc.status = H5FP_STATUS_CANT_ALLOC;
}
-
+#if 0
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout,
+ "%s: req_size = %d, req_type = %d, addr = %a, eoa = %a, status = %d, rp_rank = %d.\n",
+ "H5FP_sap_handle_alloc_request",
+ (int)(req->meta_block_size),
+ (int)(req->mem_type),
+ sap_alloc.addr,
+ sap_alloc.eoa,
+ (int)(sap_alloc.status),
+ (int)(req->proc_rank));
+#endif
done:
if ((mrc = MPI_Send(&sap_alloc, 1, H5FP_alloc, (int)req->proc_rank,
H5FP_TAG_ALLOC, H5FP_SAP_COMM)) != MPI_SUCCESS)
HMPI_DONE_ERROR(FAIL, "MPI_Send failed", mrc);
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5FP_sap_handle_alloc_request() */
/*
* Function: H5FP_sap_handle_free_request
@@ -1577,6 +2093,17 @@ H5FP_sap_handle_free_request(H5FP_request_t *req)
sap_alloc.status = H5FP_STATUS_OK;
sap_alloc.mem_type = req->mem_type;
+#if 0
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout,
+ "%s: addr = %a, block_size = %a, mem_type = %d, rp_rank = %d.\n",
+ "H5FP_sap_handle_free_request",
+ req->addr,
+ req->meta_block_size,
+ (int)(req->mem_type),
+ (int)(req->proc_rank));
+#endif
+
if ((info = H5FP_find_file_info(req->file_id)) != NULL) {
if (H5FD_free((H5FD_t*)&info->file, req->mem_type, H5P_DEFAULT,
req->addr, req->meta_block_size) != SUCCEED) {
@@ -1599,7 +2126,8 @@ done:
HMPI_DONE_ERROR(FAIL, "MPI_Send failed", mrc);
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5FP_sap_handle_free_request() */
/*
* Function: H5FP_sap_handle_get_eoa_request
@@ -1623,10 +2151,25 @@ H5FP_sap_handle_get_eoa_request(H5FP_request_t *req)
sap_eoa.file_id = req->file_id;
if ((info = H5FP_find_file_info(req->file_id)) != NULL) {
+
+#if 0
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout, "%s: eoa = %a, rp_rank = %d.\n",
+ "H5FP_sap_handle_get_eoa_request",
+ ((H5FD_fphdf5_t*)&info->file)->eoa,
+ (int)(req->proc_rank));
+#endif
+
/* Get the EOA. */
sap_eoa.eoa = ((H5FD_fphdf5_t*)&info->file)->eoa;
sap_eoa.status = H5FP_STATUS_OK;
} else {
+#if 1
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout, "%s: function failed. rp_rank = %d.\n",
+ "H5FP_sap_handle_get_eoa_request",
+ (int)(req->proc_rank));
+#endif
sap_eoa.eoa = HADDR_UNDEF;
sap_eoa.status = H5FP_STATUS_CANT_ALLOC;
ret_value = FAIL;
@@ -1660,10 +2203,41 @@ H5FP_sap_handle_set_eoa_request(H5FP_request_t *req)
FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FP_sap_handle_set_eoa_request);
if ((info = H5FP_find_file_info(req->file_id)) != NULL) {
+
+#if 0
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ if ( req->addr < ((H5FD_fphdf5_t*)&info->file)->eoa ) {
+ HDfprintf(stdout,
+ "%s: old eoa = %a, new eoa = %a, rp_rank = %d. %s\n",
+ "H5FP_sap_handle_set_eoa_request",
+ ((H5FD_fphdf5_t*)&info->file)->eoa,
+ req->addr,
+ (int)(req->proc_rank),
+ "<---- eoa reduced!!! -------");
+ }
+#if 0
+ else {
+ HDfprintf(stdout,
+ "%s: old eoa = %a, new eoa = %a, rp_rank = %d.\n",
+ "H5FP_sap_handle_set_eoa_request",
+ ((H5FD_fphdf5_t*)&info->file)->eoa,
+ req->addr,
+ (int)(req->proc_rank));
+ }
+#endif
+#endif
+
/* Get the EOA. */
((H5FD_fphdf5_t*)&info->file)->eoa = req->addr;
exit_state = H5FP_STATUS_OK;
} else {
+#if 1
+ /* Debugging code -- lets keep it for a time. JRM -- 4/13/04 */
+ HDfprintf(stdout,
+ "%s: Function failed -- Couldn't get info. new eoa = %a.\n",
+ "H5FP_sap_handle_set_eoa_request",
+ req->addr);
+#endif
exit_state = H5FP_STATUS_CANT_ALLOC;
ret_value = FAIL;
}