summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJordan Henderson <jhenderson@hdfgroup.org>2017-01-26 01:34:10 (GMT)
committerJordan Henderson <jhenderson@hdfgroup.org>2017-01-26 01:34:10 (GMT)
commit0b6016a3fc6f7cf5b60160e638e3ae7280952106 (patch)
treebb61fee5ad69a71a346861f15d6607af90d86c7c
parentaab742c9a2faa5d2136245b1370b250022804861 (diff)
downloadhdf5-0b6016a3fc6f7cf5b60160e638e3ae7280952106.zip
hdf5-0b6016a3fc6f7cf5b60160e638e3ae7280952106.tar.gz
hdf5-0b6016a3fc6f7cf5b60160e638e3ae7280952106.tar.bz2
Fix bug with chunk data updating
-rw-r--r--src/H5Dmpio.c135
1 files changed, 68 insertions, 67 deletions
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 70a859e..39a5e15 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -2661,12 +2661,12 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
H5D_filtered_collective_io_info_t *local_info_array = NULL;
H5D_filtered_collective_io_info_t *overlap_info_array = NULL;
H5S_sel_iter_t *mem_iter = NULL;
+ unsigned char *mod_data = NULL;
H5SL_node_t *chunk_node;
MPI_Request *send_requests = NULL;
MPI_Status *send_statuses = NULL;
hbool_t no_overlap = FALSE;
hbool_t mem_iter_init = FALSE;
- uint8_t *mod_data = NULL;
size_t num_send_requests;
size_t num_chunks_selected;
size_t overlap_info_array_num_entries;
@@ -2680,6 +2680,8 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
HDassert(type_info);
HDassert(fm);
HDassert(chunk_list);
+ HDassert(num_entries);
+ HDassert(_num_chunks_selected_array);
if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
@@ -2723,10 +2725,7 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
HDfprintf(debug_file, "| - Chunk Num Elmts Sel.: %zd\n", H5S_GET_SELECT_NPOINTS(local_info_array[j].chunk_info.mspace));
}
HDfprintf(debug_file, "------------------------------\n\n");
-#endif
-
-#ifdef PARALLEL_COMPRESS_DEBUG
HDfprintf(debug_file, "Testing mem/file space addresses:\n");
HDfprintf(debug_file, "-----------------------------------\n");
@@ -2805,19 +2804,23 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
overlap_info_array[num_chunks_selected++] = chunk_entry;
} /* end if */
else {
+ unsigned char *mod_data_p = NULL;
hssize_t iter_nelmts;
- hssize_t mod_data_size;
- uint8_t *mod_data_p = NULL;
+ size_t mod_data_size;
/* XXX: Need some way of checking chunk entry to validate that this process
* is actually contributing some data for this chunk update
*/
- /* Determine size of serialized chunk selection plus the size
+ /* Determine size of serialized chunk memory dataspace plus the size
* of the data being written
*/
- if ((mod_data_size = H5S_SELECT_SERIAL_SIZE(chunk_entry.chunk_info.mspace)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to check dataspace selection size")
+ /* XXX: Using the file dataspace seems to work here, presumably because the file
+ * space reflects how the data is actually stored in the file, not in memory. e.g.
+ * it reflects the chunking setup
+ */
+ if (H5S_encode(chunk_entry.chunk_info.fspace, &mod_data, &mod_data_size) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to get encoded dataspace size")
if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_entry.chunk_info.mspace)) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
@@ -2828,16 +2831,16 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
mod_data_size += iter_nelmts * type_info->src_type_size;
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Allocing %zd bytes for mod. data buffer.\n", (size_t) mod_data_size);
+ HDfprintf(debug_file, "| Allocing %zd bytes for mod. data buffer.\n", (size_t) mod_data_size);
#endif
- if (NULL == (mod_data = (uint8_t *) H5MM_malloc(mod_data_size)))
+ if (NULL == (mod_data = (unsigned char *) H5MM_malloc(mod_data_size)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk modification send buffer")
- /* Serialize the current selection into the buffer */
+ /* Serialize the chunk's file dataspace into the buffer */
mod_data_p = mod_data;
- if (H5S_SELECT_SERIALIZE(chunk_entry.chunk_info.mspace, &mod_data_p) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to serialize dataspace selection")
+ if (H5S_encode(chunk_entry.chunk_info.fspace, &mod_data_p, &mod_data_size) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace")
/* Initialize iterator for memory selection */
if (H5S_select_iter_init(mem_iter, chunk_entry.chunk_info.mspace, type_info->src_type_size) < 0)
@@ -2845,7 +2848,7 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
mem_iter_init = TRUE;
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Iterating over %lld elements.\n", iter_nelmts);
+ HDfprintf(debug_file, "| Iterating over %lld elements.\n", iter_nelmts);
#endif
/* Collect the modification data into the buffer */
@@ -2862,27 +2865,28 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
}
HDfprintf(debug_file, "]\n|\n");
- HDfprintf(debug_file, "Sending modification data for chunk at address %a to process %d.\n", chunk_entry.old_chunk.offset, new_owner);
+ HDfprintf(debug_file, "| Sending modification data for chunk at address %a to process %d.\n", chunk_entry.old_chunk.offset, new_owner);
#endif
/* Send modification data to new owner */
- H5_CHECK_OVERFLOW(mod_data_size, hssize_t, int)
+ H5_CHECK_OVERFLOW(mod_data_size, size_t, int)
if (MPI_SUCCESS != (mpi_code = MPI_Isend(mod_data, (int) mod_data_size, MPI_BYTE, new_owner,
chunk_entry.chunk_info.index, io_info->comm, &send_requests[num_send_requests++])))
HMPI_GOTO_ERROR(FAIL, "MPI_Isend failed", mpi_code)
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Mod. data sent.\n");
+ HDfprintf(debug_file, "| Mod. data sent.\n|\n");
#endif
if (mod_data)
- mod_data = (uint8_t *) H5MM_free(mod_data);
+ mod_data = (unsigned char *) H5MM_free(mod_data);
if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
+ mem_iter_init = FALSE;
} /* end else */
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Chunk at address %a re-assigned to process %d.\n", chunk_addr, new_owner);
+ HDfprintf(debug_file, "| Chunk at address %a re-assigned to process %d.\n|\n", chunk_addr, new_owner);
#endif
} /* end for */
@@ -2902,9 +2906,9 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
HDfprintf(debug_file, "| Chunk Entry %zd:\n", j);
HDfprintf(debug_file, "| - Chunk Address: %a\n", local_info_array[j].old_chunk.offset);
HDfprintf(debug_file, "| - Chunk Length: %zd\n", local_info_array[j].old_chunk.length);
- HDfprintf(debug_file, "| - Address of mspace: %x\n", local_info_array[j].chunk_info.mspace);
- HDfprintf(debug_file, "| - Chunk Selection Type: %d\n", H5S_GET_SELECT_TYPE(local_info_array[j].chunk_info.mspace));
- HDfprintf(debug_file, "| - Chunk Num Elmts Sel.: %zd\n", H5S_GET_SELECT_NPOINTS(local_info_array[j].chunk_info.mspace));
+ HDfprintf(debug_file, "| - Address of mspace: %x\n", local_info_array[j].chunk_info.fspace);
+ HDfprintf(debug_file, "| - Chunk Selection Type: %d\n", H5S_GET_SELECT_TYPE(local_info_array[j].chunk_info.fspace));
+ HDfprintf(debug_file, "| - Chunk Num Elmts Sel.: %zd\n", H5S_GET_SELECT_NPOINTS(local_info_array[j].chunk_info.fspace));
}
HDfprintf(debug_file, "------------------------------\n\n");
#endif
@@ -2947,6 +2951,8 @@ done:
H5MM_free(send_statuses);
if (mod_data)
H5MM_free(mod_data);
+ if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
if (mem_iter)
H5MM_free(mem_iter);
@@ -3076,11 +3082,13 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
const H5D_io_info_t *io_info, const H5D_type_info_t *type_info)
{
H5S_sel_iter_t *mem_iter = NULL;
+ unsigned char *mod_data = NULL;
unsigned filter_mask = 0;
hssize_t iter_nelmts;
hbool_t full_overwrite = FALSE;
hbool_t mem_iter_init = FALSE;
size_t buf_size;
+ H5S_t *dataspace = NULL;
herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
@@ -3179,12 +3187,10 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
/* Update the chunk data with any modifications from other processes */
while (chunk_entry->num_writers > 1) {
- MPI_Status status;
- uint8_t *buf = NULL;
- uint8_t *buf_p = NULL;
- H5S_t *selection = NULL;
- int count;
- int mpi_code;
+ unsigned char *mod_data_p = NULL;
+ MPI_Status status;
+ int count;
+ int mpi_code;
/* XXX: Since the receive tag needs to be an int, it is possible that a chunk's index
* may fall outside the range of an int and cause an overflow problem when casting down
@@ -3196,7 +3202,7 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
HMPI_GOTO_ERROR(FAIL, "MPI_Probe failed", mpi_code)
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Found message from source %d with tag %d.\n", status.MPI_SOURCE, status.MPI_TAG);
+ HDfprintf(debug_file, "| - Found message from source %d with tag %d.\n", status.MPI_SOURCE, status.MPI_TAG);
#endif
/* Retrieve the message size */
@@ -3204,81 +3210,73 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code)
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Message size is %d bytes.\n", count);
+ HDfprintf(debug_file, "| - Message size is %d bytes.\n", count);
#endif
- if (NULL == (buf = (uint8_t *) H5MM_malloc(count)))
+ if (NULL == (mod_data = (unsigned char *) H5MM_malloc(count)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate modification data receive buffer")
- buf_p = buf;
+ mod_data_p = mod_data;
- if (MPI_SUCCESS != (mpi_code = MPI_Recv(buf, count, MPI_BYTE, MPI_ANY_SOURCE,
+ if (MPI_SUCCESS != (mpi_code = MPI_Recv(mod_data, count, MPI_BYTE, MPI_ANY_SOURCE,
chunk_entry->chunk_info.index, io_info->comm, &status)))
HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mpi_code)
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Received the message.\n");
+ HDfprintf(debug_file, "| - Received the message.\n");
#endif
- /* Deserialize the selection in the chunk's dataspace */
- if (H5S_SELECT_DESERIALIZE(&selection, &buf_p) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to deserialize dataspace selection")
-
- /* H5S_extent_copy(selection, chunk_entry->chunk_info.mspace); */
+ /* Decode the chunk's memory dataspace */
+ if (NULL == (dataspace = H5S_decode(&mod_data_p)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace")
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Deserialized selection info:\n");
+ HDfprintf(debug_file, "| Deserialized selection info:\n");
HDfprintf(debug_file, "| Mem Space:\n");
- HDfprintf(debug_file, "| - Extent Num elements: %zd\n", H5S_GET_EXTENT_NPOINTS(selection));
- HDfprintf(debug_file, "| - Extent type: %d\n", H5S_GET_EXTENT_TYPE(selection));
- HDfprintf(debug_file, "| - Selection Num elements: %zd\n", H5S_GET_SELECT_NPOINTS(selection));
- HDfprintf(debug_file, "| - Selection type: %d\n", H5S_GET_SELECT_TYPE(selection));
+ HDfprintf(debug_file, "| - Extent Num elements: %zd\n", H5S_GET_EXTENT_NPOINTS(dataspace));
+ HDfprintf(debug_file, "| - Extent type: %d\n", H5S_GET_EXTENT_TYPE(dataspace));
+ HDfprintf(debug_file, "| - Selection Num elements: %zd\n", H5S_GET_SELECT_NPOINTS(dataspace));
+ HDfprintf(debug_file, "| - Selection type: %d\n", H5S_GET_SELECT_TYPE(dataspace));
#endif
- /* XXX: After receiving the selection information, the extent is not
- * set/valid. Possibly copy the selection information directly into
- * chunk entry mem space, or copy extent into received selection H5S type
- */
-
-
- if (H5S_select_iter_init(mem_iter, selection, type_info->dst_type_size) < 0)
+ if (H5S_select_iter_init(mem_iter, dataspace, type_info->dst_type_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information")
mem_iter_init = TRUE;
- if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(selection)) < 0)
+ if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace)) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Contents of message:\n[");
+ HDfprintf(debug_file, "| Contents of message:\n| [");
for (size_t j = 0; j < iter_nelmts; j++) {
if (j > 0) HDfprintf(debug_file, ", ");
- HDfprintf(debug_file, "%lld", ((long *) buf_p)[j]);
+ HDfprintf(debug_file, "%lld", ((long *) mod_data_p)[j]);
}
HDfprintf(debug_file, "]\n");
#endif
#ifdef PARALLEL_COMPRESS_DEBUG
- HDfprintf(debug_file, "Iter nelmts=%lld.\n", iter_nelmts);
- HDfprintf(debug_file, "Mem space selected points: %zd.\n", H5S_GET_SELECT_NPOINTS(selection));
+ HDfprintf(debug_file, "| Iter nelmts=%lld.\n", iter_nelmts);
+ HDfprintf(debug_file, "| Mem space selected points: %zd.\n| \n", H5S_GET_SELECT_NPOINTS(dataspace));
#endif
/* Update the chunk data with the received modification data */
- /* if (!H5D__gather_mem(buf_p, selection, mem_iter, (size_t) iter_nelmts,
- io_info->dxpl_cache, chunk_entry->buf))
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't gather to write buffer") */
- if (H5D__scatter_mem(buf_p, selection, mem_iter, (size_t) iter_nelmts,
+ if (H5D__scatter_mem(mod_data_p, dataspace, mem_iter, (size_t) iter_nelmts,
io_info->dxpl_cache, chunk_entry->buf) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't scatter to write buffer")
chunk_entry->num_writers--;
- if (buf)
- H5MM_free(buf);
- if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
+ if (mod_data)
+ H5MM_free(mod_data);
+ if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
- if (selection)
- if (H5S_close(selection) < 0)
+ if (dataspace) {
+ if (H5S_close(dataspace) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
- }
+ dataspace = NULL;
+ }
+ mem_iter_init = FALSE;
+ } /* end while */
#ifdef PARALLEL_COMPRESS_DEBUG
@@ -3314,6 +3312,9 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
done:
if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
+ if (dataspace)
+ if (H5S_close(dataspace) < 0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
if (mem_iter)
H5MM_free(mem_iter)