summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--SUBFILING_README.txt30
-rw-r--r--src/H5FDsubfile_mpi.c91
-rw-r--r--src/H5FDsubfiling.c17
-rw-r--r--test/h5subfiling_vol.c16
4 files changed, 40 insertions, 114 deletions
diff --git a/SUBFILING_README.txt b/SUBFILING_README.txt
index 71dd9e4..b09c592 100644
--- a/SUBFILING_README.txt
+++ b/SUBFILING_README.txt
@@ -42,10 +42,40 @@ that it supports Multithreading, but only MPI_THREAD_FUNNELED or potentially
MPI_THREAD_SERIALIZED. The initial benchmarking efforts utilize OpenMPI to
provide the necessary thread safe functionality.
+At login, the modules preloaded by default are:
+
+rawarren@cori06:~> module list
+Currently Loaded Modulefiles:
+ 1) modules/3.2.11.4 12) gni-headers/5.0.12.0-7.0.1.1_6.31__g3b1768f.ari
+ 2) altd/2.0 13) xpmem/2.2.20-7.0.1.1_4.13__g0475745.ari
+ 3) darshan/3.1.7 14) job/2.2.4-7.0.1.1_3.40__g36b56f4.ari
+ 4) intel/19.0.3.199 15) dvs/2.12_2.2.157-7.0.1.1_9.4__g083131db
+ 5) craype-network-aries 16) alps/6.6.58-7.0.1.1_6.8__g437d88db.ari
+ 6) craype/2.6.2 17) rca/2.2.20-7.0.1.1_4.51__g8e3fb5b.ari
+ 7) cray-libsci/19.06.1 18) atp/2.1.3
+ 8) udreg/2.3.2-7.0.1.1_3.36__g8175d3d.ari 19) PrgEnv-intel/6.0.5
+ 9) ugni/6.0.14.0-7.0.1.1_7.38__ge78e5b0.ari 20) craype-haswell
+ 10) pmi/5.0.14 21) cray-mpich/7.7.10
+ 11) dmapp/7.1.1-7.0.1.1_4.52__g38cf134.ari 22) craype-hugepages2M
+rawarren@cori06:~>
+
Interestingly, the default C compiler (or at least a compiler wrapper which
calls the actual C compiler) appears to know about the default MPI and thus
avoids the necessity of users providing an include path (-I<path>) or a library
link path (-L<path>) when creating executables or shared objects (libraries).
As a consequence, we unload these specific modules prior to selecting
alternatives for compilation and for the MPI library implementation.
+These defaults need to be overridden, e.g.
+
+MODULE UNLOADS::
+rawarren@cori06:~> module unload cray-mpich/7.7.10
+rawarren@cori06:~> module unload craype/2.6.2
+
+MODULE LOADS::
+rawarren@cori06:~> module load gcc/9.3.0
+rawarren@cori06:~> module load openmpi/4.0.2
+
+
+
+
diff --git a/src/H5FDsubfile_mpi.c b/src/H5FDsubfile_mpi.c
index 57add71..5bffdad 100644
--- a/src/H5FDsubfile_mpi.c
+++ b/src/H5FDsubfile_mpi.c
@@ -1192,6 +1192,7 @@ read__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
int64_t ioc_read_datasize[n_io_concentrators];
int64_t ioc_read_offset[n_io_concentrators];
MPI_Datatype ioc_read_type[n_io_concentrators];
+ useconds_t delay = 50;
subfiling_context_t *sf_context = get_subfiling_object(context_id);
assert(sf_context != NULL);
@@ -1229,22 +1230,12 @@ read__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
#ifndef NDEBUG
if (sf_verbose_flag) {
-#if 0
- if (sf_logfile) {
- fprintf(sf_logfile,
- "[%d %s] Requesting %ld read bytes from IOC(%d): "
- "sourceOffset=%ld subfile_offset=%ld\n",
- sf_world_rank, __func__, msg[0], io_concentrator[ioc],
- sourceOffset, msg[1]);
- }
-#else
fprintf(stdout,
"[%d %s] Requesting %ld read bytes from IOC(%d): "
"sourceOffset=%ld subfile_offset=%ld\n",
sf_world_rank, __func__, msg[0], io_concentrator[ioc],
sourceOffset, msg[1]);
fflush(stdout);
-#endif
}
#endif
@@ -1299,22 +1290,12 @@ read__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
for (i = 0; i < ready; i++) {
#ifndef NDEBUG
if (sf_verbose_flag) {
-#if 0
- if (sf_logfile) {
- fprintf(sf_logfile,
- "[%d] READ bytes(%ld) of data from ioc_concentrator %d "
- "complete\n",
- sf_world_rank, ioc_read_datasize[indices[i]],
- indices[i]);
- }
-#else
fprintf(stdout,
"[%d] READ bytes(%ld) of data from ioc_concentrator %d "
"complete\n",
sf_world_rank, ioc_read_datasize[indices[i]],
indices[i]);
fflush(stdout);
-#endif
}
#endif
if (ioc_read_type[indices[i]] != MPI_BYTE) {
@@ -1322,6 +1303,8 @@ read__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
}
n_waiting--;
}
+ if (n_waiting)
+ usleep(delay);
}
return status;
}
@@ -1455,11 +1438,12 @@ write__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
int64_t ioc_write_datasize[n_io_concentrators];
int64_t ioc_write_offset[n_io_concentrators];
MPI_Datatype ioc_write_type[n_io_concentrators];
-
+ useconds_t delay = 50;
subfiling_context_t *sf_context = get_subfiling_object(context_id);
int i, target, ioc, n_waiting = 0, status = 0;
int errors = 0;
+
io_concentrator = sf_context->topology->io_concentrator;
if (sf_context->topology->rank_is_ioc) {
@@ -1516,22 +1500,12 @@ write__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
#ifndef NDEBUG
if (sf_verbose_flag)
{
-#if 0
- if (sf_logfile) {
- fprintf(sf_logfile,
- "[%d %s]: write_dest[ioc(%d), "
- "sourceOffset=%ld, datasize=%ld, foffset=%ld]\n",
- sf_world_rank, __func__, ioc, sourceOffset,
- ioc_write_datasize[ioc], ioc_write_offset[ioc]);
- }
-#else
fprintf(stdout,
"[%d %s]: write_dest[ioc(%d), "
"sourceOffset=%ld, datasize=%ld, foffset=%ld]\n",
sf_world_rank, __func__, ioc, sourceOffset,
ioc_write_datasize[ioc], ioc_write_offset[ioc]);
fflush(stdout);
-#endif
}
#endif
@@ -1619,7 +1593,6 @@ write__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
fflush(stdout);
errors++;
}
-
for (i = 0; i < ready; i++) {
/* One of the Issend calls has completed
* If we used a derived type to send data, then should free
@@ -1630,6 +1603,8 @@ write__independent(int n_io_concentrators, hid_t context_id, int64_t offset,
}
n_waiting--;
}
+ if (n_waiting)
+ usleep(delay);
}
if (errors)
return -1;
@@ -2041,9 +2016,6 @@ sf_open_subfiles(hid_t fid, char *filename, char *prefix, int flags)
puts("H5FDsubfiling_init failed!");
return -1;
}
-#if 0
- printf("[%d %s]\n", sf_world_rank, __func__);
-#endif
sf_context = get_subfiling_object(context_id);
assert(sf_context != NULL);
@@ -2194,11 +2166,6 @@ ioc_main(int64_t context_id)
sf_close_file_count = 0;
sf_ops_after_first_close = 0;
-#if 0
- printf("Starting IOC! mpi_rank=%d\n", sf_world_rank);
- fflush(stdout);
-#endif
-
while (!sf_shutdown_flag || sf_work_pending) {
flag = 0;
ret = MPI_Iprobe(
@@ -2220,29 +2187,6 @@ ioc_main(int64_t context_id)
MPI_BYTE, source, tag, context->sf_msg_comm, &msg_status);
}
if (ret == MPI_SUCCESS) {
-#if 0
- if (tag == OPEN_OP) {
- sf_open_file_count++;
- printf("source=%d: sf_open_file_count = %d\n", source, sf_open_file_count);
- fflush(stdout);
- }
- else if (tag == CLOSE_OP) {
- sf_close_file_count++;
- printf("source=%d: sf_close_file_count = %d\n", source, sf_close_file_count);
- fflush(stdout);
- }
- else {
- printf("ioc(0): tag=%d\n", tag);
- fflush(stdout);
- if (sf_close_file_count) {
- sf_ops_after_first_close++;
- if (sf_close_file_count == sf_world_size) {
- printf("op=%d from source(%d) after file close! sf_open_file_count=%d\n", tag, source, sf_open_file_count);
- fflush(stdout);
- }
- }
- }
-#endif
if (msg) {
msg->source = source;
msg->subfile_rank = subfile_rank;
@@ -2620,14 +2564,9 @@ queue_file_open(
}
}
#endif
-#if 0
- printf("[ioc(%d) %s]\n", subfile_rank, __func__);
- fflush(stdout);
-#endif
errors = subfiling_open_file(msg, sf_subfile_prefix, subfile_rank, flags);
// open_count = atomic_load(&sf_file_refcount);
-#if 1
ret = MPI_Send(&errors, 1, MPI_INT, source, COMPLETED, comm);
if (ret != MPI_SUCCESS) {
printf("[ioc(%d)] MPI_Send FILE_OPEN, COMPLETED to source(%d) FAILED\n",
@@ -2635,21 +2574,7 @@ queue_file_open(
fflush(stdout);
errors++;
}
-#else
- if (open_count == sf_world_size) {
- int i, k = (sf_world_rank +1);
- for (i=0; i < sf_world_size; i++, k++) {
- source = k % sf_world_size;
- ret = MPI_Send(&errors, 1, MPI_INT, source, COMPLETED, comm);
- if (ret != MPI_SUCCESS) {
- printf("[ioc(%d)] MPI_Send FILE_OPEN, COMPLETED to source(%d) FAILED\n",
- subfile_rank, source);
- fflush(stdout);
- errors++;
- }
- }
- }
-#endif
+
if (errors) {
#ifndef NDEBUG
if (sf_verbose_flag) {
diff --git a/src/H5FDsubfiling.c b/src/H5FDsubfiling.c
index 7fbfdc7..99ea9ac 100644
--- a/src/H5FDsubfiling.c
+++ b/src/H5FDsubfiling.c
@@ -697,16 +697,8 @@ H5FD_subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t max
* We can be a bit more efficient by having rank 0 broadcast
* the stat buffer.
*/
-#if 0
- if (mpi_enabled && (my_rank == 0)) {
- int sb_size = sizeof(sb);
-
- MPI_Bcast(&sb, sb_size, MPI_BYTE, 0, MPI_COMM_WORLD);
- }
-#else
if(HDfstat(fd, &sb) < 0)
HSYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file")
-#endif
/* Create the new file struct */
if(NULL == (file = H5FL_CALLOC(H5FD_subfiling_t)))
@@ -929,9 +921,6 @@ H5FD_subfiling_query(const H5FD_t *_file, unsigned long *flags /* out */)
*flags |= H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
*flags |= H5FD_FEAT_POSIX_COMPAT_HANDLE; /* get_handle callback returns a POSIX file descriptor */
*flags |= H5FD_FEAT_SUPPORTS_SWMR_IO; /* VFD supports the single-writer/multiple-readers (SWMR) pattern */
-#if 0
- *flags |= H5FD_FEAT_HAS_MPI; /* FIXME:: for experimentation only... */
-#endif
/* Check for flags that are set by h5repart */
if(file && file->fam_to_single)
*flags |= H5FD_FEAT_IGNORE_DRVRINFO; /* Ignore the driver info when file is opened (which eliminates it) */
@@ -1755,13 +1744,9 @@ H5FD__dataset_write_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_
{
int status;
haddr_t rank_baseAddr;
-#if 0
- rank_baseAddr = get_data_offset(mpi_rank, mpi_size, dtype_extent, mem_space, file_space);
-
-#else
rank_baseAddr = get_base_offset(mpi_rank, mpi_size, mem_space_id, file_space_id);
rank_baseAddr += dataset_baseAddr;
-#endif
+
// printf("[%d] H5S_SEL_HYPERSLABS, file_offset = %lld\n", mpi_rank, rank_baseAddr );
if ((status = H5Sis_regular_hyperslab(file_space_id)) < 0) {
puts("H5Sis_regular_hyperslab returned an error");
diff --git a/test/h5subfiling_vol.c b/test/h5subfiling_vol.c
index 25f1292..6db5aa2 100644
--- a/test/h5subfiling_vol.c
+++ b/test/h5subfiling_vol.c
@@ -1884,11 +1884,6 @@ H5VL_subfiling_file_create(const char *name, unsigned flags, hid_t fcpl_id,
void *ret_value = NULL;
void *under = NULL;
-#if 0
- file_create_count++;
- printf("%s: count=%d\n", __func__, file_create_count);
- fflush(stdout);
-#endif
#ifdef ENABLE_EXT_PASSTHRU_LOGGING
printf("------- SUBFILING VOL FILE Create\n");
#endif
@@ -2008,11 +2003,6 @@ H5VL_subfiling_file_open(const char *name, unsigned flags, hid_t fapl_id,
void *ret_value = NULL;
void *under = NULL;
-#if 0
- file_open_count++;
- printf("%s: count=%d\n", __func__, file_open_count);
- fflush(stdout);
-#endif
#ifdef ENABLE_EXT_PASSTHRU_LOGGING
printf("------- SUBFILING VOL FILE Open\n");
#endif
@@ -2328,11 +2318,7 @@ H5VL_subfiling_file_close(void *_file, hid_t dxpl_id, void **req)
H5VL_subfiling_file_t *subfiling_file = (H5VL_subfiling_file_t *)o->obj.item.file;
int mpi_enabled = 0;
herr_t ret_value;
-#if 0
- file_close_count++;
- printf("%s: count=%d\n", __func__, file_close_count);
- fflush(stdout);
-#endif
+
if (file_create_count > 0) {
if (file_create_count != file_close_count)
puts("mismatched file_create_count and file_close_count");