summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfile_int.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5FDsubfile_int.c')
-rw-r--r--src/H5FDsubfile_int.c552
1 files changed, 281 insertions, 271 deletions
diff --git a/src/H5FDsubfile_int.c b/src/H5FDsubfile_int.c
index 8abda5e..b99ea36 100644
--- a/src/H5FDsubfile_int.c
+++ b/src/H5FDsubfile_int.c
@@ -39,7 +39,6 @@ Private functions
=========================================
*/
-
/*
--------------------------------------------------------------------------
sf_context_limit -- How many contexts can be recorded (default = 4)
@@ -48,11 +47,11 @@ sf_context_cache -- Storage for contexts
--------------------------------------------------------------------------
*/
// static size_t twoGIG_LIMIT = (1 << 30);
-static size_t sf_context_limit = 16;
-static subfiling_context_t *sf_context_cache = NULL;
-static size_t sf_topology_limit = 4;
-static sf_topology_t *sf_topology_cache = NULL;
-static app_layout_t *sf_app_layout = NULL;
+static size_t sf_context_limit = 16;
+static subfiling_context_t *sf_context_cache = NULL;
+static size_t sf_topology_limit = 4;
+static sf_topology_t * sf_topology_cache = NULL;
+static app_layout_t * sf_app_layout = NULL;
static file_map_to_context_t *sf_open_file_map = NULL;
static int sf_file_map_size = 0;
@@ -64,19 +63,18 @@ static int sf_file_map_size = 0;
---------------------------------------
*/
static stat_record_t subfiling_stats[TOTAL_STAT_COUNT];
-#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count)
-#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total/(double)subfiling_stats[WRITE_STAT].op_count)
-#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total/(double)subfiling_stats[WRITE_WAIT].op_count)
-#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count)
-#define SF_READ_TIME (subfiling_stats[READ_STAT].total/(double)subfiling_stats[READ_STAT].op_count)
-#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total/(double)subfiling_stats[READ_WAIT].op_count)
-#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total)
-
+#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count)
+#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total / (double)subfiling_stats[WRITE_STAT].op_count)
+#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total / (double)subfiling_stats[WRITE_WAIT].op_count)
+#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count)
+#define SF_READ_TIME (subfiling_stats[READ_STAT].total / (double)subfiling_stats[READ_STAT].op_count)
+#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count)
+#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total)
static void
maybe_initialize_statistics(void)
{
- memset(subfiling_stats, 0, sizeof(subfiling_stats));
+ memset(subfiling_stats, 0, sizeof(subfiling_stats));
}
static void clear_fid_map_entry(uint64_t sf_fid);
@@ -87,9 +85,6 @@ Public functions
=========================================
*/
-
-
-
/*
-------------------------------------------------------------------------
Programmer: Richard Warren
@@ -118,7 +113,7 @@ Public functions
void *
get__subfiling_object(int64_t object_id)
{
- int obj_type = (int) ((object_id >> 32) & 0x0FFFF);
+ int obj_type = (int)((object_id >> 32) & 0x0FFFF);
/* We don't require a large indexing space
* 16 bits should be enough..
*/
@@ -130,16 +125,17 @@ get__subfiling_object(int64_t object_id)
* nodes along with the number of MPI ranks on a node.
*/
if (sf_topology_cache == NULL) {
- sf_topology_cache = (sf_topology_t *) calloc(
- sf_topology_limit, sizeof(sf_topology_t));
+ sf_topology_cache = (sf_topology_t *)calloc(sf_topology_limit, sizeof(sf_topology_t));
assert(sf_topology_cache != NULL);
}
if (index < sf_topology_limit) {
- return (void *) &sf_topology_cache[index];
- } else {
+ return (void *)&sf_topology_cache[index];
+ }
+ else {
HDputs("Illegal toplogy object index");
}
- } else if (obj_type == SF_CONTEXT) {
+ }
+ else if (obj_type == SF_CONTEXT) {
/* Contexts provide information principally about
* the application and how the data layout is managed
* over some number of sub-files. The important
@@ -150,27 +146,25 @@ get__subfiling_object(int64_t object_id)
* to facilitate the communication of IO requests.
*/
if (sf_context_cache == NULL) {
- sf_context_cache = (subfiling_context_t *) calloc(
- sf_context_limit, sizeof(subfiling_context_t));
+ sf_context_cache = (subfiling_context_t *)calloc(sf_context_limit, sizeof(subfiling_context_t));
assert(sf_context_cache != NULL);
}
if (index == sf_context_limit) {
sf_context_limit *= 2;
- sf_context_cache = (subfiling_context_t *) realloc(sf_context_cache,
- sf_context_limit * sizeof(subfiling_context_t));
+ sf_context_cache = (subfiling_context_t *)realloc(sf_context_cache,
+ sf_context_limit * sizeof(subfiling_context_t));
assert(sf_context_cache != NULL);
- } else {
- return (void *) &sf_context_cache[index];
}
- } else {
- printf(
- "get__subfiling_object: UNKNOWN Subfiling object type id = 0x%lx\n",
- object_id);
+ else {
+ return (void *)&sf_context_cache[index];
+ }
+ }
+ else {
+ printf("get__subfiling_object: UNKNOWN Subfiling object type id = 0x%lx\n", object_id);
}
return NULL;
} /* end get__subfiling_object() */
-
/*-------------------------------------------------------------------------
* Function: UTILITY FUNCTIONS:
* delete_subfiling_context - removes a context entry in the
@@ -199,7 +193,7 @@ delete_subfiling_context(hid_t context_id)
MPI_Comm_free(&sf_context->sf_intercomm);
}
}
- free(sf_context);
+ /* free(sf_context); */
}
return;
@@ -211,7 +205,7 @@ Public vars (for subfiling) and functions
We probably need a function to set and clear this
======================================================
*/
-int sf_verbose_flag = 0;
+int sf_verbose_flag = 0;
int sf_open_file_count = 0;
/*-------------------------------------------------------------------------
@@ -234,14 +228,16 @@ void
set_verbose_flag(int subfile_rank, int new_value)
{
#ifndef NDEBUG
- sf_verbose_flag = (int) (new_value & 0x0FF);
+ sf_verbose_flag = (int)(new_value & 0x0FF);
if (sf_verbose_flag) {
char logname[64];
sprintf(logname, "ioc_%d.log", subfile_rank);
if (sf_open_file_count > 1)
sf_logfile = fopen(logname, "a+");
- else sf_logfile = fopen(logname, "w+");
- } else if (sf_logfile) {
+ else
+ sf_logfile = fopen(logname, "w+");
+ }
+ else if (sf_logfile) {
fclose(sf_logfile);
sf_logfile = NULL;
}
@@ -287,21 +283,21 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index)
int index;
if (sf_file_map_size == 0) {
int i;
- sf_open_file_map = (file_map_to_context_t *) malloc(
- (size_t) DEFAULT_MAP_ENTRIES * sizeof(file_map_to_context_t));
+ sf_open_file_map =
+ (file_map_to_context_t *)malloc((size_t)DEFAULT_MAP_ENTRIES * sizeof(file_map_to_context_t));
if (sf_open_file_map == NULL) {
perror("malloc");
return FAIL;
}
sf_file_map_size = DEFAULT_MAP_ENTRIES;
for (i = 0; i < sf_file_map_size; i++) {
- sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID;
+ sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID;
sf_open_file_map[i].sf_context_id = 0;
}
}
for (index = 0; index < sf_file_map_size; index++) {
if (sf_open_file_map[index].h5_file_id == (uint64_t)H5I_INVALID_HID) {
- sf_open_file_map[index].h5_file_id = fid;
+ sf_open_file_map[index].h5_file_id = fid;
sf_open_file_map[index].sf_context_id = subfile_context_id;
if (next_index) {
@@ -312,8 +308,8 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index)
}
if (index == sf_file_map_size) {
int i;
- sf_open_file_map = realloc(sf_open_file_map,
- ((size_t)(sf_file_map_size * 2) * sizeof(file_map_to_context_t)));
+ sf_open_file_map =
+ realloc(sf_open_file_map, ((size_t)(sf_file_map_size * 2) * sizeof(file_map_to_context_t)));
if (sf_open_file_map == NULL) {
perror("realloc");
return FAIL;
@@ -327,7 +323,7 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index)
*next_index = index;
}
- sf_open_file_map[index].h5_file_id = fid;
+ sf_open_file_map[index].h5_file_id = fid;
sf_open_file_map[index++].sf_context_id = subfile_context_id;
}
return status;
@@ -370,16 +366,15 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index)
int
open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int flags)
{
- int ret;
- int g_errors = 0;
- int l_errors = 0;
- double start_t = MPI_Wtime();
+ int ret;
+ int g_errors = 0;
+ int l_errors = 0;
+ double start_t = MPI_Wtime();
assert(sf_context != NULL);
#ifdef VERBOSE
- printf("[%s %d]: context_id=%ld\n", __func__,
- sf_context->topology->app_layout->world_rank,
- sf_context->sf_context_id);
+ printf("[%s %d]: context_id=%ld\n", __func__, sf_context->topology->app_layout->world_rank,
+ sf_context->sf_context_id);
#endif
/*
@@ -391,18 +386,22 @@ open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int fla
ret = record_fid_to_subfile(fid, sf_context->sf_context_id, NULL);
if (ret != SUCCEED) {
printf("[%d - %s] Error mapping hdf5 file to a subfiling context\n",
- sf_context->topology->app_layout->world_rank, __func__);
+ sf_context->topology->app_layout->world_rank, __func__);
return -1;
}
if (sf_context->topology->rank_is_ioc) {
- sf_work_request_t msg = {{flags,(int64_t)fid, sf_context->sf_context_id},
- OPEN_OP, sf_context->topology->app_layout->world_rank,
+ sf_work_request_t msg = {{flags, (int64_t)fid, sf_context->sf_context_id},
+ OPEN_OP,
+ sf_context->topology->app_layout->world_rank,
sf_context->topology->subfile_rank,
- sf_context->sf_context_id, start_t, NULL, 0};
+ sf_context->sf_context_id,
+ start_t,
+ NULL,
+ 0};
if (flags & O_CREAT) {
- sf_context->sf_fid = -2;
+ sf_context->sf_fid = -2;
}
l_errors = subfiling_open_file(&msg, sf_context->topology->subfile_rank, flags);
@@ -445,17 +444,34 @@ open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int fla
*-------------------------------------------------------------------------
*/
static int
-close__subfiles( subfiling_context_t *sf_context, uint64_t fid)
+close__subfiles(subfiling_context_t *sf_context, uint64_t fid)
{
- int global_errors = 0, errors = 0;
- int file_open_count;
- int subfile_fid = 0;
- double t0 = 0.0, t1 = 0.0, t2 = 0.0;
- double t_main_exit = 0.0, t_finalize_threads = 0.0;
+ int global_errors = 0, errors = 0;
+ int file_open_count;
+ int subfile_fid = 0;
+ double t0 = 0.0, t1 = 0.0, t2 = 0.0;
+ double t_main_exit = 0.0, t_finalize_threads = 0.0;
HDassert((sf_context != NULL));
t0 = MPI_Wtime();
+#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1
+ MPI_Request b_req = MPI_REQUEST_NULL;
+ int mpi_status = MPI_Ibarrier(MPI_COMM_WORLD, &b_req);
+ if (mpi_status == MPI_SUCCESS) {
+ int completed = 0;
+ while (!completed) {
+ useconds_t t_delay = 5;
+ usleep(t_delay);
+ mpi_status = MPI_Test(&b_req, &completed, MPI_STATUS_IGNORE);
+ if (mpi_status != MPI_SUCCESS)
+ completed = 1;
+ }
+ }
+#else
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+
/* We make the subfile close operation collective.
* Otherwise, there may be a race condition between
* our closing the subfiles and the user application
@@ -482,67 +498,73 @@ close__subfiles( subfiling_context_t *sf_context, uint64_t fid)
* as part of the file close.
*/
if (file_open_count == 1) {
- /* Shutdown the main IOC thread */
- H5FD_ioc_set_shutdown_flag(1);
- /* Allow ioc_main to exit.*/
- usleep(20);
-
- t1 = MPI_Wtime();
- H5FD_ioc_wait_thread_main();
- t2 = MPI_Wtime();
- t1 = t2;
- t_main_exit = t2-t1;
- H5FD_ioc_finalize_threads();
- t2 = MPI_Wtime();
+ /* Shutdown the main IOC thread */
+ H5FD_ioc_set_shutdown_flag(1);
+ /* Allow ioc_main to exit.*/
+ usleep(20);
+
+ t1 = MPI_Wtime();
+ H5FD_ioc_wait_thread_main();
+ t2 = MPI_Wtime();
+ t1 = t2;
+ t_main_exit = t2 - t1;
+ H5FD_ioc_finalize_threads();
+ t2 = MPI_Wtime();
}
- t_finalize_threads = t2-t1;
+ t_finalize_threads = t2 - t1;
if ((subfile_fid = sf_context->sf_fid) > 0) {
- if (HDclose(subfile_fid) < 0)
+ if (HDclose(subfile_fid) < 0) {
+ perror("close(subfile_fid)");
errors++;
- else {
- sf_context->sf_fid = -1;
- }
+ }
+ else {
+ sf_context->sf_fid = -1;
+ }
}
#ifndef NDEBUG
- /* FIXME: If we've had multiple files open, our statistics
- * will be messed up!
+ /* FIXME: If we've had multiple files open, our statistics
+ * will be messed up!
*/
if (sf_verbose_flag) {
t1 = t2;
if (sf_logfile != NULL) {
- fprintf(sf_logfile, "[%d] main_exit=%lf, finalize_threads=%lf\n",
- sf_context->sf_group_rank, t_main_exit, t_finalize_threads);
- if (SF_WRITE_OPS > 0)
- fprintf(sf_logfile, "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n",
- sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME, (t1 - t0));
- if (SF_READ_OPS > 0)
- fprintf(sf_logfile, "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n",
- sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME, (t1 - t0));
-
- fprintf(sf_logfile,"[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank,
- SF_QUEUE_DELAYS/(double)(SF_WRITE_OPS + SF_READ_OPS));
-
- fflush(sf_logfile);
-
- fclose(sf_logfile);
- sf_logfile = NULL;
+ fprintf(sf_logfile, "[%d] main_exit=%lf, finalize_threads=%lf\n", sf_context->sf_group_rank,
+ t_main_exit, t_finalize_threads);
+ if (SF_WRITE_OPS > 0)
+ fprintf(sf_logfile,
+ "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n",
+ sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME,
+ (t1 - t0));
+ if (SF_READ_OPS > 0)
+ fprintf(sf_logfile,
+ "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n",
+ sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME,
+ (t1 - t0));
+
+ fprintf(sf_logfile, "[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank,
+ SF_QUEUE_DELAYS / (double)(SF_WRITE_OPS + SF_READ_OPS));
+
+ fflush(sf_logfile);
+
+ fclose(sf_logfile);
+ sf_logfile = NULL;
}
}
- if (sf_context->filename) {
- free(sf_context->filename);
- sf_context->filename = NULL;
+ if (sf_context->h5_filename) {
+ free(sf_context->h5_filename);
+ sf_context->h5_filename = NULL;
}
if (sf_context->subfile_prefix) {
- free(sf_context->subfile_prefix);
- sf_context->subfile_prefix = NULL;
+ free(sf_context->subfile_prefix);
+ sf_context->subfile_prefix = NULL;
}
#endif
}
- MPI_Allreduce(&errors, &global_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Allreduce(&errors, &global_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#ifndef NDEBUG
if (sf_verbose_flag) {
@@ -551,9 +573,7 @@ close__subfiles( subfiling_context_t *sf_context, uint64_t fid)
client_log = NULL;
}
}
-#endif
-
-
+#endif
return global_errors;
} /* end close__subfiles() */
@@ -568,83 +588,80 @@ being thread safe.
*/
int
-sf_read_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size,
- int subfile_rank)
+sf_read_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, int subfile_rank)
{
- int ret = 0;
- int retries = MIN_RETRIES;
- useconds_t delay = 100;
- ssize_t bytes_read;
- ssize_t bytes_remaining = (ssize_t) data_size;
- char * this_buffer = data_buffer;
+ int ret = 0;
+ int retries = MIN_RETRIES;
+ useconds_t delay = 100;
+ ssize_t bytes_read;
+ ssize_t bytes_remaining = (ssize_t)data_size;
+ char * this_buffer = data_buffer;
while (bytes_remaining) {
- if ((bytes_read = (ssize_t) pread(
- fd, this_buffer, (size_t) bytes_remaining, file_offset)) < 0) {
+ if ((bytes_read = (ssize_t)pread(fd, this_buffer, (size_t)bytes_remaining, file_offset)) < 0) {
perror("pread failed!");
HDprintf("[ioc(%d) %s] pread(fd, buf, bytes_remaining=%ld, "
- "file_offset =%ld)\n",
- subfile_rank, __func__, bytes_remaining, file_offset);
+ "file_offset =%ld)\n",
+ subfile_rank, __func__, bytes_remaining, file_offset);
HDfflush(stdout);
return -1;
- } else if (bytes_read > 0) {
+ }
+ else if (bytes_read > 0) {
/* reset retry params */
- retries = MIN_RETRIES;
- delay = 100;
+ retries = MIN_RETRIES;
+ delay = 100;
bytes_remaining -= bytes_read;
#ifdef VERBOSE
- printf("[ioc(%d) %s]: read %ld bytes, remaining=%ld, file_offset=%ld\n",subfile_rank,
- __func__, bytes_read, bytes_remaining, file_offset);
+ printf("[ioc(%d) %s]: read %ld bytes, remaining=%ld, file_offset=%ld\n", subfile_rank, __func__,
+ bytes_read, bytes_remaining, file_offset);
fflush(stdout);
#endif
this_buffer += bytes_read;
file_offset += bytes_read;
-
- } else {
- if (retries == 0) {
+ }
+ else {
+ if (retries == 0) {
#ifdef VERBOSE
- printf("[ioc(%d) %s] TIMEOUT: file_offset=%ld, data_size=%ld\n",
- subfile_rank, __func__, file_offset, data_size);
- printf("[ioc(%d) %s] ERROR! read of 0 bytes == eof!\n",
- subfile_rank, __func__);
+ printf("[ioc(%d) %s] TIMEOUT: file_offset=%ld, data_size=%ld\n", subfile_rank, __func__,
+ file_offset, data_size);
+ printf("[ioc(%d) %s] ERROR! read of 0 bytes == eof!\n", subfile_rank, __func__);
fflush(stdout);
#endif
return -2;
}
- retries--;
- usleep(delay);
- delay *= 2;
+ retries--;
+ usleep(delay);
+ delay *= 2;
}
}
return ret;
} /* end sf_read_data() */
int
-sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size,
- int subfile_rank)
+sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, int subfile_rank)
{
- int ret = 0;
- char * this_data = (char *) data_buffer;
- ssize_t bytes_remaining = (ssize_t) data_size;
- ssize_t written = 0;
+ int ret = 0;
+ char * this_data = (char *)data_buffer;
+ ssize_t bytes_remaining = (ssize_t)data_size;
+ ssize_t written = 0;
while (bytes_remaining) {
- if ((written = pwrite(
- fd, this_data, (size_t) bytes_remaining, file_offset)) < 0) {
+ if ((written = pwrite(fd, this_data, (size_t)bytes_remaining, file_offset)) < 0) {
struct stat statbuf;
perror("pwrite failed!");
fstat(fd, &statbuf);
HDprintf("[ioc(%d) %s] pwrite(fd, data, bytes_remaining=%ld, "
- "file_offset=%ld), fd=%d, st_size=%ld\n",
- subfile_rank, __func__, bytes_remaining, file_offset, fd, statbuf.st_size);
+ "file_offset=%ld), fd=%d, st_size=%ld\n",
+ subfile_rank, __func__, bytes_remaining, file_offset, fd, statbuf.st_size);
HDfflush(stdout);
return -1;
- } else {
+ }
+ else {
bytes_remaining -= written;
#ifdef VERBOSE
- printf("[ioc(%d) %s]: wrote %ld bytes, remaining=%ld, file_offset=%ld\n",subfile_rank,
- __func__, written, bytes_remaining, file_offset);
+ printf("[ioc(%d) %s]: wrote %ld bytes, remaining=%ld, file_offset=%ld\n", subfile_rank, __func__,
+ written, bytes_remaining, file_offset);
fflush(stdout);
#endif
this_data += written;
@@ -660,7 +677,6 @@ sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size,
return ret;
} /* end sf_write_data() */
-
/*
* ---------------------------------------------------
* Topology discovery related functions for choosing
@@ -700,8 +716,8 @@ sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size,
static int
compare_hostid(const void *h1, const void *h2)
{
- const layout_t *host1 = (const layout_t *) h1;
- const layout_t *host2 = (const layout_t *) h2;
+ const layout_t *host1 = (const layout_t *)h1;
+ const layout_t *host2 = (const layout_t *)h2;
return (host1->hostid > host2->hostid);
}
@@ -725,8 +741,8 @@ compare_hostid(const void *h1, const void *h2)
static void
gather_topology_info(sf_topology_t *info)
{
- int sf_world_size;
- int sf_world_rank;
+ int sf_world_size;
+ int sf_world_rank;
app_layout_t *app_layout = NULL;
HDassert(info != NULL);
@@ -739,24 +755,22 @@ gather_topology_info(sf_topology_t *info)
return;
if (1) {
- long hostid = gethostid();
- layout_t my_hostinfo;
- app_layout->layout =
- (layout_t *) calloc((size_t) sf_world_size + 1, sizeof(layout_t));
+ long hostid = gethostid();
+ layout_t my_hostinfo;
+ app_layout->layout = (layout_t *)calloc((size_t)sf_world_size + 1, sizeof(layout_t));
if (app_layout->layout == NULL) {
perror("calloc failure!");
MPI_Abort(MPI_COMM_WORLD, 1);
}
app_layout->hostid = hostid;
- my_hostinfo.rank = sf_world_rank;
- my_hostinfo.hostid = hostid;
+ my_hostinfo.rank = sf_world_rank;
+ my_hostinfo.hostid = hostid;
app_layout->layout[sf_world_rank] = my_hostinfo;
if (sf_world_size > 1) {
- if (MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2,
- MPI_LONG, MPI_COMM_WORLD) == MPI_SUCCESS) {
- qsort(app_layout->layout, (size_t) sf_world_size, sizeof(layout_t),
- compare_hostid);
+ if (MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, MPI_COMM_WORLD) ==
+ MPI_SUCCESS) {
+ qsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid);
}
}
}
@@ -785,9 +799,9 @@ gather_topology_info(sf_topology_t *info)
static int
count_nodes(sf_topology_t *info, int my_rank)
{
- int k, node_count, hostid_index = -1;
+ int k, node_count, hostid_index = -1;
app_layout_t *app_layout = NULL;
- long nextid;
+ long nextid;
HDassert(info != NULL);
HDassert((app_layout = info->app_layout) != NULL);
@@ -797,8 +811,7 @@ count_nodes(sf_topology_t *info, int my_rank)
}
if (app_layout->node_ranks == NULL) {
- app_layout->node_ranks = (int *)
- calloc((size_t)(app_layout->world_size + 1), sizeof(int));
+ app_layout->node_ranks = (int *)calloc((size_t)(app_layout->world_size + 1), sizeof(int));
}
HDassert(app_layout->node_ranks != NULL);
@@ -809,8 +822,8 @@ count_nodes(sf_topology_t *info, int my_rank)
hostid_index = 0;
}
- app_layout->node_ranks[0] = 0; /* Add index */
- node_count = 1;
+ app_layout->node_ranks[0] = 0; /* Add index */
+ node_count = 1;
/* Recall that the topology array has been sorted! */
for (k = 1; k < app_layout->world_size; k++) {
@@ -827,11 +840,10 @@ count_nodes(sf_topology_t *info, int my_rank)
/* Mark the end of the node_ranks */
app_layout->node_ranks[node_count] = app_layout->world_size;
/* Save the index where we first located my hostid */
- app_layout->node_index = hostid_index;
+ app_layout->node_index = hostid_index;
return app_layout->node_count = node_count;
} /* end count_nodes() */
-
/*-------------------------------------------------------------------------
* Function: identify_ioc_ranks
*
@@ -848,34 +860,34 @@ count_nodes(sf_topology_t *info, int my_rank)
* As a side effect, we fill the 'ioc_concentrator' vector
* and set the 'rank_is_ioc' flag to TRUE if our rank is
* identified as owning an IO Concentrator (IOC).
- *
+ *
*-------------------------------------------------------------------------
*/
-static int
+static int
identify_ioc_ranks(int node_count, int iocs_per_node, sf_topology_t *info)
{
- int n;
- int total_ioc_count = 0;
- app_layout_t *app_layout = NULL;
+ int n;
+ int total_ioc_count = 0;
+ app_layout_t *app_layout = NULL;
HDassert(info != NULL);
- HDassert((app_layout = info->app_layout) != NULL);
+ HDassert((app_layout = info->app_layout) != NULL);
- for (n=0; n < node_count; n++) {
+ for (n = 0; n < node_count; n++) {
int k;
- int node_index = app_layout->node_ranks[n];
- int local_peer_count = app_layout->node_ranks[n+1] - app_layout->node_ranks[n];
+ int node_index = app_layout->node_ranks[n];
+ int local_peer_count = app_layout->node_ranks[n + 1] - app_layout->node_ranks[n];
info->io_concentrator[total_ioc_count++] = (int)(app_layout->layout[node_index++].rank);
- if (app_layout->layout[node_index-1].rank == app_layout->world_rank) {
- info->subfile_rank = total_ioc_count-1;
- info->rank_is_ioc = TRUE;
+ if (app_layout->layout[node_index - 1].rank == app_layout->world_rank) {
+ info->subfile_rank = total_ioc_count - 1;
+ info->rank_is_ioc = TRUE;
}
- for(k=1; k < iocs_per_node; k++) {
+ for (k = 1; k < iocs_per_node; k++) {
if (k < local_peer_count) {
if (app_layout->layout[node_index].rank == app_layout->world_rank) {
- info->rank_is_ioc = TRUE;
+ info->rank_is_ioc = TRUE;
info->subfile_rank = total_ioc_count;
}
info->io_concentrator[total_ioc_count++] = (int)(app_layout->layout[node_index++].rank);
@@ -888,18 +900,18 @@ identify_ioc_ranks(int node_count, int iocs_per_node, sf_topology_t *info)
} /* end identify_ioc_ranks() */
static inline void
-assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topology_t *app_topology )
+assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topology_t *app_topology)
{
app_layout_t *app_layout = NULL;
/* Validate that the input pointers are not NULL */
HDassert(io_concentrator);
HDassert(app_topology);
HDassert((app_layout = app_topology->app_layout) != NULL);
- /* fill the io_concentrator values based on the application layout */
+ /* fill the io_concentrator values based on the application layout */
if (io_concentrator) {
int k, ioc_next, ioc_index;
- for ( k=0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) {
- ioc_index = rank_multiple * k++;
+ for (k = 0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) {
+ ioc_index = rank_multiple * k++;
io_concentrator[ioc_next] = (int)(app_layout->layout[ioc_index].rank);
if (io_concentrator[ioc_next] == app_layout->world_rank)
app_topology->rank_is_ioc = TRUE;
@@ -908,8 +920,6 @@ assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topo
}
} /* end assign_ioc_ranks() */
-
-
/*-------------------------------------------------------------------------
* Function: fid_map_to_context
*
@@ -932,7 +942,7 @@ fid_map_to_context(uint64_t sf_fid)
if (sf_open_file_map) {
int i;
for (i = 0; i < sf_file_map_size; i++) {
- hid_t sf_context_id = sf_open_file_map[i].sf_context_id;
+ hid_t sf_context_id = sf_open_file_map[i].sf_context_id;
if (sf_open_file_map[i].h5_file_id == sf_fid) {
return sf_context_id;
}
@@ -941,7 +951,6 @@ fid_map_to_context(uint64_t sf_fid)
return H5I_INVALID_HID;
} /* end fid_map_to_context() */
-
/*-------------------------------------------------------------------------
* Function: clear_fid_map_entry
*
@@ -965,7 +974,7 @@ clear_fid_map_entry(uint64_t sf_fid)
int i;
for (i = 0; i < sf_file_map_size; i++) {
if (sf_open_file_map[i].h5_file_id == sf_fid) {
- sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID;
+ sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID;
sf_open_file_map[i].sf_context_id = 0;
return;
}
@@ -1037,39 +1046,38 @@ active_map_entries(void)
*-------------------------------------------------------------------------
*/
int
-H5FD__determine_ioc_count(int world_size, int world_rank,
- ioc_selection_t ioc_select_method, char *ioc_select_option,
- sf_topology_t **thisapp)
+H5FD__determine_ioc_count(int world_size, int world_rank, ioc_selection_t ioc_select_method,
+ char *ioc_select_option, sf_topology_t **thisapp)
{
- int ioc_count = 0;
+ int ioc_count = 0;
ioc_selection_t ioc_selection = ioc_selection_options;
- sf_topology_t * app_topology = NULL;
+ sf_topology_t * app_topology = NULL;
HDassert(thisapp != NULL);
if (!ioc_count || (ioc_selection != ioc_select_method)) {
- int rank_multiple = 0;
- int iocs_per_node = 1;
- char * envValue = NULL;
- int * io_concentrator = NULL;
+ int rank_multiple = 0;
+ int iocs_per_node = 1;
+ char *envValue = NULL;
+ int * io_concentrator = NULL;
if ((app_topology = *thisapp) == NULL) {
- app_topology = (sf_topology_t *) calloc(1, sizeof(sf_topology_t));
+ app_topology = (sf_topology_t *)calloc(1, sizeof(sf_topology_t));
HDassert(app_topology != NULL);
- }
+ }
if (sf_app_layout == NULL) {
- sf_app_layout = (app_layout_t *) calloc(1, sizeof(app_layout_t));
- HDassert (sf_app_layout != NULL);
- }
- /* Once the application layout has been filled once, any additional
+ sf_app_layout = (app_layout_t *)calloc(1, sizeof(app_layout_t));
+ HDassert(sf_app_layout != NULL);
+ }
+ /* Once the application layout has been filled once, any additional
* file open operations won't be required to gather that information.
*/
- app_topology->app_layout = sf_app_layout;
+ app_topology->app_layout = sf_app_layout;
sf_app_layout->world_size = world_size;
sf_app_layout->world_rank = world_rank;
if (app_topology->io_concentrator == NULL) {
app_topology->io_concentrator = io_concentrator =
- (int *) HDmalloc(((size_t) world_size * sizeof(int)));
+ (int *)HDmalloc(((size_t)world_size * sizeof(int)));
}
assert(io_concentrator != NULL);
app_topology->selection_type = ioc_selection = ioc_select_method;
@@ -1082,13 +1090,13 @@ H5FD__determine_ioc_count(int world_size, int world_rank,
if (ioc_select_method == SELECT_IOC_TOTAL) {
if (ioc_select_option) {
int checkValue = atoi(ioc_select_option);
- if ((checkValue <= 0) || (checkValue >= world_size)) {
+ if ((checkValue <= 0) || (checkValue >= world_size)) {
ioc_select_method = SELECT_IOC_ONE_PER_NODE;
goto next;
}
- ioc_count = checkValue;
- rank_multiple = (world_size/checkValue);
+ ioc_count = checkValue;
+ rank_multiple = (world_size / checkValue);
assign_ioc_ranks(io_concentrator, ioc_count, rank_multiple, app_topology);
*thisapp = app_topology;
}
@@ -1112,8 +1120,8 @@ H5FD__determine_ioc_count(int world_size, int world_rank,
goto next;
}
rank_multiple = checkValue;
- ioc_count = (world_size / rank_multiple);
-
+ ioc_count = (world_size / rank_multiple);
+
if ((world_size % rank_multiple) != 0) {
ioc_count++;
}
@@ -1131,35 +1139,35 @@ next:
if (ioc_select_method == SELECT_IOC_ONE_PER_NODE) {
app_topology->selection_type = ioc_select_method;
- ioc_count = count_nodes(app_topology, world_rank);
+ ioc_count = count_nodes(app_topology, world_rank);
- if ((envValue = HDgetenv("IOC_COUNT_PER_NODE")) != NULL) {
+ if ((envValue = HDgetenv("H5_IOC_COUNT_PER_NODE")) != NULL) {
int value_check = atoi(envValue);
if (value_check > 0) {
iocs_per_node = value_check;
}
}
- ioc_count = identify_ioc_ranks( ioc_count, iocs_per_node, app_topology);
+ ioc_count = identify_ioc_ranks(ioc_count, iocs_per_node, app_topology);
}
if (ioc_count > 0) {
app_topology->n_io_concentrators = ioc_count;
/* Create a vector of "potential" file descriptors
- * which can be indexed by the IOC id.
+ * which can be indexed by the IOC id.
*/
- app_topology->subfile_fd = (int *) HDcalloc((size_t)ioc_count, sizeof(int));
+ app_topology->subfile_fd = (int *)HDcalloc((size_t)ioc_count, sizeof(int));
if (app_topology->subfile_fd == NULL) {
HDputs("Failed to allocate vector of subfile fds");
}
*thisapp = app_topology;
}
- } else {
+ }
+ else {
HDputs("Unable to create app_toplogy");
}
return ioc_count;
} /* end H5FD__determine_ioc_count() */
-
/*
-------------------------------------------------------------------------
Programmer: Richard Warren
@@ -1193,8 +1201,9 @@ get_ioc_selection_criteria(ioc_selection_t *selection)
if ((checkValue < 0) || (checkValue >= ioc_selection_options)) {
*selection = SELECT_IOC_ONE_PER_NODE;
return NULL;
- } else {
- *selection = (ioc_selection_t) checkValue;
+ }
+ else {
+ *selection = (ioc_selection_t)checkValue;
return optValue;
}
}
@@ -1219,9 +1228,10 @@ get_ioc_selection_criteria(ioc_selection_t *selection)
*-------------------------------------------------------------------------
*/
int
-H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, subfiling_context_t *newContext)
+H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank,
+ subfiling_context_t *newContext)
{
- MPI_Comm sf_msg_comm = MPI_COMM_NULL;
+ MPI_Comm sf_msg_comm = MPI_COMM_NULL;
MPI_Comm sf_data_comm = MPI_COMM_NULL;
assert(newContext != NULL);
@@ -1229,37 +1239,35 @@ H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, s
int status;
char *envValue = NULL;
- newContext->topology = thisApp;
- newContext->sf_msg_comm = MPI_COMM_NULL;
- newContext->sf_data_comm = MPI_COMM_NULL;
- newContext->sf_group_comm = MPI_COMM_NULL;
- newContext->sf_intercomm = MPI_COMM_NULL;
+ newContext->topology = thisApp;
+ newContext->sf_msg_comm = MPI_COMM_NULL;
+ newContext->sf_data_comm = MPI_COMM_NULL;
+ newContext->sf_group_comm = MPI_COMM_NULL;
+ newContext->sf_intercomm = MPI_COMM_NULL;
newContext->sf_stripe_size = H5FD_DEFAULT_STRIPE_DEPTH;
newContext->sf_write_count = 0;
- newContext->sf_read_count = 0;
- newContext->sf_eof = 0;
+ newContext->sf_read_count = 0;
+ newContext->sf_eof = 0;
- if ((envValue = HDgetenv("IOC_STRIPE_SIZE")) != NULL) {
+ if ((envValue = HDgetenv("H5_IOC_STRIPE_SIZE")) != NULL) {
long value_check = atol(envValue);
if (value_check > 0) {
- newContext->sf_stripe_size = (int64_t) value_check;
+ newContext->sf_stripe_size = (int64_t)value_check;
}
}
- if ((envValue = HDgetenv("IOC_SUBFILE_PREFIX")) != NULL) {
+ if ((envValue = HDgetenv("H5_IOC_SUBFILE_PREFIX")) != NULL) {
char temp[PATH_MAX];
sprintf(temp, "%s", envValue);
newContext->subfile_prefix = strdup(temp);
/* sf_subfile_prefix = strdup(temp); */
}
- newContext->sf_blocksize_per_stripe =
- newContext->sf_stripe_size * n_iocs;
+ newContext->sf_blocksize_per_stripe = newContext->sf_stripe_size * n_iocs;
if (sf_msg_comm == MPI_COMM_NULL) {
status = MPI_Comm_dup(MPI_COMM_WORLD, &newContext->sf_msg_comm);
if (status != MPI_SUCCESS)
goto err_exit;
- status = MPI_Comm_set_errhandler(
- newContext->sf_msg_comm, MPI_ERRORS_RETURN);
+ status = MPI_Comm_set_errhandler(newContext->sf_msg_comm, MPI_ERRORS_RETURN);
if (status != MPI_SUCCESS)
goto err_exit;
sf_msg_comm = newContext->sf_msg_comm;
@@ -1268,31 +1276,29 @@ H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, s
status = MPI_Comm_dup(MPI_COMM_WORLD, &newContext->sf_data_comm);
if (status != MPI_SUCCESS)
goto err_exit;
- status = MPI_Comm_set_errhandler(
- newContext->sf_data_comm, MPI_ERRORS_RETURN);
+ status = MPI_Comm_set_errhandler(newContext->sf_data_comm, MPI_ERRORS_RETURN);
if (status != MPI_SUCCESS)
goto err_exit;
sf_data_comm = newContext->sf_data_comm;
}
if (n_iocs > 1) {
- status = MPI_Comm_split(MPI_COMM_WORLD, thisApp->rank_is_ioc,
- world_rank, &newContext->sf_group_comm);
+ status =
+ MPI_Comm_split(MPI_COMM_WORLD, thisApp->rank_is_ioc, world_rank, &newContext->sf_group_comm);
if (status != MPI_SUCCESS)
goto err_exit;
- status = MPI_Comm_size(
- newContext->sf_group_comm, &newContext->sf_group_size);
+ status = MPI_Comm_size(newContext->sf_group_comm, &newContext->sf_group_size);
if (status != MPI_SUCCESS)
goto err_exit;
- status = MPI_Comm_rank(
- newContext->sf_group_comm, &newContext->sf_group_rank);
+ status = MPI_Comm_rank(newContext->sf_group_comm, &newContext->sf_group_rank);
if (status != MPI_SUCCESS)
goto err_exit;
/*
* There may be additional functionality we need for the IOCs...
* If so, then can probably initialize those things here!
*/
- } else {
+ }
+ else {
newContext->sf_group_size = 1;
newContext->sf_group_rank = 0;
}
@@ -1303,7 +1309,6 @@ err_exit:
return -1;
} /* end H5FD__init_subfile_context() */
-
/*
-------------------------------------------------------------------------
Programmer: Richard Warren
@@ -1329,12 +1334,12 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i
herr_t ret_value = SUCCEED;
int ioc_count;
int world_rank, world_size;
- sf_topology_t * thisApp = NULL;
+ sf_topology_t * thisApp = NULL;
int file_index = active_map_entries();
- int64_t tag = SF_CONTEXT;
+ int64_t tag = SF_CONTEXT;
int64_t context_id = ((tag << 32) | file_index);
- subfiling_context_t *newContext = (subfiling_context_t *) get__subfiling_object(context_id);
- char *envValue = NULL;
+ subfiling_context_t *newContext = (subfiling_context_t *)get__subfiling_object(context_id);
+ char * envValue = NULL;
FUNC_ENTER_API(FAIL)
H5TRACE3("e", "IO*s*!", ioc_select_method, ioc_select_option, sf_context);
@@ -1351,8 +1356,8 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i
}
/* Compute the number an distribution map of the set of IO Concentrators */
- if ((ioc_count = H5FD__determine_ioc_count(world_size, world_rank,
- ioc_select_method, ioc_select_option, &thisApp)) <= 0) {
+ if ((ioc_count = H5FD__determine_ioc_count(world_size, world_rank, ioc_select_method, ioc_select_option,
+ &thisApp)) <= 0) {
HDputs("Unable to register subfiling topology!");
ret_value = FAIL;
goto done;
@@ -1361,15 +1366,16 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i
newContext->sf_context_id = context_id;
/* Maybe set the verbose flag for more debugging info */
- envValue = HDgetenv("SF_VERBOSE_FLAG");
+ envValue = HDgetenv("H5_SF_VERBOSE_FLAG");
if (envValue != NULL) {
int check_value = atoi(envValue);
- if (check_value > 0) sf_verbose_flag = 1;
+ if (check_value > 0)
+ sf_verbose_flag = 1;
}
/* Maybe open client-side log files */
- if (sf_verbose_flag ) {
- manage_client_logfile(world_rank,sf_verbose_flag);
+ if (sf_verbose_flag) {
+ manage_client_logfile(world_rank, sf_verbose_flag);
}
if (H5FD__init_subfile_context(thisApp, ioc_count, world_rank, newContext) != SUCCEED) {
@@ -1382,6 +1388,11 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i
ret_value = FAIL;
goto done;
}
+
+ newContext->sf_base_addr = 0;
+ if (newContext->topology->rank_is_ioc) {
+ newContext->sf_base_addr = (int64_t)(newContext->topology->subfile_rank * newContext->sf_stripe_size);
+ }
*sf_context = context_id;
done:
@@ -1427,7 +1438,7 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags)
// char filepath[PATH_MAX];
// char *slash;
config_common_t *config_info = _config_info;
- char *option_arg = get_ioc_selection_criteria(&ioc_selection);
+ char * option_arg = get_ioc_selection_criteria(&ioc_selection);
HDassert(config_info);
/* Check to see who is calling ths function::
@@ -1437,7 +1448,7 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags)
(config_info->magic != H5FD_SUBFILING_FAPL_T_MAGIC)) {
HDputs("Unrecgonized driver!");
return -1;
- }
+ }
/* Initialize/identify IO Concentrators based on the
* config information that we have...
@@ -1451,19 +1462,19 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags)
/* For statistics gathering */
maybe_initialize_statistics();
- /* Create a new context which is associated with
+ /* Create a new context which is associated with
* this file (context_id)
*/
sf_context = get__subfiling_object(context_id);
assert(sf_context != NULL);
/* Save some basic things in the new context */
- config_info->context_id = context_id;
- sf_context->sf_fid = 0;
+ config_info->context_id = context_id;
+ sf_context->sf_fid = 0;
sf_context->sf_context_id = context_id;
- sf_context->h5_file_id = h5_file_id;
- sf_context->filename = strdup(config_info->file_path);
-
+ sf_context->h5_file_id = h5_file_id;
+ sf_context->h5_filename = strdup(config_info->file_path);
+ sf_context->sf_filename = NULL;
/* Ensure that the IOC service won't exit
* as we prepare to start up..
*/
@@ -1499,4 +1510,3 @@ H5FD__close_subfiles(int64_t context_id)
assert(sf_context != NULL);
return close__subfiles(sf_context, sf_context->h5_file_id);
}
-