diff options
author | Richard.Warren <richard.warren@jelly.ad.hdfgroup.org> | 2021-09-28 01:09:46 (GMT) |
---|---|---|
committer | Richard.Warren <richard.warren@jelly.ad.hdfgroup.org> | 2021-09-28 01:09:46 (GMT) |
commit | ed1979339aac623c7e8049603453c514777816b3 (patch) | |
tree | 3cff579094fc16f931a1d14a52d7e9af96cab41a /src/H5FDsubfile_int.c | |
parent | efbefc77d0f04ce3484baf4a3bdcd2bd72174e5f (diff) | |
download | hdf5-ed1979339aac623c7e8049603453c514777816b3.zip hdf5-ed1979339aac623c7e8049603453c514777816b3.tar.gz hdf5-ed1979339aac623c7e8049603453c514777816b3.tar.bz2 |
Various fixes which allow the IOR benchmark to run correctly
Diffstat (limited to 'src/H5FDsubfile_int.c')
-rw-r--r-- | src/H5FDsubfile_int.c | 552 |
1 files changed, 281 insertions, 271 deletions
diff --git a/src/H5FDsubfile_int.c b/src/H5FDsubfile_int.c index 8abda5e..b99ea36 100644 --- a/src/H5FDsubfile_int.c +++ b/src/H5FDsubfile_int.c @@ -39,7 +39,6 @@ Private functions ========================================= */ - /* -------------------------------------------------------------------------- sf_context_limit -- How many contexts can be recorded (default = 4) @@ -48,11 +47,11 @@ sf_context_cache -- Storage for contexts -------------------------------------------------------------------------- */ // static size_t twoGIG_LIMIT = (1 << 30); -static size_t sf_context_limit = 16; -static subfiling_context_t *sf_context_cache = NULL; -static size_t sf_topology_limit = 4; -static sf_topology_t *sf_topology_cache = NULL; -static app_layout_t *sf_app_layout = NULL; +static size_t sf_context_limit = 16; +static subfiling_context_t *sf_context_cache = NULL; +static size_t sf_topology_limit = 4; +static sf_topology_t * sf_topology_cache = NULL; +static app_layout_t * sf_app_layout = NULL; static file_map_to_context_t *sf_open_file_map = NULL; static int sf_file_map_size = 0; @@ -64,19 +63,18 @@ static int sf_file_map_size = 0; --------------------------------------- */ static stat_record_t subfiling_stats[TOTAL_STAT_COUNT]; -#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total/(double)subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total/(double)subfiling_stats[WRITE_WAIT].op_count) -#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count) -#define SF_READ_TIME (subfiling_stats[READ_STAT].total/(double)subfiling_stats[READ_STAT].op_count) -#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total/(double)subfiling_stats[READ_WAIT].op_count) -#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total) - +#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count) +#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total / (double)subfiling_stats[WRITE_STAT].op_count) +#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total / (double)subfiling_stats[WRITE_WAIT].op_count) +#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count) +#define SF_READ_TIME (subfiling_stats[READ_STAT].total / (double)subfiling_stats[READ_STAT].op_count) +#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count) +#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total) static void maybe_initialize_statistics(void) { - memset(subfiling_stats, 0, sizeof(subfiling_stats)); + memset(subfiling_stats, 0, sizeof(subfiling_stats)); } static void clear_fid_map_entry(uint64_t sf_fid); @@ -87,9 +85,6 @@ Public functions ========================================= */ - - - /* ------------------------------------------------------------------------- Programmer: Richard Warren @@ -118,7 +113,7 @@ Public functions void * get__subfiling_object(int64_t object_id) { - int obj_type = (int) ((object_id >> 32) & 0x0FFFF); + int obj_type = (int)((object_id >> 32) & 0x0FFFF); /* We don't require a large indexing space * 16 bits should be enough.. */ @@ -130,16 +125,17 @@ get__subfiling_object(int64_t object_id) * nodes along with the number of MPI ranks on a node. */ if (sf_topology_cache == NULL) { - sf_topology_cache = (sf_topology_t *) calloc( - sf_topology_limit, sizeof(sf_topology_t)); + sf_topology_cache = (sf_topology_t *)calloc(sf_topology_limit, sizeof(sf_topology_t)); assert(sf_topology_cache != NULL); } if (index < sf_topology_limit) { - return (void *) &sf_topology_cache[index]; - } else { + return (void *)&sf_topology_cache[index]; + } + else { HDputs("Illegal toplogy object index"); } - } else if (obj_type == SF_CONTEXT) { + } + else if (obj_type == SF_CONTEXT) { /* Contexts provide information principally about * the application and how the data layout is managed * over some number of sub-files. The important @@ -150,27 +146,25 @@ get__subfiling_object(int64_t object_id) * to facilitate the communication of IO requests. */ if (sf_context_cache == NULL) { - sf_context_cache = (subfiling_context_t *) calloc( - sf_context_limit, sizeof(subfiling_context_t)); + sf_context_cache = (subfiling_context_t *)calloc(sf_context_limit, sizeof(subfiling_context_t)); assert(sf_context_cache != NULL); } if (index == sf_context_limit) { sf_context_limit *= 2; - sf_context_cache = (subfiling_context_t *) realloc(sf_context_cache, - sf_context_limit * sizeof(subfiling_context_t)); + sf_context_cache = (subfiling_context_t *)realloc(sf_context_cache, + sf_context_limit * sizeof(subfiling_context_t)); assert(sf_context_cache != NULL); - } else { - return (void *) &sf_context_cache[index]; } - } else { - printf( - "get__subfiling_object: UNKNOWN Subfiling object type id = 0x%lx\n", - object_id); + else { + return (void *)&sf_context_cache[index]; + } + } + else { + printf("get__subfiling_object: UNKNOWN Subfiling object type id = 0x%lx\n", object_id); } return NULL; } /* end get__subfiling_object() */ - /*------------------------------------------------------------------------- * Function: UTILITY FUNCTIONS: * delete_subfiling_context - removes a context entry in the @@ -199,7 +193,7 @@ delete_subfiling_context(hid_t context_id) MPI_Comm_free(&sf_context->sf_intercomm); } } - free(sf_context); + /* free(sf_context); */ } return; @@ -211,7 +205,7 @@ Public vars (for subfiling) and functions We probably need a function to set and clear this ====================================================== */ -int sf_verbose_flag = 0; +int sf_verbose_flag = 0; int sf_open_file_count = 0; /*------------------------------------------------------------------------- @@ -234,14 +228,16 @@ void set_verbose_flag(int subfile_rank, int new_value) { #ifndef NDEBUG - sf_verbose_flag = (int) (new_value & 0x0FF); + sf_verbose_flag = (int)(new_value & 0x0FF); if (sf_verbose_flag) { char logname[64]; sprintf(logname, "ioc_%d.log", subfile_rank); if (sf_open_file_count > 1) sf_logfile = fopen(logname, "a+"); - else sf_logfile = fopen(logname, "w+"); - } else if (sf_logfile) { + else + sf_logfile = fopen(logname, "w+"); + } + else if (sf_logfile) { fclose(sf_logfile); sf_logfile = NULL; } @@ -287,21 +283,21 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index) int index; if (sf_file_map_size == 0) { int i; - sf_open_file_map = (file_map_to_context_t *) malloc( - (size_t) DEFAULT_MAP_ENTRIES * sizeof(file_map_to_context_t)); + sf_open_file_map = + (file_map_to_context_t *)malloc((size_t)DEFAULT_MAP_ENTRIES * sizeof(file_map_to_context_t)); if (sf_open_file_map == NULL) { perror("malloc"); return FAIL; } sf_file_map_size = DEFAULT_MAP_ENTRIES; for (i = 0; i < sf_file_map_size; i++) { - sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID; + sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID; sf_open_file_map[i].sf_context_id = 0; } } for (index = 0; index < sf_file_map_size; index++) { if (sf_open_file_map[index].h5_file_id == (uint64_t)H5I_INVALID_HID) { - sf_open_file_map[index].h5_file_id = fid; + sf_open_file_map[index].h5_file_id = fid; sf_open_file_map[index].sf_context_id = subfile_context_id; if (next_index) { @@ -312,8 +308,8 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index) } if (index == sf_file_map_size) { int i; - sf_open_file_map = realloc(sf_open_file_map, - ((size_t)(sf_file_map_size * 2) * sizeof(file_map_to_context_t))); + sf_open_file_map = + realloc(sf_open_file_map, ((size_t)(sf_file_map_size * 2) * sizeof(file_map_to_context_t))); if (sf_open_file_map == NULL) { perror("realloc"); return FAIL; @@ -327,7 +323,7 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index) *next_index = index; } - sf_open_file_map[index].h5_file_id = fid; + sf_open_file_map[index].h5_file_id = fid; sf_open_file_map[index++].sf_context_id = subfile_context_id; } return status; @@ -370,16 +366,15 @@ record_fid_to_subfile(uint64_t fid, hid_t subfile_context_id, int *next_index) int open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int flags) { - int ret; - int g_errors = 0; - int l_errors = 0; - double start_t = MPI_Wtime(); + int ret; + int g_errors = 0; + int l_errors = 0; + double start_t = MPI_Wtime(); assert(sf_context != NULL); #ifdef VERBOSE - printf("[%s %d]: context_id=%ld\n", __func__, - sf_context->topology->app_layout->world_rank, - sf_context->sf_context_id); + printf("[%s %d]: context_id=%ld\n", __func__, sf_context->topology->app_layout->world_rank, + sf_context->sf_context_id); #endif /* @@ -391,18 +386,22 @@ open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int fla ret = record_fid_to_subfile(fid, sf_context->sf_context_id, NULL); if (ret != SUCCEED) { printf("[%d - %s] Error mapping hdf5 file to a subfiling context\n", - sf_context->topology->app_layout->world_rank, __func__); + sf_context->topology->app_layout->world_rank, __func__); return -1; } if (sf_context->topology->rank_is_ioc) { - sf_work_request_t msg = {{flags,(int64_t)fid, sf_context->sf_context_id}, - OPEN_OP, sf_context->topology->app_layout->world_rank, + sf_work_request_t msg = {{flags, (int64_t)fid, sf_context->sf_context_id}, + OPEN_OP, + sf_context->topology->app_layout->world_rank, sf_context->topology->subfile_rank, - sf_context->sf_context_id, start_t, NULL, 0}; + sf_context->sf_context_id, + start_t, + NULL, + 0}; if (flags & O_CREAT) { - sf_context->sf_fid = -2; + sf_context->sf_fid = -2; } l_errors = subfiling_open_file(&msg, sf_context->topology->subfile_rank, flags); @@ -445,17 +444,34 @@ open_subfile_with_context(subfiling_context_t *sf_context, uint64_t fid, int fla *------------------------------------------------------------------------- */ static int -close__subfiles( subfiling_context_t *sf_context, uint64_t fid) +close__subfiles(subfiling_context_t *sf_context, uint64_t fid) { - int global_errors = 0, errors = 0; - int file_open_count; - int subfile_fid = 0; - double t0 = 0.0, t1 = 0.0, t2 = 0.0; - double t_main_exit = 0.0, t_finalize_threads = 0.0; + int global_errors = 0, errors = 0; + int file_open_count; + int subfile_fid = 0; + double t0 = 0.0, t1 = 0.0, t2 = 0.0; + double t_main_exit = 0.0, t_finalize_threads = 0.0; HDassert((sf_context != NULL)); t0 = MPI_Wtime(); +#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1 + MPI_Request b_req = MPI_REQUEST_NULL; + int mpi_status = MPI_Ibarrier(MPI_COMM_WORLD, &b_req); + if (mpi_status == MPI_SUCCESS) { + int completed = 0; + while (!completed) { + useconds_t t_delay = 5; + usleep(t_delay); + mpi_status = MPI_Test(&b_req, &completed, MPI_STATUS_IGNORE); + if (mpi_status != MPI_SUCCESS) + completed = 1; + } + } +#else + MPI_Barrier(MPI_COMM_WORLD); +#endif + /* We make the subfile close operation collective. * Otherwise, there may be a race condition between * our closing the subfiles and the user application @@ -482,67 +498,73 @@ close__subfiles( subfiling_context_t *sf_context, uint64_t fid) * as part of the file close. */ if (file_open_count == 1) { - /* Shutdown the main IOC thread */ - H5FD_ioc_set_shutdown_flag(1); - /* Allow ioc_main to exit.*/ - usleep(20); - - t1 = MPI_Wtime(); - H5FD_ioc_wait_thread_main(); - t2 = MPI_Wtime(); - t1 = t2; - t_main_exit = t2-t1; - H5FD_ioc_finalize_threads(); - t2 = MPI_Wtime(); + /* Shutdown the main IOC thread */ + H5FD_ioc_set_shutdown_flag(1); + /* Allow ioc_main to exit.*/ + usleep(20); + + t1 = MPI_Wtime(); + H5FD_ioc_wait_thread_main(); + t2 = MPI_Wtime(); + t1 = t2; + t_main_exit = t2 - t1; + H5FD_ioc_finalize_threads(); + t2 = MPI_Wtime(); } - t_finalize_threads = t2-t1; + t_finalize_threads = t2 - t1; if ((subfile_fid = sf_context->sf_fid) > 0) { - if (HDclose(subfile_fid) < 0) + if (HDclose(subfile_fid) < 0) { + perror("close(subfile_fid)"); errors++; - else { - sf_context->sf_fid = -1; - } + } + else { + sf_context->sf_fid = -1; + } } #ifndef NDEBUG - /* FIXME: If we've had multiple files open, our statistics - * will be messed up! + /* FIXME: If we've had multiple files open, our statistics + * will be messed up! */ if (sf_verbose_flag) { t1 = t2; if (sf_logfile != NULL) { - fprintf(sf_logfile, "[%d] main_exit=%lf, finalize_threads=%lf\n", - sf_context->sf_group_rank, t_main_exit, t_finalize_threads); - if (SF_WRITE_OPS > 0) - fprintf(sf_logfile, "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME, (t1 - t0)); - if (SF_READ_OPS > 0) - fprintf(sf_logfile, "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME, (t1 - t0)); - - fprintf(sf_logfile,"[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank, - SF_QUEUE_DELAYS/(double)(SF_WRITE_OPS + SF_READ_OPS)); - - fflush(sf_logfile); - - fclose(sf_logfile); - sf_logfile = NULL; + fprintf(sf_logfile, "[%d] main_exit=%lf, finalize_threads=%lf\n", sf_context->sf_group_rank, + t_main_exit, t_finalize_threads); + if (SF_WRITE_OPS > 0) + fprintf(sf_logfile, + "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n", + sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME, + (t1 - t0)); + if (SF_READ_OPS > 0) + fprintf(sf_logfile, + "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n", + sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME, + (t1 - t0)); + + fprintf(sf_logfile, "[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank, + SF_QUEUE_DELAYS / (double)(SF_WRITE_OPS + SF_READ_OPS)); + + fflush(sf_logfile); + + fclose(sf_logfile); + sf_logfile = NULL; } } - if (sf_context->filename) { - free(sf_context->filename); - sf_context->filename = NULL; + if (sf_context->h5_filename) { + free(sf_context->h5_filename); + sf_context->h5_filename = NULL; } if (sf_context->subfile_prefix) { - free(sf_context->subfile_prefix); - sf_context->subfile_prefix = NULL; + free(sf_context->subfile_prefix); + sf_context->subfile_prefix = NULL; } #endif } - MPI_Allreduce(&errors, &global_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&errors, &global_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); #ifndef NDEBUG if (sf_verbose_flag) { @@ -551,9 +573,7 @@ close__subfiles( subfiling_context_t *sf_context, uint64_t fid) client_log = NULL; } } -#endif - - +#endif return global_errors; } /* end close__subfiles() */ @@ -568,83 +588,80 @@ being thread safe. */ int -sf_read_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, - int subfile_rank) +sf_read_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, int subfile_rank) { - int ret = 0; - int retries = MIN_RETRIES; - useconds_t delay = 100; - ssize_t bytes_read; - ssize_t bytes_remaining = (ssize_t) data_size; - char * this_buffer = data_buffer; + int ret = 0; + int retries = MIN_RETRIES; + useconds_t delay = 100; + ssize_t bytes_read; + ssize_t bytes_remaining = (ssize_t)data_size; + char * this_buffer = data_buffer; while (bytes_remaining) { - if ((bytes_read = (ssize_t) pread( - fd, this_buffer, (size_t) bytes_remaining, file_offset)) < 0) { + if ((bytes_read = (ssize_t)pread(fd, this_buffer, (size_t)bytes_remaining, file_offset)) < 0) { perror("pread failed!"); HDprintf("[ioc(%d) %s] pread(fd, buf, bytes_remaining=%ld, " - "file_offset =%ld)\n", - subfile_rank, __func__, bytes_remaining, file_offset); + "file_offset =%ld)\n", + subfile_rank, __func__, bytes_remaining, file_offset); HDfflush(stdout); return -1; - } else if (bytes_read > 0) { + } + else if (bytes_read > 0) { /* reset retry params */ - retries = MIN_RETRIES; - delay = 100; + retries = MIN_RETRIES; + delay = 100; bytes_remaining -= bytes_read; #ifdef VERBOSE - printf("[ioc(%d) %s]: read %ld bytes, remaining=%ld, file_offset=%ld\n",subfile_rank, - __func__, bytes_read, bytes_remaining, file_offset); + printf("[ioc(%d) %s]: read %ld bytes, remaining=%ld, file_offset=%ld\n", subfile_rank, __func__, + bytes_read, bytes_remaining, file_offset); fflush(stdout); #endif this_buffer += bytes_read; file_offset += bytes_read; - - } else { - if (retries == 0) { + } + else { + if (retries == 0) { #ifdef VERBOSE - printf("[ioc(%d) %s] TIMEOUT: file_offset=%ld, data_size=%ld\n", - subfile_rank, __func__, file_offset, data_size); - printf("[ioc(%d) %s] ERROR! read of 0 bytes == eof!\n", - subfile_rank, __func__); + printf("[ioc(%d) %s] TIMEOUT: file_offset=%ld, data_size=%ld\n", subfile_rank, __func__, + file_offset, data_size); + printf("[ioc(%d) %s] ERROR! read of 0 bytes == eof!\n", subfile_rank, __func__); fflush(stdout); #endif return -2; } - retries--; - usleep(delay); - delay *= 2; + retries--; + usleep(delay); + delay *= 2; } } return ret; } /* end sf_read_data() */ int -sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, - int subfile_rank) +sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, int subfile_rank) { - int ret = 0; - char * this_data = (char *) data_buffer; - ssize_t bytes_remaining = (ssize_t) data_size; - ssize_t written = 0; + int ret = 0; + char * this_data = (char *)data_buffer; + ssize_t bytes_remaining = (ssize_t)data_size; + ssize_t written = 0; while (bytes_remaining) { - if ((written = pwrite( - fd, this_data, (size_t) bytes_remaining, file_offset)) < 0) { + if ((written = pwrite(fd, this_data, (size_t)bytes_remaining, file_offset)) < 0) { struct stat statbuf; perror("pwrite failed!"); fstat(fd, &statbuf); HDprintf("[ioc(%d) %s] pwrite(fd, data, bytes_remaining=%ld, " - "file_offset=%ld), fd=%d, st_size=%ld\n", - subfile_rank, __func__, bytes_remaining, file_offset, fd, statbuf.st_size); + "file_offset=%ld), fd=%d, st_size=%ld\n", + subfile_rank, __func__, bytes_remaining, file_offset, fd, statbuf.st_size); HDfflush(stdout); return -1; - } else { + } + else { bytes_remaining -= written; #ifdef VERBOSE - printf("[ioc(%d) %s]: wrote %ld bytes, remaining=%ld, file_offset=%ld\n",subfile_rank, - __func__, written, bytes_remaining, file_offset); + printf("[ioc(%d) %s]: wrote %ld bytes, remaining=%ld, file_offset=%ld\n", subfile_rank, __func__, + written, bytes_remaining, file_offset); fflush(stdout); #endif this_data += written; @@ -660,7 +677,6 @@ sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, return ret; } /* end sf_write_data() */ - /* * --------------------------------------------------- * Topology discovery related functions for choosing @@ -700,8 +716,8 @@ sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size, static int compare_hostid(const void *h1, const void *h2) { - const layout_t *host1 = (const layout_t *) h1; - const layout_t *host2 = (const layout_t *) h2; + const layout_t *host1 = (const layout_t *)h1; + const layout_t *host2 = (const layout_t *)h2; return (host1->hostid > host2->hostid); } @@ -725,8 +741,8 @@ compare_hostid(const void *h1, const void *h2) static void gather_topology_info(sf_topology_t *info) { - int sf_world_size; - int sf_world_rank; + int sf_world_size; + int sf_world_rank; app_layout_t *app_layout = NULL; HDassert(info != NULL); @@ -739,24 +755,22 @@ gather_topology_info(sf_topology_t *info) return; if (1) { - long hostid = gethostid(); - layout_t my_hostinfo; - app_layout->layout = - (layout_t *) calloc((size_t) sf_world_size + 1, sizeof(layout_t)); + long hostid = gethostid(); + layout_t my_hostinfo; + app_layout->layout = (layout_t *)calloc((size_t)sf_world_size + 1, sizeof(layout_t)); if (app_layout->layout == NULL) { perror("calloc failure!"); MPI_Abort(MPI_COMM_WORLD, 1); } app_layout->hostid = hostid; - my_hostinfo.rank = sf_world_rank; - my_hostinfo.hostid = hostid; + my_hostinfo.rank = sf_world_rank; + my_hostinfo.hostid = hostid; app_layout->layout[sf_world_rank] = my_hostinfo; if (sf_world_size > 1) { - if (MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, - MPI_LONG, MPI_COMM_WORLD) == MPI_SUCCESS) { - qsort(app_layout->layout, (size_t) sf_world_size, sizeof(layout_t), - compare_hostid); + if (MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, MPI_COMM_WORLD) == + MPI_SUCCESS) { + qsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid); } } } @@ -785,9 +799,9 @@ gather_topology_info(sf_topology_t *info) static int count_nodes(sf_topology_t *info, int my_rank) { - int k, node_count, hostid_index = -1; + int k, node_count, hostid_index = -1; app_layout_t *app_layout = NULL; - long nextid; + long nextid; HDassert(info != NULL); HDassert((app_layout = info->app_layout) != NULL); @@ -797,8 +811,7 @@ count_nodes(sf_topology_t *info, int my_rank) } if (app_layout->node_ranks == NULL) { - app_layout->node_ranks = (int *) - calloc((size_t)(app_layout->world_size + 1), sizeof(int)); + app_layout->node_ranks = (int *)calloc((size_t)(app_layout->world_size + 1), sizeof(int)); } HDassert(app_layout->node_ranks != NULL); @@ -809,8 +822,8 @@ count_nodes(sf_topology_t *info, int my_rank) hostid_index = 0; } - app_layout->node_ranks[0] = 0; /* Add index */ - node_count = 1; + app_layout->node_ranks[0] = 0; /* Add index */ + node_count = 1; /* Recall that the topology array has been sorted! */ for (k = 1; k < app_layout->world_size; k++) { @@ -827,11 +840,10 @@ count_nodes(sf_topology_t *info, int my_rank) /* Mark the end of the node_ranks */ app_layout->node_ranks[node_count] = app_layout->world_size; /* Save the index where we first located my hostid */ - app_layout->node_index = hostid_index; + app_layout->node_index = hostid_index; return app_layout->node_count = node_count; } /* end count_nodes() */ - /*------------------------------------------------------------------------- * Function: identify_ioc_ranks * @@ -848,34 +860,34 @@ count_nodes(sf_topology_t *info, int my_rank) * As a side effect, we fill the 'ioc_concentrator' vector * and set the 'rank_is_ioc' flag to TRUE if our rank is * identified as owning an IO Concentrator (IOC). - * + * *------------------------------------------------------------------------- */ -static int +static int identify_ioc_ranks(int node_count, int iocs_per_node, sf_topology_t *info) { - int n; - int total_ioc_count = 0; - app_layout_t *app_layout = NULL; + int n; + int total_ioc_count = 0; + app_layout_t *app_layout = NULL; HDassert(info != NULL); - HDassert((app_layout = info->app_layout) != NULL); + HDassert((app_layout = info->app_layout) != NULL); - for (n=0; n < node_count; n++) { + for (n = 0; n < node_count; n++) { int k; - int node_index = app_layout->node_ranks[n]; - int local_peer_count = app_layout->node_ranks[n+1] - app_layout->node_ranks[n]; + int node_index = app_layout->node_ranks[n]; + int local_peer_count = app_layout->node_ranks[n + 1] - app_layout->node_ranks[n]; info->io_concentrator[total_ioc_count++] = (int)(app_layout->layout[node_index++].rank); - if (app_layout->layout[node_index-1].rank == app_layout->world_rank) { - info->subfile_rank = total_ioc_count-1; - info->rank_is_ioc = TRUE; + if (app_layout->layout[node_index - 1].rank == app_layout->world_rank) { + info->subfile_rank = total_ioc_count - 1; + info->rank_is_ioc = TRUE; } - for(k=1; k < iocs_per_node; k++) { + for (k = 1; k < iocs_per_node; k++) { if (k < local_peer_count) { if (app_layout->layout[node_index].rank == app_layout->world_rank) { - info->rank_is_ioc = TRUE; + info->rank_is_ioc = TRUE; info->subfile_rank = total_ioc_count; } info->io_concentrator[total_ioc_count++] = (int)(app_layout->layout[node_index++].rank); @@ -888,18 +900,18 @@ identify_ioc_ranks(int node_count, int iocs_per_node, sf_topology_t *info) } /* end identify_ioc_ranks() */ static inline void -assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topology_t *app_topology ) +assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topology_t *app_topology) { app_layout_t *app_layout = NULL; /* Validate that the input pointers are not NULL */ HDassert(io_concentrator); HDassert(app_topology); HDassert((app_layout = app_topology->app_layout) != NULL); - /* fill the io_concentrator values based on the application layout */ + /* fill the io_concentrator values based on the application layout */ if (io_concentrator) { int k, ioc_next, ioc_index; - for ( k=0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) { - ioc_index = rank_multiple * k++; + for (k = 0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) { + ioc_index = rank_multiple * k++; io_concentrator[ioc_next] = (int)(app_layout->layout[ioc_index].rank); if (io_concentrator[ioc_next] == app_layout->world_rank) app_topology->rank_is_ioc = TRUE; @@ -908,8 +920,6 @@ assign_ioc_ranks(int *io_concentrator, int ioc_count, int rank_multiple, sf_topo } } /* end assign_ioc_ranks() */ - - /*------------------------------------------------------------------------- * Function: fid_map_to_context * @@ -932,7 +942,7 @@ fid_map_to_context(uint64_t sf_fid) if (sf_open_file_map) { int i; for (i = 0; i < sf_file_map_size; i++) { - hid_t sf_context_id = sf_open_file_map[i].sf_context_id; + hid_t sf_context_id = sf_open_file_map[i].sf_context_id; if (sf_open_file_map[i].h5_file_id == sf_fid) { return sf_context_id; } @@ -941,7 +951,6 @@ fid_map_to_context(uint64_t sf_fid) return H5I_INVALID_HID; } /* end fid_map_to_context() */ - /*------------------------------------------------------------------------- * Function: clear_fid_map_entry * @@ -965,7 +974,7 @@ clear_fid_map_entry(uint64_t sf_fid) int i; for (i = 0; i < sf_file_map_size; i++) { if (sf_open_file_map[i].h5_file_id == sf_fid) { - sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID; + sf_open_file_map[i].h5_file_id = (uint64_t)H5I_INVALID_HID; sf_open_file_map[i].sf_context_id = 0; return; } @@ -1037,39 +1046,38 @@ active_map_entries(void) *------------------------------------------------------------------------- */ int -H5FD__determine_ioc_count(int world_size, int world_rank, - ioc_selection_t ioc_select_method, char *ioc_select_option, - sf_topology_t **thisapp) +H5FD__determine_ioc_count(int world_size, int world_rank, ioc_selection_t ioc_select_method, + char *ioc_select_option, sf_topology_t **thisapp) { - int ioc_count = 0; + int ioc_count = 0; ioc_selection_t ioc_selection = ioc_selection_options; - sf_topology_t * app_topology = NULL; + sf_topology_t * app_topology = NULL; HDassert(thisapp != NULL); if (!ioc_count || (ioc_selection != ioc_select_method)) { - int rank_multiple = 0; - int iocs_per_node = 1; - char * envValue = NULL; - int * io_concentrator = NULL; + int rank_multiple = 0; + int iocs_per_node = 1; + char *envValue = NULL; + int * io_concentrator = NULL; if ((app_topology = *thisapp) == NULL) { - app_topology = (sf_topology_t *) calloc(1, sizeof(sf_topology_t)); + app_topology = (sf_topology_t *)calloc(1, sizeof(sf_topology_t)); HDassert(app_topology != NULL); - } + } if (sf_app_layout == NULL) { - sf_app_layout = (app_layout_t *) calloc(1, sizeof(app_layout_t)); - HDassert (sf_app_layout != NULL); - } - /* Once the application layout has been filled once, any additional + sf_app_layout = (app_layout_t *)calloc(1, sizeof(app_layout_t)); + HDassert(sf_app_layout != NULL); + } + /* Once the application layout has been filled once, any additional * file open operations won't be required to gather that information. */ - app_topology->app_layout = sf_app_layout; + app_topology->app_layout = sf_app_layout; sf_app_layout->world_size = world_size; sf_app_layout->world_rank = world_rank; if (app_topology->io_concentrator == NULL) { app_topology->io_concentrator = io_concentrator = - (int *) HDmalloc(((size_t) world_size * sizeof(int))); + (int *)HDmalloc(((size_t)world_size * sizeof(int))); } assert(io_concentrator != NULL); app_topology->selection_type = ioc_selection = ioc_select_method; @@ -1082,13 +1090,13 @@ H5FD__determine_ioc_count(int world_size, int world_rank, if (ioc_select_method == SELECT_IOC_TOTAL) { if (ioc_select_option) { int checkValue = atoi(ioc_select_option); - if ((checkValue <= 0) || (checkValue >= world_size)) { + if ((checkValue <= 0) || (checkValue >= world_size)) { ioc_select_method = SELECT_IOC_ONE_PER_NODE; goto next; } - ioc_count = checkValue; - rank_multiple = (world_size/checkValue); + ioc_count = checkValue; + rank_multiple = (world_size / checkValue); assign_ioc_ranks(io_concentrator, ioc_count, rank_multiple, app_topology); *thisapp = app_topology; } @@ -1112,8 +1120,8 @@ H5FD__determine_ioc_count(int world_size, int world_rank, goto next; } rank_multiple = checkValue; - ioc_count = (world_size / rank_multiple); - + ioc_count = (world_size / rank_multiple); + if ((world_size % rank_multiple) != 0) { ioc_count++; } @@ -1131,35 +1139,35 @@ next: if (ioc_select_method == SELECT_IOC_ONE_PER_NODE) { app_topology->selection_type = ioc_select_method; - ioc_count = count_nodes(app_topology, world_rank); + ioc_count = count_nodes(app_topology, world_rank); - if ((envValue = HDgetenv("IOC_COUNT_PER_NODE")) != NULL) { + if ((envValue = HDgetenv("H5_IOC_COUNT_PER_NODE")) != NULL) { int value_check = atoi(envValue); if (value_check > 0) { iocs_per_node = value_check; } } - ioc_count = identify_ioc_ranks( ioc_count, iocs_per_node, app_topology); + ioc_count = identify_ioc_ranks(ioc_count, iocs_per_node, app_topology); } if (ioc_count > 0) { app_topology->n_io_concentrators = ioc_count; /* Create a vector of "potential" file descriptors - * which can be indexed by the IOC id. + * which can be indexed by the IOC id. */ - app_topology->subfile_fd = (int *) HDcalloc((size_t)ioc_count, sizeof(int)); + app_topology->subfile_fd = (int *)HDcalloc((size_t)ioc_count, sizeof(int)); if (app_topology->subfile_fd == NULL) { HDputs("Failed to allocate vector of subfile fds"); } *thisapp = app_topology; } - } else { + } + else { HDputs("Unable to create app_toplogy"); } return ioc_count; } /* end H5FD__determine_ioc_count() */ - /* ------------------------------------------------------------------------- Programmer: Richard Warren @@ -1193,8 +1201,9 @@ get_ioc_selection_criteria(ioc_selection_t *selection) if ((checkValue < 0) || (checkValue >= ioc_selection_options)) { *selection = SELECT_IOC_ONE_PER_NODE; return NULL; - } else { - *selection = (ioc_selection_t) checkValue; + } + else { + *selection = (ioc_selection_t)checkValue; return optValue; } } @@ -1219,9 +1228,10 @@ get_ioc_selection_criteria(ioc_selection_t *selection) *------------------------------------------------------------------------- */ int -H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, subfiling_context_t *newContext) +H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, + subfiling_context_t *newContext) { - MPI_Comm sf_msg_comm = MPI_COMM_NULL; + MPI_Comm sf_msg_comm = MPI_COMM_NULL; MPI_Comm sf_data_comm = MPI_COMM_NULL; assert(newContext != NULL); @@ -1229,37 +1239,35 @@ H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, s int status; char *envValue = NULL; - newContext->topology = thisApp; - newContext->sf_msg_comm = MPI_COMM_NULL; - newContext->sf_data_comm = MPI_COMM_NULL; - newContext->sf_group_comm = MPI_COMM_NULL; - newContext->sf_intercomm = MPI_COMM_NULL; + newContext->topology = thisApp; + newContext->sf_msg_comm = MPI_COMM_NULL; + newContext->sf_data_comm = MPI_COMM_NULL; + newContext->sf_group_comm = MPI_COMM_NULL; + newContext->sf_intercomm = MPI_COMM_NULL; newContext->sf_stripe_size = H5FD_DEFAULT_STRIPE_DEPTH; newContext->sf_write_count = 0; - newContext->sf_read_count = 0; - newContext->sf_eof = 0; + newContext->sf_read_count = 0; + newContext->sf_eof = 0; - if ((envValue = HDgetenv("IOC_STRIPE_SIZE")) != NULL) { + if ((envValue = HDgetenv("H5_IOC_STRIPE_SIZE")) != NULL) { long value_check = atol(envValue); if (value_check > 0) { - newContext->sf_stripe_size = (int64_t) value_check; + newContext->sf_stripe_size = (int64_t)value_check; } } - if ((envValue = HDgetenv("IOC_SUBFILE_PREFIX")) != NULL) { + if ((envValue = HDgetenv("H5_IOC_SUBFILE_PREFIX")) != NULL) { char temp[PATH_MAX]; sprintf(temp, "%s", envValue); newContext->subfile_prefix = strdup(temp); /* sf_subfile_prefix = strdup(temp); */ } - newContext->sf_blocksize_per_stripe = - newContext->sf_stripe_size * n_iocs; + newContext->sf_blocksize_per_stripe = newContext->sf_stripe_size * n_iocs; if (sf_msg_comm == MPI_COMM_NULL) { status = MPI_Comm_dup(MPI_COMM_WORLD, &newContext->sf_msg_comm); if (status != MPI_SUCCESS) goto err_exit; - status = MPI_Comm_set_errhandler( - newContext->sf_msg_comm, MPI_ERRORS_RETURN); + status = MPI_Comm_set_errhandler(newContext->sf_msg_comm, MPI_ERRORS_RETURN); if (status != MPI_SUCCESS) goto err_exit; sf_msg_comm = newContext->sf_msg_comm; @@ -1268,31 +1276,29 @@ H5FD__init_subfile_context(sf_topology_t *thisApp, int n_iocs, int world_rank, s status = MPI_Comm_dup(MPI_COMM_WORLD, &newContext->sf_data_comm); if (status != MPI_SUCCESS) goto err_exit; - status = MPI_Comm_set_errhandler( - newContext->sf_data_comm, MPI_ERRORS_RETURN); + status = MPI_Comm_set_errhandler(newContext->sf_data_comm, MPI_ERRORS_RETURN); if (status != MPI_SUCCESS) goto err_exit; sf_data_comm = newContext->sf_data_comm; } if (n_iocs > 1) { - status = MPI_Comm_split(MPI_COMM_WORLD, thisApp->rank_is_ioc, - world_rank, &newContext->sf_group_comm); + status = + MPI_Comm_split(MPI_COMM_WORLD, thisApp->rank_is_ioc, world_rank, &newContext->sf_group_comm); if (status != MPI_SUCCESS) goto err_exit; - status = MPI_Comm_size( - newContext->sf_group_comm, &newContext->sf_group_size); + status = MPI_Comm_size(newContext->sf_group_comm, &newContext->sf_group_size); if (status != MPI_SUCCESS) goto err_exit; - status = MPI_Comm_rank( - newContext->sf_group_comm, &newContext->sf_group_rank); + status = MPI_Comm_rank(newContext->sf_group_comm, &newContext->sf_group_rank); if (status != MPI_SUCCESS) goto err_exit; /* * There may be additional functionality we need for the IOCs... * If so, then can probably initialize those things here! */ - } else { + } + else { newContext->sf_group_size = 1; newContext->sf_group_rank = 0; } @@ -1303,7 +1309,6 @@ err_exit: return -1; } /* end H5FD__init_subfile_context() */ - /* ------------------------------------------------------------------------- Programmer: Richard Warren @@ -1329,12 +1334,12 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i herr_t ret_value = SUCCEED; int ioc_count; int world_rank, world_size; - sf_topology_t * thisApp = NULL; + sf_topology_t * thisApp = NULL; int file_index = active_map_entries(); - int64_t tag = SF_CONTEXT; + int64_t tag = SF_CONTEXT; int64_t context_id = ((tag << 32) | file_index); - subfiling_context_t *newContext = (subfiling_context_t *) get__subfiling_object(context_id); - char *envValue = NULL; + subfiling_context_t *newContext = (subfiling_context_t *)get__subfiling_object(context_id); + char * envValue = NULL; FUNC_ENTER_API(FAIL) H5TRACE3("e", "IO*s*!", ioc_select_method, ioc_select_option, sf_context); @@ -1351,8 +1356,8 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i } /* Compute the number an distribution map of the set of IO Concentrators */ - if ((ioc_count = H5FD__determine_ioc_count(world_size, world_rank, - ioc_select_method, ioc_select_option, &thisApp)) <= 0) { + if ((ioc_count = H5FD__determine_ioc_count(world_size, world_rank, ioc_select_method, ioc_select_option, + &thisApp)) <= 0) { HDputs("Unable to register subfiling topology!"); ret_value = FAIL; goto done; @@ -1361,15 +1366,16 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i newContext->sf_context_id = context_id; /* Maybe set the verbose flag for more debugging info */ - envValue = HDgetenv("SF_VERBOSE_FLAG"); + envValue = HDgetenv("H5_SF_VERBOSE_FLAG"); if (envValue != NULL) { int check_value = atoi(envValue); - if (check_value > 0) sf_verbose_flag = 1; + if (check_value > 0) + sf_verbose_flag = 1; } /* Maybe open client-side log files */ - if (sf_verbose_flag ) { - manage_client_logfile(world_rank,sf_verbose_flag); + if (sf_verbose_flag) { + manage_client_logfile(world_rank, sf_verbose_flag); } if (H5FD__init_subfile_context(thisApp, ioc_count, world_rank, newContext) != SUCCEED) { @@ -1382,6 +1388,11 @@ H5FDsubfiling_init(ioc_selection_t ioc_select_method, char *ioc_select_option, i ret_value = FAIL; goto done; } + + newContext->sf_base_addr = 0; + if (newContext->topology->rank_is_ioc) { + newContext->sf_base_addr = (int64_t)(newContext->topology->subfile_rank * newContext->sf_stripe_size); + } *sf_context = context_id; done: @@ -1427,7 +1438,7 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags) // char filepath[PATH_MAX]; // char *slash; config_common_t *config_info = _config_info; - char *option_arg = get_ioc_selection_criteria(&ioc_selection); + char * option_arg = get_ioc_selection_criteria(&ioc_selection); HDassert(config_info); /* Check to see who is calling ths function:: @@ -1437,7 +1448,7 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags) (config_info->magic != H5FD_SUBFILING_FAPL_T_MAGIC)) { HDputs("Unrecgonized driver!"); return -1; - } + } /* Initialize/identify IO Concentrators based on the * config information that we have... @@ -1451,19 +1462,19 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags) /* For statistics gathering */ maybe_initialize_statistics(); - /* Create a new context which is associated with + /* Create a new context which is associated with * this file (context_id) */ sf_context = get__subfiling_object(context_id); assert(sf_context != NULL); /* Save some basic things in the new context */ - config_info->context_id = context_id; - sf_context->sf_fid = 0; + config_info->context_id = context_id; + sf_context->sf_fid = 0; sf_context->sf_context_id = context_id; - sf_context->h5_file_id = h5_file_id; - sf_context->filename = strdup(config_info->file_path); - + sf_context->h5_file_id = h5_file_id; + sf_context->h5_filename = strdup(config_info->file_path); + sf_context->sf_filename = NULL; /* Ensure that the IOC service won't exit * as we prepare to start up.. */ @@ -1499,4 +1510,3 @@ H5FD__close_subfiles(int64_t context_id) assert(sf_context != NULL); return close__subfiles(sf_context, sf_context->h5_file_id); } - |