diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-08-09 23:05:37 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-09 23:05:37 (GMT) |
commit | ef33ac8bac5fd201b41d1a3084f03834f47729a2 (patch) | |
tree | ad4756b872abff6d16f11d9a6c6c949e8f359cad /src/H5FDsubfiling/H5subfiling_common.c | |
parent | b84241e57a97309b15846da4cc74611a66d92f6d (diff) | |
download | hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.zip hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.gz hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.bz2 |
Subfiling VFD - tidying up and fixing a few new testing failures (#1977)
* Rename Subfiling IOC "thread_pool_count" field to "thread_pool_size"
* Add simple HDF5 example for Subfiling VFD
* Subfiling VFD - never cache app topology as it may change
* Subfiling VFD - clean up unused functionality and tidy up some TODOs
* Subfiling VFD - tidy up subfiling error handling in H5subfiling_common.c
* Subfiling VFD - show number of failed I/O requests on close
* Subfiling VFD - Update file cmp callback after switching to MPI I/O VFD
* Amend RELEASE.txt with info about h5fuse.sh and Subfiling limitations
* Subfiling VFD - switch to using H5_basename and H5_dirname
Diffstat (limited to 'src/H5FDsubfiling/H5subfiling_common.c')
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.c | 1463 |
1 files changed, 358 insertions, 1105 deletions
diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c index b75dd81..d83d8c5 100644 --- a/src/H5FDsubfiling/H5subfiling_common.c +++ b/src/H5FDsubfiling/H5subfiling_common.c @@ -19,30 +19,13 @@ #include "H5subfiling_common.h" #include "H5subfiling_err.h" +#include "H5MMprivate.h" + typedef struct { /* Format of a context map entry */ void *file_handle; /* key value (linear search of the cache) */ int64_t sf_context_id; /* The return value if matching file_handle */ } file_map_to_context_t; -typedef struct stat_record { - int64_t op_count; /* How many ops in total */ - double min; /* minimum (time) */ - double max; /* maximum (time) */ - double total; /* average (time) */ -} stat_record_t; - -/* Stat (OP) Categories */ -typedef enum stat_category { - WRITE_STAT = 0, - WRITE_WAIT, - READ_STAT, - READ_WAIT, - FOPEN_STAT, - FCLOSE_STAT, - QUEUE_STAT, - TOTAL_STAT_COUNT -} stat_category_t; - /* Identifiers for HDF5's error API */ hid_t H5subfiling_err_stack_g = H5I_INVALID_HID; hid_t H5subfiling_err_class_g = H5I_INVALID_HID; @@ -55,31 +38,10 @@ static sf_topology_t *sf_topology_cache = NULL; static size_t sf_context_cache_limit = 16; static size_t sf_topology_cache_limit = 4; -app_layout_t *sf_app_layout = NULL; - static file_map_to_context_t *sf_open_file_map = NULL; static int sf_file_map_size = 0; #define DEFAULT_FILE_MAP_ENTRIES 8 -/* Definitions for recording subfiling statistics */ -static stat_record_t subfiling_stats[TOTAL_STAT_COUNT]; -#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total / (double)subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total / (double)subfiling_stats[WRITE_WAIT].op_count) -#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count) -#define SF_READ_TIME (subfiling_stats[READ_STAT].total / (double)subfiling_stats[READ_STAT].op_count) -#define SF_READ_WAIT_TIME 
(subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count) -#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total) - -int sf_verbose_flag = 0; - -#ifdef H5_SUBFILING_DEBUG -char sf_logile_name[PATH_MAX]; -FILE *sf_logfile = NULL; - -static int sf_open_file_count = 0; -#endif - static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context); static herr_t H5_free_subfiling_topology(sf_topology_t *topology); @@ -92,7 +54,7 @@ static herr_t init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, MPI_Comm file_comm); static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags); static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index); -static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags); +static herr_t ioc_open_file(int64_t file_context_id, int file_acc_flags); static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out, size_t filename_out_len, char **filename_basename_out, char **subfile_dir_out); @@ -101,8 +63,6 @@ static herr_t create_config_file(subfiling_context_t *sf_context, const char *ba static herr_t open_config_file(subfiling_context_t *sf_context, const char *base_filename, const char *subfile_dir, const char *mode, FILE **config_file_out); -static void initialize_statistics(void); -static int numDigits(int n); static int get_next_fid_map_index(void); static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id); static int compare_hostid(const void *h1, const void *h2); @@ -113,79 +73,6 @@ static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm); static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node); static inline void assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple); -static void -initialize_statistics(void) -{ - HDmemset(subfiling_stats, 0, 
sizeof(subfiling_stats)); -} - -static int -numDigits(int n) -{ - if (n < 0) - n = (n == INT_MIN) ? INT_MAX : -n; - if (n < 10) - return 1; - if (n < 100) - return 2; - if (n < 1000) - return 3; - if (n < 10000) - return 4; - if (n < 100000) - return 5; - if (n < 1000000) - return 6; - if (n < 10000000) - return 7; - if (n < 100000000) - return 8; - if (n < 1000000000) - return 9; - return 10; -} - -/*------------------------------------------------------------------------- - * Function: set_verbose_flag - * - * Purpose: For debugging purposes, I allow a verbose setting to - * have printing of relevant information into an IOC specific - * file that is opened as a result of enabling the flag - * and closed when the verbose setting is disabled. - * - * Return: None - * Errors: None - * - * Programmer: Richard Warren - * - * Changes: Initial Version/None. - *------------------------------------------------------------------------- - */ -void -set_verbose_flag(int subfile_rank, int new_value) -{ -#ifdef H5_SUBFILING_DEBUG - sf_verbose_flag = (int)(new_value & 0x0FF); - if (sf_verbose_flag) { - char logname[64]; - HDsnprintf(logname, sizeof(logname), "ioc_%d.log", subfile_rank); - if (sf_open_file_count > 1) - sf_logfile = fopen(logname, "a+"); - else - sf_logfile = fopen(logname, "w+"); - } - else if (sf_logfile) { - fclose(sf_logfile); - sf_logfile = NULL; - } -#else - (void)subfile_rank; - (void)new_value; -#endif - - return; -} - static int get_next_fid_map_index(void) { @@ -300,8 +187,9 @@ compare_hostid(const void *h1, const void *h2) static herr_t get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str) { - char *opt_value = NULL; - char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA); + char *opt_value = NULL; + char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA); + herr_t ret_value = SUCCEED; HDassert(ioc_selection_type); HDassert(ioc_sel_info_str); @@ -323,31 +211,24 @@ 
get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_t errno = 0; check_value = HDstrtol(env_value, NULL, 0); - if (errno == ERANGE) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA - " environment variable\n", - __func__); -#endif - - return FAIL; - } + if (errno == ERANGE) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA + " environment variable"); - if ((check_value < 0) || (check_value >= ioc_selection_options)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA - " environment variable\n", - __func__, check_value); -#endif - - return FAIL; - } + if ((check_value < 0) || (check_value >= ioc_selection_options)) + H5_SUBFILING_GOTO_ERROR( + H5E_VFL, H5E_BADVALUE, FAIL, + "invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA + " environment variable", + check_value); *ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value; *ioc_sel_info_str = opt_value; } - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -379,6 +260,7 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) int hostid_index = -1; int my_rank; int mpi_code; + int ret_value = 0; HDassert(info); HDassert(info->app_layout); @@ -386,20 +268,12 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) HDassert(info->app_layout->node_ranks); HDassert(MPI_COMM_NULL != comm); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif - - return -1; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(-1, "MPI_Comm_rank failed", mpi_code); app_layout = info->app_layout; node_count = 
app_layout->node_count; - if (node_count == 0) - gather_topology_info(info, comm); - nextid = app_layout->layout[0].hostid; /* Possibly record my hostid_index */ if (app_layout->layout[0].rank == my_rank) { @@ -428,7 +302,10 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) app_layout->node_count = node_count; - return node_count; + ret_value = node_count; + +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -455,6 +332,7 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm) long hostid; int sf_world_size; int sf_world_rank; + herr_t ret_value = SUCCEED; HDassert(info); HDassert(info->app_layout); @@ -477,18 +355,14 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm) int mpi_code; if (MPI_SUCCESS != - (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Allgather failed with rc %d\n", __func__, mpi_code); -#endif + (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code); - return FAIL; - } - - qsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid); + HDqsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid); } - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -567,8 +441,10 @@ assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple) for (int k = 0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) { ioc_index = rank_multiple * k++; io_concentrators[ioc_next] = (int)(app_layout->layout[ioc_index].rank); - if (io_concentrators[ioc_next] == app_layout->world_rank) - app_topology->rank_is_ioc = TRUE; + if (io_concentrators[ioc_next] == app_layout->world_rank) { + app_topology->subfile_rank = ioc_next; + app_topology->rank_is_ioc = TRUE; + } 
} app_topology->n_io_concentrators = ioc_count; } @@ -610,13 +486,6 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val) * open at a time, then only a single subfiling context cache * entry will be used. * - * Topologies are static, e.g. for any one I/O concentrator - * allocation strategy, the results should always be the same. - * - * TODO: The one exception to this being the 1 IOC per N MPI - * ranks strategy. The value of N can be changed on a per-file - * basis, so we need to address that at some point. - * * Return: Pointer to underlying subfiling object if subfiling object * ID is valid * @@ -636,14 +505,11 @@ H5_get_subfiling_object(int64_t object_id) { int64_t obj_type = (object_id >> 32) & 0x0FFFF; int64_t obj_index = object_id & 0x0FFFF; + void *ret_value = NULL; - if (obj_index < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid object index for subfiling object ID %" PRId64 "\n", __func__, object_id); -#endif - - return NULL; - } + if (obj_index < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, + "invalid object index for subfiling object ID %" PRId64, object_id); if (obj_type == SF_CONTEXT) { /* Contexts provide information principally about @@ -658,13 +524,9 @@ H5_get_subfiling_object(int64_t object_id) /* Create subfiling context cache if it doesn't exist */ if (!sf_context_cache) { - if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__); -#endif - - return NULL; - } + if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling context cache"); } /* Make more space in context cache if needed */ @@ -677,13 +539,9 @@ H5_get_subfiling_object(int64_t object_id) sf_context_cache_limit *= 2; if (NULL == (tmp_realloc = 
HDrealloc(sf_context_cache, - sf_context_cache_limit * sizeof(subfiling_context_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__); -#endif - - return NULL; - } + sf_context_cache_limit * sizeof(subfiling_context_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling context cache"); sf_context_cache = tmp_realloc; @@ -698,13 +556,9 @@ H5_get_subfiling_object(int64_t object_id) else if (obj_type == SF_TOPOLOGY) { /* Create subfiling topology cache if it doesn't exist */ if (!sf_topology_cache) { - if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling topology cache\n", __func__); -#endif - - return NULL; - } + if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling topology cache"); } /* We will likely only cache a single topology @@ -712,13 +566,9 @@ H5_get_subfiling_object(int64_t object_id) * In that context, we will identify the number of * nodes along with the number of MPI ranks on a node. 
*/ - if ((size_t)obj_index >= sf_topology_cache_limit) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid object index for subfiling topology object ID\n", __func__); -#endif - - return NULL; - } + if ((size_t)obj_index >= sf_topology_cache_limit) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, + "invalid object index for subfiling topology object ID"); /* Return direct pointer to the topology cache entry */ return (void *)&sf_topology_cache[obj_index]; @@ -728,7 +578,8 @@ H5_get_subfiling_object(int64_t object_id) HDprintf("%s: Unknown subfiling object type for ID %" PRId64 "\n", __func__, object_id); #endif - return NULL; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -746,23 +597,21 @@ H5_free_subfiling_object(int64_t object_id) { subfiling_context_t *sf_context = NULL; int64_t obj_type = (object_id >> 32) & 0x0FFFF; + herr_t ret_value = SUCCEED; - if (obj_type != SF_CONTEXT) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid subfiling object type for ID %" PRId64 "\n", __func__, object_id); -#endif - - return FAIL; - } + if (obj_type != SF_CONTEXT) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid subfiling object type for ID %" PRId64, + object_id); - sf_context = H5_get_subfiling_object(object_id); - if (!sf_context) - return FAIL; + if (NULL == (sf_context = H5_get_subfiling_object(object_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get subfiling context for subfiling object ID"); if (H5_free_subfiling_object_int(sf_context) < 0) - return FAIL; + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } static herr_t @@ -858,15 +707,7 @@ H5_free_subfiling_topology(sf_topology_t *topology) HDfree(topology->subfile_fd); topology->subfile_fd = NULL; - /* - * The below assumes that the subfiling application layout - * is retrieved once and used for subsequent 
file opens for - * the duration that the Subfiling VFD is in use - */ - HDassert(topology->app_layout == sf_app_layout); - -#if 0 - if (topology->app_layout && (topology->app_layout != sf_app_layout)) { + if (topology->app_layout) { HDfree(topology->app_layout->layout); topology->app_layout->layout = NULL; @@ -875,7 +716,6 @@ H5_free_subfiling_topology(sf_topology_t *topology) HDfree(topology->app_layout); } -#endif topology->app_layout = NULL; @@ -927,112 +767,55 @@ H5_open_subfiles(const char *base_filename, void *file_handle, int mpi_code; herr_t ret_value = SUCCEED; - if (!base_filename) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (!subfiling_config) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid subfiling configuration pointer\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (!context_id_out) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: context_id_out is NULL\n", __func__); -#endif + if (!base_filename) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling base filename"); - ret_value = FAIL; - goto done; - } + if (!subfiling_config) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling configuration"); - initialize_statistics(); - -#if 0 /* TODO */ - /* Maybe set the verbose flag for more debugging info */ - envValue = HDgetenv("H5_SF_VERBOSE_FLAG"); - if (envValue != NULL) { - int check_value = atoi(envValue); - if (check_value > 0) - sf_verbose_flag = 1; - } -#endif + if (!context_id_out) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer"); /* Initialize new subfiling context ID based on configuration information */ - if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize subfiling context\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if 
(init_subfiling(subfiling_config, file_comm, &context_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context"); /* Retrieve the subfiling object for the newly-created context ID */ - if (NULL == (sf_context = H5_get_subfiling_object(context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID"); /* Save some basic things in the new subfiling context */ sf_context->h5_file_handle = file_handle; - if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy base HDF5 filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling filename"); /* * If we're actually using the IOCs, we will * start the service threads on the identified * ranks as part of the subfile opening. 
*/ - if (open_subfile_with_context(sf_context, file_acc_flags) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't open subfiles\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (open_subfile_with_context(sf_context, file_acc_flags) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles"); #ifdef H5_SUBFILING_DEBUG { struct tm *tm = NULL; time_t cur_time; int mpi_rank; + int mpi_code; /* Open debugging logfile */ - if (MPI_SUCCESS != MPI_Comm_rank(file_comm, &mpi_rank)) { - HDprintf("%s: couldn't get MPI rank\n", __func__); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &mpi_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); HDsnprintf(sf_context->sf_logfile_name, PATH_MAX, "%s.log.%d", sf_context->h5_filename, mpi_rank); - if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a"))) { - HDprintf("%s: couldn't open subfiling debug logfile\n", __func__); - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a"))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling debug logfile"); cur_time = time(NULL); tm = localtime(&cur_time); @@ -1052,38 +835,24 @@ done: * Form consensus on whether opening subfiles was * successful */ - if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: MPI_Allreduce failed with rc %d\n", __func__, - sf_context->topology->app_layout->world_rank, mpi_code); -#endif - - ret_value = FAIL; - } + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm))) + H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Allreduce failed", mpi_code); if (g_errors > 0) { -#ifdef H5_SUBFILING_DEBUG - if (sf_context->topology->app_layout->world_rank == 0) { - HDprintf("%s: one or 
more IOC ranks couldn't open subfiles\n", __func__); - } -#endif - - ret_value = FAIL; + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "one or more IOC ranks couldn't open subfiles"); } if (ret_value < 0) { clear_fid_map_entry(file_handle, context_id); - if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling object\n", __func__); -#endif - } + if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); *context_id_out = -1; } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /* @@ -1120,48 +889,25 @@ init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, HDassert(file_index >= 0); /* Use the file's index to create a new subfiling context ID */ - if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling context ID"); /* Create a new subfiling context object with the created context ID */ - if (NULL == (new_context = H5_get_subfiling_object(context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (new_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling object"); /* * Setup the application topology information, including the computed * number and distribution map of the set of I/O concentrators */ - if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize 
application topology\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize application topology"); new_context->sf_context_id = context_id; - if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize subfiling topology object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "couldn't initialize subfiling application topology object"); new_context->sf_base_addr = 0; if (new_context->topology->rank_is_ioc) { @@ -1175,14 +921,11 @@ done: if (ret_value < 0) { HDfree(app_topology); - if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling object\n", __func__); -#endif - } + if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1224,14 +967,13 @@ static herr_t init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out) { - sf_topology_t *app_topology = NULL; - app_layout_t *app_layout = sf_app_layout; - char *env_value = NULL; - char *ioc_sel_str = NULL; - int *io_concentrators = NULL; - long ioc_select_val = -1; - long iocs_per_node = 1; - int ioc_count = 0; + sf_topology_t *app_topology = NULL; + app_layout_t *app_layout = NULL; + char *env_value = NULL; + char *ioc_sel_str = NULL; + long ioc_select_val = -1; + long iocs_per_node = 1; + int ioc_count = 0; int comm_rank; int comm_size; int mpi_code; @@ 
-1241,33 +983,16 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, HDassert(app_topology_out); HDassert(!*app_topology_out); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator size; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code); /* Check if an IOC selection type was specified by environment variable */ - if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get IOC selection type from environment\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get IOC selection type from environment"); /* Sanity checking on different IOC selection strategies */ switch (ioc_selection_type) { @@ -1318,67 +1043,29 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, } /* Allocate new application topology information object */ - if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling topology object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, 
+ "couldn't create new subfiling topology object"); app_topology->subfile_rank = -1; app_topology->selection_type = ioc_selection_type; - if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate array of I/O concentrator ranks\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - io_concentrators = app_topology->io_concentrators; - HDassert(io_concentrators); + if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate array of I/O concentrator ranks"); if (!app_layout) { - /* TODO: this is dangerous if a new comm size is greater than what - * was allocated. Can't reuse app layout. - */ + if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout structure"); - if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout structure\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout node rank array\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout array\n", __func__); -#endif + if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout node rank array"); - ret_value = FAIL; - goto done; - } - - /* - * Once the application layout has been filled once, any additional - * 
file open operations won't be required to gather that information. - */ - sf_app_layout = app_layout; + if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout array"); } app_layout->world_size = comm_size; @@ -1386,6 +1073,8 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, app_topology->app_layout = app_layout; + gather_topology_info(app_topology, comm); + /* * Determine which ranks are I/O concentrator ranks, based on the * given IOC selection strategy and MPI information. @@ -1396,17 +1085,11 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, app_topology->selection_type = SELECT_IOC_ONE_PER_NODE; - if ((node_count = count_nodes(app_topology, comm)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't determine number of nodes used\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if ((node_count = count_nodes(app_topology, comm)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't determine number of nodes used"); /* Check for an IOC-per-node value set in the environment */ - /* TODO: should this env. var. be interpreted for other selection types? 
*/ if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) { errno = 0; ioc_select_val = HDstrtol(env_value, NULL, 0); @@ -1465,11 +1148,7 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, case SELECT_IOC_WITH_CONFIG: default: -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid IOC selection strategy\n", __func__); -#endif - ret_value = FAIL; - goto done; + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid IOC selection strategy"); break; } @@ -1480,14 +1159,9 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, * Create a vector of "potential" file descriptors * which can be indexed by the IOC ID */ - if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate subfile file descriptor array\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate subfile file descriptor array"); *app_topology_out = app_topology; @@ -1505,7 +1179,7 @@ done: } } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1576,15 +1250,9 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co errno = 0; stripe_size = HDstrtoll(env_value, NULL, 0); - if (ERANGE == errno) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__, - env_value); -#endif - - ret_value = FAIL; - goto done; - } + if (ERANGE == errno) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, + "invalid stripe size setting for " H5FD_SUBFILING_STRIPE_SIZE); if (stripe_size > 0) { sf_context->sf_stripe_size = (int64_t)stripe_size; @@ -1599,14 +1267,8 @@ init_subfiling_context(subfiling_context_t *sf_context, 
H5FD_subfiling_shared_co /* Check for a subfile name prefix setting in the environment */ if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) { - if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix value\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix value"); } /* @@ -1614,124 +1276,44 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co * to/from IOC ranks */ - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC messages; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC message sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC data; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC data sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto 
done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC EOF; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC EOF sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for barriers; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != 
(mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on barrier sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); /* Create an MPI sub-communicator for IOC ranks */ if (app_topology->n_io_concentrators > 1) { if (MPI_SUCCESS != (mpi_code = MPI_Comm_split(file_comm, app_topology->rank_is_ioc, comm_rank, - &sf_context->sf_group_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC ranks; rc = %d\n", __func__, mpi_code); -#endif + &sf_context->sf_group_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_split failed", mpi_code); - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != - (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI rank from IOC rank sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != - (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI comm size from IOC rank sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size))) + 
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code); } done: @@ -1739,7 +1321,7 @@ done: H5_free_subfiling_object_int(sf_context); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1779,68 +1361,35 @@ done: static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) { - double start_time; herr_t ret_value = SUCCEED; HDassert(sf_context); - start_time = MPI_Wtime(); - /* * Save the HDF5 file ID (fid) to subfile context mapping. * There shouldn't be any issue, but check the status and * return if there was a problem. */ - if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "couldn't record HDF5 file ID to subfile context mapping"); /* * If this rank is an I/O concentrator, actually open * the subfile belonging to this IOC rank */ if (sf_context->topology->rank_is_ioc) { - sf_work_request_t msg = {{file_acc_flags, (int64_t)sf_context->h5_file_id, sf_context->sf_context_id}, - OPEN_OP, - sf_context->topology->app_layout->world_rank, - sf_context->topology->subfile_rank, - sf_context->sf_context_id, - start_time, - NULL, - 0, - 0, - 0, - 0}; - h5_stat_t st; + h5_stat_t st; /* Retrieve Inode value for HDF5 stub file */ - if (HDstat(sf_context->h5_filename, &st) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: couldn't stat file %s\n", __func__, - sf_context->topology->app_layout->world_rank, sf_context->h5_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (HDstat(sf_context->h5_filename, &st) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't stat HDF5 
stub file"); HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t)); sf_context->h5_file_id = (uint64_t)st.st_ino; - if (ioc_open_file(&msg, file_acc_flags) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: couldn't open subfile\n", __func__, - sf_context->topology->app_layout->world_rank); -#endif - - ret_value = FAIL; - goto done; - } + if (ioc_open_file(sf_context->sf_context_id, file_acc_flags) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile"); } done: @@ -1848,7 +1397,7 @@ done: clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1889,14 +1438,8 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i if (sf_file_map_size == 0) { if (NULL == - (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate open file map\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping"); sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES; for (int i = 0; i < sf_file_map_size; i++) { @@ -1925,14 +1468,9 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i void *tmp_realloc; if (NULL == (tmp_realloc = HDrealloc(sf_open_file_map, - ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map))))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't reallocate open file map\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map))))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't reallocate open file mapping"); sf_open_file_map = tmp_realloc; 
sf_file_map_size *= 2; @@ -1950,7 +1488,7 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i } done: - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1992,10 +1530,9 @@ done: *------------------------------------------------------------------------- */ static herr_t -ioc_open_file(sf_work_request_t *msg, int file_acc_flags) +ioc_open_file(int64_t file_context_id, int file_acc_flags) { - subfiling_context_t *sf_context = NULL; - int64_t file_context_id; + subfiling_context_t *sf_context = NULL; mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; char *filepath = NULL; char *subfile_dir = NULL; @@ -2003,51 +1540,24 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags) int fd = -1; herr_t ret_value = SUCCEED; - HDassert(msg); - - /* Retrieve subfiling context ID from RPC message */ - file_context_id = msg->header[2]; - - if (NULL == (sf_context = H5_get_subfiling_object(file_context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context = H5_get_subfiling_object(file_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "couldn't get subfiling object from context ID"); /* Only IOC ranks should be here */ HDassert(sf_context->topology); HDassert(sf_context->topology->subfile_rank >= 0); - if (NULL == (filepath = HDcalloc(1, PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (filepath = HDcalloc(1, PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfile filename"); /* Generate the name of the subfile that this IOC rank will open */ - if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, 
&subfile_dir) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't generate name for subfile\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, &subfile_dir) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "couldn't generate name for subfile"); - if (NULL == (sf_context->sf_filename = HDstrdup(filepath))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile name\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->sf_filename = HDstrdup(filepath))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile name"); /* Attempt to create/open the subfile for this IOC rank */ if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0) @@ -2080,11 +1590,11 @@ done: } } - HDfree(base); - HDfree(subfile_dir); + H5MM_free(base); + H5MM_free(subfile_dir); HDfree(filepath); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /* @@ -2134,28 +1644,16 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char */ n_io_concentrators = sf_context->topology->n_io_concentrators; - if (NULL == (prefix = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (prefix = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfile prefix"); /* Under normal operation, we co-locate subfiles with the HDF5 file */ - HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX); + HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX - 1); + prefix[PATH_MAX - 1] = '\0'; - base = basename(prefix); - - if (NULL == (*filename_basename_out = HDstrdup(base))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile basename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if 
(H5_basename(prefix, &base) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get subfile basename"); if (sf_context->subfile_prefix) { /* Note: Users may specify a directory name which is inaccessible @@ -2165,28 +1663,12 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * if so, we could default to creating the subfiles in the * current directory. (?) */ - if (NULL == (*subfile_dir_out = HDstrdup(sf_context->subfile_prefix))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - subfile_dir = *subfile_dir_out; + if (NULL == (subfile_dir = H5MM_strdup(sf_context->subfile_prefix))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix"); } else { - subfile_dir = dirname(prefix); - - if (NULL == (*subfile_dir_out = HDstrdup(subfile_dir))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (H5_dirname(prefix, &subfile_dir) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't get subfile prefix"); } /* @@ -2194,14 +1676,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * we aren't truncating the file. */ if (0 == (file_acc_flags & O_TRUNC)) { - if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't open existing subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "couldn't open existing subfiling configuration file"); } /* @@ -2210,14 +1687,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * in order to generate the correct subfile names. 
*/ if (config_file) { - if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't read from subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, + "couldn't read from subfiling configuration file"); } /* @@ -2231,34 +1703,36 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * and the configuration file will be named: * ABC.h5.subfile_<file-number>.config */ - num_digits = numDigits(n_io_concentrators); + num_digits = (int)(HDlog10(n_io_concentrators) + 1); HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base, sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1, n_io_concentrators); -done: - if (config_file && (EOF == HDfclose(config_file))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif + *filename_basename_out = base; + *subfile_dir_out = subfile_dir; - ret_value = FAIL; - } +done: + if (config_file && (EOF == HDfclose(config_file))) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); if (ret_value < 0) { + H5MM_free(subfile_dir); + H5MM_free(base); + if (*filename_basename_out) { - HDfree(*filename_basename_out); + H5MM_free(*filename_basename_out); *filename_basename_out = NULL; } if (*subfile_dir_out) { - HDfree(*subfile_dir_out); + H5MM_free(*subfile_dir_out); *subfile_dir_out = NULL; } } HDfree(prefix); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2292,33 +1766,18 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c HDassert(base_filename); HDassert(subfile_dir); - if 
(sf_context->h5_file_id == UINT64_MAX) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id); -#endif - - ret_value = FAIL; - goto done; - } - if (*base_filename == '\0') { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (sf_context->h5_file_id == UINT64_MAX) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64, + sf_context->h5_file_id); + if (*base_filename == '\0') + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'", + base_filename); if (*subfile_dir == '\0') subfile_dir = "."; - if (NULL == (config_filename = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (config_filename = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling configuration filename"); HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, sf_context->h5_file_id); @@ -2329,14 +1788,9 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c config_file_exists = (ret == 0) || ((ret < 0) && (ENOENT != errno)); - if (config_file_exists && (ret != 0)) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't check existence of configuration file"); -#endif - - ret_value = FAIL; - goto done; - } + if (config_file_exists && (ret != 0)) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't check existence of subfiling configuration file"); /* * If a config file doesn't exist, create one. 
If a @@ -2349,100 +1803,61 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c int n_io_concentrators = sf_context->topology->n_io_concentrators; int num_digits; - if (NULL == (config_file = HDfopen(config_filename, "w+"))) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't open subfiling configuration file"); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (line_buf = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate buffer for writing to subfiling configuration file\n", __func__); -#endif + if (NULL == (config_file = HDfopen(config_filename, "w+"))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (line_buf = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate buffer for writing to subfiling configuration file"); /* Write the subfiling stripe size to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "stripe_size=%" PRId64 "\n", sf_context->sf_stripe_size); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the number of I/O concentrators to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "aggregator_count=%d\n", n_io_concentrators); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + 
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the base HDF5 filename to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "hdf5_file=%s\n", sf_context->h5_filename); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the optional subfile directory prefix to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "subfile_dir=%s\n", subfile_dir); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write out each subfile name to the configuration file */ - num_digits = numDigits(n_io_concentrators); + num_digits = (int)(HDlog10(n_io_concentrators) + 1); for (int k = 0; k < n_io_concentrators; k++) { HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename, sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); } } done: 
if (config_file) { - if (EOF == HDfclose(config_file)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - } + if (EOF == HDfclose(config_file)) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); } HDfree(line_buf); HDfree(config_filename); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2477,33 +1892,18 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con *config_file_out = NULL; - if (sf_context->h5_file_id == UINT64_MAX) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id); -#endif - - ret_value = FAIL; - goto done; - } - if (*base_filename == '\0') { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (sf_context->h5_file_id == UINT64_MAX) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64, + sf_context->h5_file_id); + if (*base_filename == '\0') + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'", + base_filename); if (*subfile_dir == '\0') subfile_dir = "."; - if (NULL == (config_filename = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (config_filename = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling configuration filename"); HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, sf_context->h5_file_id); @@ -2517,40 +1917,26 @@ open_config_file(subfiling_context_t 
*sf_context, const char *base_filename, con if (!config_file_exists) goto done; - if (config_file_exists && (ret != 0)) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't check existence of configuration file"); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (config_file = HDfopen(config_filename, mode))) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't open subfiling configuration file"); -#endif + if (config_file_exists && (ret != 0)) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't check existence of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (config_file = HDfopen(config_filename, mode))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling configuration file"); *config_file_out = config_file; done: if (ret_value < 0) { - if (config_file && (EOF == HDfclose(config_file))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - } + if (config_file && (EOF == HDfclose(config_file))) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); } HDfree(config_filename); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2575,79 +1961,42 @@ H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators) HDassert(config_file); HDassert(n_io_concentrators); - if (HDfseek(config_file, 0, SEEK_END) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } - - if ((config_file_len = HDftell(config_file)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } - - if 
(HDfseek(config_file, 0, SEEK_SET) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__, - errno); -#endif + if (HDfseek(config_file, 0, SEEK_END) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, + "couldn't seek to end of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if ((config_file_len = HDftell(config_file)) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, + "couldn't get size of subfiling configuration file"); - if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__); -#endif + if (HDfseek(config_file, 0, SEEK_SET) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, + "couldn't seek to beginning of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for reading from subfiling configuration file"); - if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL, + "couldn't read from subfiling configuration file"); config_buf[config_file_len] = '\0'; - if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) { -#ifdef H5_SUBFILING_DEBUG - 
HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n", - __func__); -#endif + if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, + "malformed subfiling configuration file - no aggregator count entry"); - ret_value = FAIL; - goto done; - } + if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) + H5_SUBFILING_SYS_GOTO_ERROR( + H5E_FILE, H5E_CANTGET, FAIL, + "couldn't get number of I/O concentrators from subfiling configuration file"); - if (read_n_io_concs <= 0) { - HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n", - __func__, read_n_io_concs); - ret_value = FAIL; - goto done; - } + if (read_n_io_concs <= 0) + H5_SUBFILING_GOTO_ERROR( + H5E_FILE, H5E_BADVALUE, FAIL, + "invalid number of I/O concentrators (%d) read from subfiling configuration file", + read_n_io_concs); *n_io_concentrators = read_n_io_concs; @@ -2703,77 +2052,44 @@ H5_close_subfiles(int64_t subfiling_context_id) { subfiling_context_t *sf_context = NULL; MPI_Request barrier_req = MPI_REQUEST_NULL; -#ifdef H5_SUBFILING_DEBUG - double t0 = 0.0; - double t1 = 0.0; - double t2 = 0.0; -#endif - int mpi_code; - herr_t ret_value = SUCCEED; - -#ifdef H5_SUBFILING_DEBUG - t0 = MPI_Wtime(); -#endif - - if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + int mpi_code; + herr_t ret_value = SUCCEED; - /* We make the subfile close operation collective. - * Otherwise, there may be a race condition between - * our closing the subfiles and the user application - * moving ahead and possibly re-opening a file. 
- * - * If we can, we utilize an async barrier which gives - * us the opportunity to reduce the CPU load due to - * MPI spinning while waiting for the barrier to - * complete. This is especially important if there - * is heavy thread utilization due to subfiling - * activities, i.e. the thread pool might be - * extremely busy servicing I/O requests from all - * HDF5 application ranks. - */ + if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "couldn't get subfiling object from context ID"); + + /* We make the subfile close operation collective. + * Otherwise, there may be a race condition between + * our closing the subfiles and the user application + * moving ahead and possibly re-opening a file. + * + * If we can, we utilize an async barrier which gives + * us the opportunity to reduce the CPU load due to + * MPI spinning while waiting for the barrier to + * complete. This is especially important if there + * is heavy thread utilization due to subfiling + * activities, i.e. the thread pool might be + * extremely busy servicing I/O requests from all + * HDF5 application ranks. 
+ */ #if MPI_VERSION > 3 || (MPI_VERSION == 3 && MPI_SUBVERSION >= 1) { int barrier_complete = 0; - if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code); while (!barrier_complete) { useconds_t t_delay = 5; usleep(t_delay); - if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code); } } #else - if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); #endif /* The map from file handle to subfiling context can now be cleared */ @@ -2784,49 +2100,11 @@ H5_close_subfiles(int64_t subfiling_context_id) if (sf_context->topology->rank_is_ioc) { if (sf_context->sf_fid >= 0) { errno = 0; - if (HDclose(sf_context->sf_fid) < 0) { - HDperror("H5_close_subfiles - couldn't close subfile"); - -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't close subfile\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDclose(sf_context->sf_fid) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "couldn't close subfile"); sf_context->sf_fid = -1; } - -#ifdef H5_SUBFILING_DEBUG - 
/* FIXME: If we've had multiple files open, our statistics - * will be messed up! - */ - if (sf_verbose_flag) { - t1 = t2; - if (sf_logfile != NULL) { - if (SF_WRITE_OPS > 0) - HDfprintf( - sf_logfile, - "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME, - (t1 - t0)); - if (SF_READ_OPS > 0) - HDfprintf(sf_logfile, - "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME, - (t1 - t0)); - - HDfprintf(sf_logfile, "[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank, - SF_QUEUE_DELAYS / (double)(SF_WRITE_OPS + SF_READ_OPS)); - - HDfflush(sf_logfile); - - HDfclose(sf_logfile); - sf_logfile = NULL; - } - } -#endif } /* @@ -2838,50 +2116,27 @@ H5_close_subfiles(int64_t subfiling_context_id) { int barrier_complete = 0; - if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code); while (!barrier_complete) { useconds_t t_delay = 5; usleep(t_delay); - if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code); } } #else - if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code); 
-#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); #endif done: - if (sf_context && H5_free_subfiling_object_int(sf_context) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling context object\n", __func__); -#endif + if (sf_context && H5_free_subfiling_object_int(sf_context) < 0) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "couldn't free subfiling context object"); - ret_value = FAIL; - } - - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2904,13 +2159,10 @@ done: int64_t H5_subfile_fhandle_to_context(void *file_handle) { - if (!sf_open_file_map) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: open file map is invalid\n", __func__); -#endif + int64_t ret_value = -1; - return -1; - } + if (!sf_open_file_map) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL"); for (int i = 0; i < sf_file_map_size; i++) { if (sf_open_file_map[i].file_handle == file_handle) { @@ -2918,7 +2170,8 @@ H5_subfile_fhandle_to_context(void *file_handle) } } - return -1; +done: + H5_SUBFILING_FUNC_LEAVE; } /* end H5_subfile_fhandle_to_context() */ #ifdef H5_SUBFILING_DEBUG |