path: root/src/H5Dmpio.c
author    jhendersonHDF <jhenderson@hdfgroup.org>  2023-10-12 16:32:23 (GMT)
committer GitHub <noreply@github.com>  2023-10-12 16:32:23 (GMT)
commit    ea3f92605761e1ff17d858df303dc375df7efc1c (patch)
tree      716311100f8d0f0dc77bca31245878546952aabc /src/H5Dmpio.c
parent    0feda66ff0dccdf77453b7c881c80be244e0ae12 (diff)
1.14 sync with develop (#3660)
* Rework tools.cmake and add C flags (#3110)
* Fix gh pages so that the doxygen files are uploaded (#3102)
* Add workspace path
* add debug
* Make one job so workspace files are available
* Put doxygen docs under docs folder in gh-pages
* Fix a misc warning in test/vol.c (#3112) The compiler complains about using integers instead of size_t for some sizes.
* Remove H5detect and H5make_libsettings (#3104) Removes H5detect and H5make_libsettings from the build and replaces their functionality with things that don't affect cross-compiling. H5detect --> floating-point types are now detected on library load. H5make_libsettings --> moved functionality to a new H5build_settings.c template file.
* clang-tidy clang-analyzer-core issues addressed (#3113) src/H5system.c:1293:13: warning: Dereference of null pointer [clang-analyzer-core.NullDereference]; src/H5trace.c:4048:17: warning: Passed-by-value struct argument contains uninitialized data (e.g., via the field chain: 'initial.user') [clang-analyzer-core.CallAndMessage]
* Add note for issue 3056 (#3117)
* Use 1.14 toolchain (#3116)
* Remove the checkposix script (#3122) This script was used to ensure that all non-HDF5 calls were prefixed with 'HD'. We are removing this scheme, so this script is no longer needed.
* Remove unused HD macros (#3120) The library prefixes most C and POSIX API calls with 'HD'. We are going to start removing these so the code looks like normal C. This PR removes most of the unused HD markup macros.
* Replace ntohl/ntohs
* Adds an optional version arg to bin/format_source (#3119)
* Clean up mirror VFD code in utils dir (#3121)
* Remove dead code
* Replace mybzero with memset
* Replace hbool_t/TRUE/FALSE with bool/true/false
* Fix spelling issues flagged by codespell (#3130)
* Make autogen.sh output message consistent (#3128)
* Add Python for HDF-EOS zoo description (#3129)
* Fix function name in comment in ros3 VFD (#3131)
* Revert long double checks (#3133)
* Revert "Remove long double conversion work-arounds (#3097)" This reverts commit 1e1dac1dac58fa18f6b7788346d1ba7d3315b0f9.
* Update comments to reflect newer systems
* Add java options to build scripts (#3127) Previously, cmakehdf5 turned on compiling of the java interface by default due to a value set in cacheinit.cmake. Now, consistent with how the Fortran and CPP interfaces are handled, the script overwrites this default value to disable the libraries, fixing #2958. I also implemented the --enable-java/--disable-java options for cmakehdf5, and -java for buildhdf5. Allen said these scripts should mention that compilers are to be specified in environment variables, but missing compilers cause errors at the CMake level, and CMake's error messages are already pretty informative (see the one in #2958 about JAVA_COMPILER).
* Removed .lnt linter files (#3143) These were last usefully modified in 2004.
* Fix path to libhdf5.settings in cmakehdf5 (#3140)
* Many clang -Wextra-semi-stmt fixes (#2537)
* Adds semicolons to function-like macros
* Adds a do..while(0) loop to some macros (a sketch of this pattern follows the commit message below)
* Removes semicolons when inappropriate, especially H5E_TRY_BEGIN/END
* Remove HD prefix from network calls (#3142) HDsocket(), etc. Only affects the mirror VFD and its test code.
* Remove hbool_t/TRUE/FALSE from java (#3145) Replaces with bool/true/false.
* CMake: (feature) ROS3 and cmake config file. (#3146) - Added a cmake variable to the hdf5-config.cmake file which indicates whether the library has been built with or without the read-only S3 functionality.
* Define minimal permissions for new GitHub workflows (#3147)
* Track s3 i/o when S3COMMS_DEBUG enabled (#3139)
* Fix the snapshots workflow (#3148)
* Add upload url as artifact
* Change doxygen path and comment log-url upload
* zip doxygen files for upload
* add workspace var
* chore: fix grammar (#3150)
* Removes the HD prefix from java C99 calls (#3149) POSIX calls (HDstrndup, etc.) are unchanged.
* Correct the zip usage (#3153)
* Many fixes to various compiler warnings (#3124)
* Fixed various -Wmissing-variable-declarations by adding static keyword
* In a few cases, renamed the variable suffix from _g to _s
* Fixed some -Wmissing-variable-declarations by using different declaration macros
* Fixed various -Wconditional-uninitialized warnings by just initializing variables to zero
* Fixed various -Wcomma warnings
* Fixed clang -Wstrict-prototypes warnings
* Fixed various -Wunused-variable warnings
* Updated some casts to fix the only 3 -Wcast-qual warnings
* Fixed the only -Wsometimes-uninitialized warning
* Create Security Policy (#3152)
* Fix #1978 h5vers usage message. (#3162) Update Platforms Tested in RELEASE.txt.
* speed-up building HDF5 (#3087) Disables building the tests when building netCDF, etc.
* Remove dead code behind #ifdef OLD_WAY (#3163)
* Remove H5F_evict_tagged_metadata() (#3165) The rest of the library just calls H5AC_evict_tagged_metadata() directly.
* Add missing space in zip command (#3167)
* Fixed check for a VOL's async compatibility (#3164)
* cap flag fix in test
* added async comp. output
* Update Linux workflows (#3173)
* Consolidate environment setup
* Turn on ros3 VFD in CMake (Linux only)
* Add gh-pages doxygen link (#3175)
* Fix the doxygen to gh pages and artifact creation (#3176)
* Tidy the list of options in main.yml (#3181)
* Remove HD/hbool_t from fortran (#3182)
* Remove HD/hbool_t from high-level lib (#3183)
* Remove HDva_(arg|copy|end|start) (#3184)
* Drop HD prefix & hbool_t from H5TS (#3180)
* Remove HD from fork/exec*/wait* (#3190) These are not C99 but are hidden behind ifdefs and are highly unlikely to ever have non-POSIX equivalents.
* Fix assertion failure when attempting to use IOC VFD directly (#3187)
* Rename HDqsort() to qsort() (#3193)
* Committing clang-format changes --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Rename HDpipe() to pipe() (#3192) Pipe is POSIX but implemented in Microsoft's CRT.
* Rename HDassert() to assert() (#3191)
* Change HDassert to assert
* Fix bin/make_err
* Rename HD(f)printf() to (f)printf() (#3194)
* Add note about HDF5_VOL_CONNECTOR to tools usage (#3159)
* Rename HDsystem() to system() (#3197) system() is only used in the iopipe test and the things it calls (which are POSIX-y) are protected by an ifdef.
* Remove HD from HDposix_memalign() (#3196) The posix_memalign call is only used in the direct VFD, which can only be built if posix_memalign() is available.
* Remove HD from memory allocate/free calls (#3195)
* HDcalloc
* HDfree
* HDmalloc
* HDrealloc
* chore: fix grammar (#3207)
* docs: remove redundancy in Data Transfer section of user guide (#3208)
* Remove checks for setsysinfo, which is unused (#3205)
* Autotools
* CMake
* Remove HD from protected POSIX calls (#3203) These calls are non-C99 but protected by ifdefs and have no Windows equivalents:
* HDalarm
* HDasprintf
* HDclock_gettime
* HDfcntl
* HDgethostname
* HDgetrusage
* HDsymlink
* Rename HDato*() to ato*() (#3201)
* Remove some "Programmer" comments (#3209) These are meaningless noise. Removes the "Programmer" lines on comment start lines (/* Programmer: John Smith); these complicate my sed script that will rip out the rest of the comments.
* Rename HDexit() and related to exit(), etc. (#3202)
* HDatexit
* HDexit
* HD_exit
* Remove HD from strto* calls (#3204)
* HDstrtod
* HDstrtol
* HDstrtoll
* HDstrtoul
* HDstrtoull
* HDstrtoumax
* Remove HD from C std lib file ops (#3206)
* HDfclose
* HDferror
* HDfeof
* HDfflush
* HDfopen
* HDfread
* HDfwrite
* Remove programmer/date from comments (#3210)
* Removes Programmer: and Date: fields
* Fixes a few Modifications: fields leftover from previous work
* Remove HD from HDmem* calls (#3211)
* Remove HD from HDis* (e.g., isalpha) (#3212)
* HDisalnum
* HDisalpha
* HDiscntrl
* HDisdigit
* HDisgraph
* HDislower
* HDisprint
* HDispunct
* HDisspace
* HDisupper
* HDisxdigit
* Update actions for release option, fix branch for daily build (#3185)
* Scheduled workflows run on the latest commit on develop
* Add snapshots location
* docs: improve consistency in verb form (#3076) (#3188)
* fix gh action if statements (#3213)
* Adjust presets timeout and fix build script VS versions (#3215)
* Several ros3vfd logging improvements
* Committing clang-format changes
* Update COPYING (#3231) Fixed old support URL.
* addresses compilation fortran warnings on Frontier (#3236)
* Fix doc for H5allocate_memory (#3240)
* merge bbrelease to release (#3232)
* Fix pre-req workflow
* Replace support.hdfgroup.org URLs for alternative COPYING file (#3228) Replace support.hdfgroup.org URLs for alternative COPYING file locations in copyright headers with https://www.hdfgroup.org/licenses. Replace support.hdfgroup.org URL for alternative COPYING_LBNL_HDF5 with github URL. Tweak chkcopyright script for change from UICOPYRIGHTSTR to THGCOPYRIGHTSTR.
* Replace 1_10 reference with develop branch (#3227)
* Switch CI to use release script (#3242)
* Subfiling VFD source cleanup (#3241) Modularize Subfiling CMake code into a separate CMakeLists.txt file. Update Mercury util code to latest version and update Copyright. Generate mercury_util_config.h header file instead of using a pre-generated file. Remove unnecessary Mercury functionality. Fix minor warning in Subfiling VFD code.
* Remove Mercury headers from Autotools publicly-distributed header list
* install h5fuse.sh in bin dir. (#3244)
* Disable h5py until fixed properly without spack (#3243)
* ROS3: (feature) Temporary security credentials (#3030) - Implemented support for AWS temporary security credentials. For this kind of credentials, a session/security token must also be included in the request by adding the x-amz-security-token header. Co-authored-by: Larry Knox <lrknox@hdfgroup.org> Co-authored-by: Jordan Henderson <jhenderson@hdfgroup.org> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Avoid truncating at null byte when copying to std::string (#3083) --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Fix CMake builds when Subfiling VFD isn't enabled (#3250)
* Add Subfiling VFD entry to hdf5-config.cmake.in
* Fix some warnings in developer builds (#3247)
* Switch approach to Winline flag
* Fixed more warnings about extra semicolons (#3249)
* Require semi-colon after H5_CHECK_OVERFLOW calls Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Fix warning in H5C__UPDATE_STATS_FOR_DIRTY_PIN macro (#3259) Add braces to the H5C__UPDATE_STATS_FOR_DIRTY_PIN macro to fix a warning causing Werror Release builds to fail.
* Update DEFAULT_API_VERSION documentation for CMake (#3255)
* Fix hint --------- Co-authored-by: Larry Knox <lrknox@hdfgroup.org>
* changed the scope of #ifdef DOXYGEN to now include H5D multi-functions (#3254)
* Option changed but not all references (#3252)
* remove quotes from binary var
* Move 1.12.3 release to October (#3263)
* Fixed some -Wunused-variable warnings and one -Wsometimes-uninitialized warning (#3260)
* removed the use of encoded single apostrophe (#3261)
* removed the use of encoded single apostrophe, and fixed H5Dread_chunk from write to read
* updated sanitizer paragraph
* fixed brief description for H5Fget_info
* ROS3: (fix) Replaced HDfprintf (#3266) - Replaced the HDfprintf() calls with fprintf() to be consistent with other parts of the library.
* chore: make VRFY output consistent (#3268)
* CMake: (fix) Threads dependency (#3267) - If the HDF5 library has been built with either the thread-safety or Subfiling VFD feature enabled, it will have an additional dependency on a threading library. This dependency has been added to the hdf5-config.cmake.in file.
* chore: fix grammar - get hang -> get hung (#3272)
* Another round of fixing -Wextra-semi-stmt warnings (#3264) Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* chore: fix typo - persent -> present (#3273)
* Remove py-pip from h5py action (#3265)
* Update release schedule (#3317)
* Move 1.12.3 to November
* Add 1.14.3 in October
* Update README.md w/ 1.14.3 info (#3318)
* Switch parallel compression to use vector I/O (#3245) Updates the parallel compression feature to use vector I/O instead of creating and passing down MPI derived types to the VFD.
* Fix incorrect error check in H5Ofill.c for undefined fill values (#3312)
* Fix H5Otoken_to_str call in h5dump and other minor cleanup (#3314)
* Fix loading plugin fails with missing directory, GH issue #3248 (#3315)
* Made HGOTO_ERROR a do-while loop (#3308)
* Update files to skip list and ignore_words_list (#3321) Update the files-to-skip list and ignore_words_list for codespell to not check files generated by autotools. Autotools generate misspellings that can't be fixed in HDF5 code.
* Windows runtime items go into the bin folder (#3320)
* A couple of documentation items to fix (#3332)
* Fix h5repack for variable-length datatyped datasets (#3331)
* Fix CVE-2018-11202 (#3330) A malformed file could result in chunk index memory leaks. Under most conditions (i.e., when the --enable-using-memchecker option is NOT used), this would result in a small memory leak and an infinite loop and abort when shutting down the library. The infinite loop would be due to the "free list" package not being able to clear its resources, so the library couldn't shut down. When the "using a memory checker" option is used, the free lists are disabled, so there is just a memory leak with no abort on library shutdown. The chunk index resources are now correctly cleaned up when reading misparsed files, and valgrind confirms no memory leaks.
* Fix CVE-2018-13867 (#3336)
* Fixes the last of the -Wextra-semi-stmt warnings (#3326)
* Fixed extra semi warning by adjusting alternative macro definitions
* Find-replace H5E_END_TRY; -> H5E_END_TRY
* Made H5Epush_goto a do-while loop, fixed indentation
* Made GOTOERROR and ERRMSG do-while loops
* Made Hgoto_error and Hgoto_done do-while loops
* Made vrfy_cint_type and vrfy_ctype do-while loops
* Made TEST_TYPE_CONTIG and others do-while loops
* Removed extraneous semi-colons
* Committing clang-format changes --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Fix a typo in RELEASE.txt
* Fix assertion failure in H5D__mpio_collective_filtered_vec_io (#3340)
* Make h5dump spacing consistent when printing VLEN datatype (#3351)
* Fix for the bug exposed from running test/set_extent.c when selection I/O is enabled (#3319) This is a fix from Neil. test/set_extent.c is modified to test with selection I/O enabled.
* Add Fortran ES module to deploy list (#3341) Change fortran mod file export to use a list of names.
* test(parallel): verify FALSE case (#3356)
* Implementation of the mpio driver with selection I/O. (#3222) This changes the default selection I/O to on for MPIO.
* Work around a testphdf5 failure on Cray MPICH machines (#3361)
* set H5_PAC_C_MAX_REAL_PRECISION default to 0 when cross compiling with CMake to fix Fortran build failures (#3364)
* Add RELEASE.txt notes for recent selection I/O work. (#3374)
* Fix possible performance regression introduced with in-place type conversion in 1.14.2 (#3376)
* Correct script (#3377)
* Correct CI settings (#3384) Correct plugin file name.
* restore CI tarball name prefix to match bin/release (#3385)
* Fix assertion failure during file close on error (#3387)
* Fix compile failures with H5C_DO_MEMORY_SANITY_CHECKS enabled (#3388)
* Fix valgrind warning about write of uninitialized bytes (#3389)
* Fix valgrind warning about write of uninitialized bytes in ScaleOffset filter (#3390)
* Update presets, examples uncompress, szip cache (#3391)
* Fix serial to parallel chunked dataset file space allocation bug (#3394)
* chore: fix typo (#3405)
* Fix for CVE-2016-4332 (#3406) This CVE issue was previously listed as fixed (via HDFFV-9950) back in 2016, but with no confirmation test. Now that test files exist for the 2016 Talos CVE issues, we found that CVE-2016-4332 can raise an assert in debug builds. This fix replaces the assert with pointer checks that don't raise errors or asserts. Since the function is in cleanup code, we do our best to close and free things, even when presented with partially-initialized structs. Fixes CVE-2016-4332 and HDFFV-9950 (confirmed via the cve_hdf5 repo).
* Fix ph5diff tests for MPIEXEC_MAX_NUMPROCS=1 (#3407)
* Enable szip by default in Autotools (#3412) Since libaec is so prevalent and BSD-licensed for both encode and decode, we build the szip filter by default when the szip or aec libraries are found.
* Re-enable SZIP default to ON in CMake (#3414) The Autotools were handled in a separate commit.
* Fix Heap-buffer-overflow WRITE in H5MM_memcpy (#3368)
* Add Intel oneAPI actions (#2949) (#2977)
* ci: add Intel oneAPI actions (#2949)
* ci: fix CMake installation
* ci: use absolute paths for libtool installation
* ci(oneAPI): update compiler versions and use cron
* Add RELEASE.txt entry for compound performance regression fix (#3376) (#3416)
* chore: remove gubbins comment (#3420)
* Add other types and full type to enum/str/vlen dataformat for structblock[begin/end] (#3353)
* Avoid H5Ocopy in h5repack for variable-length string types (#3419)
* Skip atomicity tests for OpenMPI major versions < 5 (#3421)
* Fix an issue with use of uninitialized memory in trefer_deprec.c test (#3422)
* Add parallel examples in doxygen (#3413)
* Fix use of uninitialized value in testpar/t_dset.c test (#3423)
* Remove extraneous "33" in RELEASE.txt (#3425)
* Revise file close assertion failure fix (#3418)
* Remove intel oneapi warning (#3426)
* Fix for CVE-2018-15671 (#3427) h5stat -S $POC will result in a crash with a segmentation fault. This is because the object in the test file points back to the root group: when the tool tries to traverse the object, it goes back to the root group and then back again.
* chore: match function call and VRFY() output (#3428)
* Fix the Fortran extension used in example links (#3430)
* Put H5T_CONV_ab macros in do..while loops (#3432) Ever since a recent round of macro cleanup, bin/trace and clang-format have been bickering over what H5Tconv.c should look like, and neither produces readable code. This change puts the top-level H5T_CONV_ab macros in do..while loops, adds appropriate semicolons, and adds the missing H5_CLANG_DIAG_ON|OFF and H5_GCC_CLANG_DIAG_ON|OFF macros to the list of statement macros clang-format recognizes. H5Tconv.c is now readable and both bin/trace and clang-format are happy.
* Convert some H5MM calls to standard C equivalents (#2382)
* H5MM_calloc and malloc are now mapped to stdlib C calls
* H5MM_memcpy now maps directly to memcpy in release builds
* H5MM_memcpy is still implemented as a separate function that checks for buffer overlap when H5MM_DEBUG is defined (default w/ debug builds)
* Switches many library memcpy calls to use H5MM_memcpy
* Fixes a possible zero allocation in H5Olayout.c
* Add 1.14.4 to the release schedule (#3434)
* Output stderr file in CMake testing on failure (#3431)
* Add Intel oneAPI badges (#3433)
* Add a CVE regression test action (#3445)
* Disable SZIP for Intel oneAPI Action (#3449)
* Disable Fortran and parallel
* Update VOL CMake for REST VOL (#3450)
* Prevent linking static libs to VOLs
* Add an h5py badge to README.md (#3477)
* Removed all the ranks printing out testing information (#3457)
* Fix Subfiling VFD IOC assignment bug (#3456)
* Correct java test dimension (#3482)
* Support CMake VOL builds with FetchContent from local directory (#3455)
* Update VOL CMake for REST VOL
* Prevent linking static libs to VOLs
* index on fetch_local: 5c5c3f1505 Prevent linking static libs to VOLs
* index on (no branch): 9a36d3e7b1 On fetch_local: WIP:add source dir fetch option for vols
* Allow building of VOL from local source
* Move LOCAL_DIR option to HDF5_VOL_ALLOW_EXTERNAL
* Fix the Fortran include dir in install config files (#3454)
* Convert hbool_t --> bool in examples (#3492)
* Fix some minor formatting for consistency (#3499)
* Create scorecard.yml (#3508) Bring in the OSSF Scorecard code scanner as a GitHub action.
* Convert hbool_t --> bool in testpar (#3495)
* hbool_t/TRUE/FALSE --> bool/true/false in tools (#3491)
* Convert hbool_t --> bool in test (#3494)
* Convert hbool_t --> bool in src (#3496)
* hbool_t --> bool in src
* Does not remove TRUE/FALSE
* Public header files are unchanged
* Public API calls are unchanged
* TRUE/FALSE --> true/false in src
* Add deprecation notice for hbool_t
* Added new Fortran API wrappers (#3511)
* Added new wrappers for h5get_free_list_sizes_f, H5Sselect_intersect_block_f, H5Sselect_shape_same_f, h5pget_no_selection_io_cause_f, h5pget_mpio_no_collective_cause_f, H5Lvisit_by_name_f, H5Lvisit_f, H5Fget_info_f, h5dwrite_chunk_f, h5dread_chunk_f
* added h5pget_file_space_page_size_f, h5pset_file_space_page_size_f, h5pget_file_space_strategy_f, h5pset_file_space_strategy_f, h5info tests
* added fortran tests
* Update tH5F.F90
* misc. fortran fixes for failing CI daily tests (#3523)
* fixed H5Lvisit* interface
* changed integer type for direct write
* Consistent initialization of hid_t in the tests (#3521)
* Fix windows cpack with debug (#3525)
* Add missing row for the ROS3 VFD in table #3415 (#3517)
* fixed nvidia compiler issue (#3527)
* Identify functions in a subgroup (#3530)
* quiet warning on sunspot (gcc 11.2.0) (#3534)
* Add API examples doxygen page (#3500)
* removed C_INT32_T from Fortran APIs (#3537)
* Add NVHPC 23.7 GitHub Actions (#3509)
* Address @derobins review
* Remove HD prefix from math functions (#3538)
* Remove HD prefix from HDlog10 calls (#3539) Was missed in a previous commit and causes building subfiling to fail.
* fixed arg to C H5Dwrite_chunk (#3541)
* Strip HD prefix from string/char C API calls (#3540)
* HD(f)(put|get)(s|c)
* HDstr*
* HDv*printf
* HD(s)(print|scan)f
* HDperror
But NOT:
* HDstrcase*
* HDvasprintf
* HDstrtok_r
* HDstrndup
As those are not C99 and have portability work-around implementations. They will be handled later.
* Fix th5_system.c screwup
* Convert main.yml CI into callable workflows (#3529)
* Fix broken URL. (#3546)
* Fix grammar (#3545)
* Update oneAPI-C/A badge yml links. (#3564)
* Check return values from HDF5 API calls. (#3556)
* Adds link to h5fuse.sh in testpar for autotools (#3557) Make the h5fuse.sh utility available to parallel subfiling tests so h5fuse testing is not skipped.
* Some minor formatting and text changes (#3563)
* Fix typos and grammar in t_pread. (#3565)
* Fix typo (givin->given) in test/testframe.c. (#3567)
* Fix ifx unused variable hdferr warning. (#3568)
* Correct comments about H5Z_FILTER_NONE (#3572)
* Update release script. (#3577)
* fixed function declaration (#3579)
* Fixed GH-3554 (#3584) Removed the extra condition.
* Remove h5dwalk.1 man page. (#3589)
* Removed the use of -commons linking option on Darwin (#3581) Removed the use of the -commons linking option on Darwin, as COMMON and EQUIVALENCE are no longer used.
* Fix docs for H5Pset_dxpl_mpio_collective_opt() (#3594)
* Fix typo: arange->arrange in src/H5Cmpio.c. (#3597)
* Fix docs for H5Acreate2 and H5Acreate_by_name (#3598)
* Use HDoff_t with lseek consistently (#3600) lseek on Windows uses __int64 for both the offset and return type instead of off_t like most POSIX systems. This change ensures we use HDoff_t (which is typedef'd correctly on Windows) w/ lseek.
* Replaces HDgetenv with getenv (#3599)
* Develop tools move (#3580) Reorganizes the tools files to support the VOL tests.
* Clean up Subfiling VFD header doxygen formatting (#3601)
* Remove `sh` to run bash script. (#3590)
* Correct path name of ddl file to be changed (#3607)
* Fix potential uninitialized variable (#3602) Moves a union initialization up a bit so it's performed before code that can jump to the cleanup target, where file descriptors could be checked without being initialized. This could only happen in test code and only in an out-of-memory situation. Fixes Coverity 1542254.
* Remove unnecessary assignment in test generator (#3603) Fixes what looks like a copy/paste/modify error in the format convert test file generator, where an array element is assigned one value and then immediately overwritten by another value. Fixes Coverity issue 1542285.
* Remove useless define TRUE/FALSE statements. (#3604)
* Fix typo behaviour and dependes. (#3605)
* Fix typos (#3609)
* Fixed unused variable in H5CS.c (#3552) (#3612) Fixed #3552.
* Fix grammar (#3614)
* Cleanup unused statements (#3553) (#3617) Removed unnecessary assert statements and noise comments.
* Fix Intel oneAPI icc warning (#3619)
* Fix several spelling/grammar issues (#3621)
* Add HPC CDash to README.md (#3623)
* Disable static + thread-safe on Windows w/ CMake (#3622) The thread-safety feature on Windows requires a hook in DllMain() and thus is only available when HDF5 is built as a shared library. This was previously a warning, but has now been elevated to a fatal error that cannot be overridden with ALLOW_UNSUPPORTED. Fixes GitHub #3613.
* Remove unused member from H5D_shared_t struct. (#3628)
* Remove old EXTERNALPROJECT_ADD in favor of FETCH_CONTENT (#3624)
* Fix grammar (#3635)
* Disambiguate error output messages. (#3634)
* Address @brtnfld review
* Fail CMake on Windows when Subfiling VFD is enabled (#3636)
* Improve consistency in past tense usage (#3638)
* Split out test logic to separate file (#3639)
* Drop MPI-2 support (#3643)
* Switch IEEE flags for NAG Fortran (#3644) Default is -ieee=stop, which causes problems when the H5T module performs floating-point type introspection. The new mode is -ieee=full.
* Remove 1.10.11 info from README.md (#3646)
* Fixes GH#1027 compilation error (#3654)
* Remove 1.10 badge (#3650)
* Use real URLs and updated names for plugins (#3651)
* synchronize TGZ naming convention/usage
* Update parallel compression feature to support multi-dataset I/O (#3591)
* Add more tests for selection I/O. (#3528)
* Adjust 1.14 files after merging
---------
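Several PRs in this sync (#2537, #3308, #3326, #3432) convert statement-like macros to the do..while(0) form, as flagged in the message above. A minimal sketch of why that form matters, using a hypothetical HGOTO_ERROR_SKETCH stand-in rather than the library's actual macro body: a multi-statement macro wrapped in do..while(0) expands to exactly one statement, so it composes with unbraced if/else and consumes the caller's trailing semicolon.

/* Illustrative sketch of the do..while(0) macro pattern; HGOTO_ERROR_SKETCH
 * is a hypothetical stand-in, not the library's actual HGOTO_ERROR body. */
#include <stdio.h>

#define HGOTO_ERROR_SKETCH(msg)                                              \
    do {                                                                     \
        fprintf(stderr, "error: %s\n", (msg)); /* report the failure */      \
        goto done;                             /* jump to cleanup target */  \
    } while (0) /* no trailing semicolon: the caller's ';' completes it */

int
example(int fd)
{
    int ret_value = 0;

    if (fd < 0)
        HGOTO_ERROR_SKETCH("bad file descriptor"); /* expands to one statement */
    else
        ret_value = 1; /* with a bare { } macro body, this 'else' would not parse */

done:
    return ret_value;
}

With a plain { ... } body, the semicolon after the macro call would end the if statement and orphan the else; do..while(0) avoids that while still demanding the semicolon that -Wextra-semi-stmt polices.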
Diffstat (limited to 'src/H5Dmpio.c')
-rw-r--r--  src/H5Dmpio.c  2211
1 file changed, 1337 insertions, 874 deletions
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 82bcf02..8ba9a14 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -82,21 +82,10 @@
*/
#define H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, dset) \
do { \
- index_info.f = (dset)->oloc.file; \
- index_info.pline = &((dset)->shared->dcpl_cache.pline); \
- index_info.layout = &((dset)->shared->layout.u.chunk); \
- index_info.storage = &((dset)->shared->layout.storage.u.chunk); \
- } while (0)
-
-/*
- * Macro to initialize a H5D_chunk_ud_t structure
- * given a pointer to a H5D_chk_idx_info_t structure
- */
-#define H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, index_info_ptr) \
- do { \
- memset(&chunk_ud, 0, sizeof(H5D_chunk_ud_t)); \
- chunk_ud.common.layout = (index_info_ptr)->layout; \
- chunk_ud.common.storage = (index_info_ptr)->storage; \
+ (index_info).f = (dset)->oloc.file; \
+ (index_info).pline = &((dset)->shared->dcpl_cache.pline); \
+ (index_info).layout = &((dset)->shared->layout.u.chunk); \
+ (index_info).storage = &((dset)->shared->layout.storage.u.chunk); \
} while (0)
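The rewritten macro above also parenthesizes its index_info parameter. A hedged illustration of what those parentheses buy (INIT_BAD, INIT_GOOD, and struct idx_info are hypothetical names, not code from this commit): member access binds tighter than unary *, so an unparenthesized parameter breaks when a caller passes a dereferenced pointer.

/* Hypothetical illustration of the parenthesization fix in the hunk above */
struct idx_info {
    int f;
};

#define INIT_BAD(info, val)  info.f = (val)   /* *ptr.f parses as *(ptr.f)  */
#define INIT_GOOD(info, val) (info).f = (val) /* (*ptr).f is what is meant  */

void
init_from_ptr(struct idx_info *ptr)
{
    /* INIT_BAD(*ptr, 1);  would expand to *ptr.f = 1; and fail to compile */
    INIT_GOOD(*ptr, 1); /* expands to (*ptr).f = 1; */
}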
/******************/
@@ -129,16 +118,45 @@ typedef struct H5D_chunk_alloc_info_t {
H5F_block_t chunk_current;
H5F_block_t chunk_new;
hsize_t chunk_idx;
+ haddr_t dset_oloc_addr;
} H5D_chunk_alloc_info_t;
/*
* Information for a chunk pertaining to the dataset's chunk
- * index entry for the chunk
+ * index entry for the chunk.
+ *
+ * NOTE: To support efficient lookups of H5D_filtered_collective_chunk_info_t
+ * structures during parallel writes to filtered chunks, the
+ * chunk_idx and dset_oloc_addr fields of this structure are used
+ * together as a key for a hash table by following the approach
+ * outlined at https://troydhanson.github.io/uthash/userguide.html#_compound_keys.
+ * This means the following:
+ *
+ * - Instances of this structure should be memset to 0 when
+ * used for hashing to ensure that any padding between the
+ * chunk_idx and dset_oloc_addr fields does not affect the
+ * generated key.
+ *
+ * - The chunk_idx and dset_oloc_addr fields should be arranged
+ * in that specific order, as the code currently relies on
+ * this ordering when calculating the key length and it
+ * performs memory operations on the structure starting from
+ * the chunk_idx field and using the calculated key length.
+ *
+ * - The chunk_idx and dset_oloc_addr fields should ideally
+ * be arranged next to each other in the structure to minimize
+ * the calculated key length.
*/
typedef struct H5D_chunk_index_info_t {
- hsize_t chunk_idx;
+ /*
+ * These two fields must come in this order and next to
+ * each other for proper and efficient hashing
+ */
+ hsize_t chunk_idx;
+ haddr_t dset_oloc_addr;
+
unsigned filter_mask;
- hbool_t need_insert;
+ bool need_insert;
} H5D_chunk_index_info_t;
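The NOTE above follows the compound-key recipe from the linked uthash user guide. A self-contained sketch of that recipe (chunk_entry_t and its members are illustrative stand-ins, with uint64_t in place of hsize_t/haddr_t): the entry is zeroed before the key fields are set so padding cannot perturb the key bytes, and the key length spans the two adjacent fields.

/* Sketch of the uthash compound-key pattern described in the NOTE above */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "uthash.h"

typedef struct chunk_entry_t {
    uint64_t chunk_idx;      /* first key field                 */
    uint64_t dset_oloc_addr; /* second key field, kept adjacent */
    int      payload;        /* non-key data                    */
    UT_hash_handle hh;
} chunk_entry_t;

/* Key spans from the start of chunk_idx through the end of dset_oloc_addr */
#define CHUNK_KEYLEN                                                         \
    (offsetof(chunk_entry_t, dset_oloc_addr) + sizeof(uint64_t) -            \
     offsetof(chunk_entry_t, chunk_idx))

void
insert_then_find(chunk_entry_t **table, chunk_entry_t *entry, uint64_t idx,
                 uint64_t addr)
{
    chunk_entry_t  key;
    chunk_entry_t *found = NULL;

    memset(entry, 0, sizeof(*entry)); /* zero padding before keying */
    entry->chunk_idx      = idx;
    entry->dset_oloc_addr = addr;
    HASH_ADD(hh, *table, chunk_idx, CHUNK_KEYLEN, entry);

    /* Lookups must build their key in a zeroed struct the same way */
    memset(&key, 0, sizeof(key));
    key.chunk_idx      = idx;
    key.dset_oloc_addr = addr;
    HASH_FIND(hh, *table, &key.chunk_idx, CHUNK_KEYLEN, found);
    (void)found; /* found == entry on success */
}

Keeping the two key fields adjacent and in this order is what lets one (offset, length) pair describe the key, which is exactly the layout constraint the NOTE spells out.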
/*
@@ -219,8 +237,8 @@ typedef struct H5D_filtered_collective_chunk_info_t {
H5D_piece_info_t *chunk_info;
H5F_block_t chunk_current;
H5F_block_t chunk_new;
- hbool_t need_read;
- hbool_t skip_filter_pline;
+ bool need_read;
+ bool skip_filter_pline;
size_t io_size;
size_t chunk_buf_size;
int orig_owner;
@@ -232,6 +250,24 @@ typedef struct H5D_filtered_collective_chunk_info_t {
} H5D_filtered_collective_chunk_info_t;
/*
+ * Information cached about each dataset involved when performing
+ * collective I/O on filtered chunks.
+ */
+typedef struct H5D_mpio_filtered_dset_info_t {
+ const H5D_dset_io_info_t *dset_io_info;
+ H5D_fill_buf_info_t fb_info;
+ H5D_chk_idx_info_t chunk_idx_info;
+ hsize_t file_chunk_size;
+ haddr_t dset_oloc_addr;
+ H5S_t *fill_space;
+ bool should_fill;
+ bool fb_info_init;
+ bool index_empty;
+
+ UT_hash_handle hh;
+} H5D_mpio_filtered_dset_info_t;
+
+/*
* Top-level structure that contains an array of H5D_filtered_collective_chunk_info_t
* chunk info structures for collective filtered I/O, as well as other useful information.
* The struct's fields are as follows:
@@ -249,6 +285,10 @@ typedef struct H5D_filtered_collective_chunk_info_t {
* will contain the chunk's "chunk index" value that can be used for chunk
* lookup operations.
*
+ * chunk_hash_table_keylen - The calculated length of the key used for the chunk info hash
+ * table, depending on whether collective I/O is being performed
+ * on a single or multiple filtered datasets.
+ *
* num_chunks_infos - The number of entries in the `chunk_infos` array.
*
* num_chunks_to_read - The number of entries (or chunks) in the `chunk_infos` array that
@@ -263,12 +303,39 @@ typedef struct H5D_filtered_collective_chunk_info_t {
* of chunk info structures to determine how big of I/O vectors to
* allocate during read operations, as an example.
*
+ * all_dset_indices_empty - A boolean determining whether all the datasets involved in the
+ * I/O operation have empty chunk indices. If this is the case,
+ * collective read operations can be skipped during processing
+ * of chunks.
+ *
+ * no_dset_index_insert_methods - A boolean determining whether all the datasets involved
+ * in the I/O operation have no chunk index insertion
+ * methods. If this is the case, collective chunk reinsertion
+ * operations can be skipped during processing of chunks.
+ *
+ * single_dset_info - A pointer to a H5D_mpio_filtered_dset_info_t structure containing
+ * information that is used when performing collective I/O on a single
+ * filtered dataset.
+ *
+ * dset_info_hash_table - A hash table storing H5D_mpio_filtered_dset_info_t structures
+ * that is populated when performing collective I/O on multiple
+ * filtered datasets at a time using the multi-dataset I/O API
+ * routines.
+ *
*/
typedef struct H5D_filtered_collective_io_info_t {
H5D_filtered_collective_chunk_info_t *chunk_infos;
H5D_filtered_collective_chunk_info_t *chunk_hash_table;
+ size_t chunk_hash_table_keylen;
size_t num_chunk_infos;
size_t num_chunks_to_read;
+ bool all_dset_indices_empty;
+ bool no_dset_index_insert_methods;
+
+ union {
+ H5D_mpio_filtered_dset_info_t *single_dset_info;
+ H5D_mpio_filtered_dset_info_t *dset_info_hash_table;
+ } dset_info;
} H5D_filtered_collective_io_info_t;
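Connecting chunk_hash_table_keylen with the dset_info union above, a hedged sketch of the keylen choice the field documentation describes (an assumption about the scheme, not this commit's exact code): with a single filtered dataset every chunk shares one dset_oloc_addr, so chunk_idx alone can key the table, while multi-dataset I/O must key on both adjacent fields.

/* Hypothetical keylen selection mirroring the field documentation above;
 * the typedefs are illustrative stand-ins for the HDF5 types. */
#include <stddef.h>
#include <stdint.h>

typedef uint64_t hsize_t_sketch; /* stands in for hsize_t */
typedef uint64_t haddr_t_sketch; /* stands in for haddr_t */

typedef struct {
    hsize_t_sketch chunk_idx;      /* first key field  */
    haddr_t_sketch dset_oloc_addr; /* second key field */
} chunk_key_sketch_t;

static size_t
compute_keylen(size_t num_dset_infos)
{
    /* Several datasets: the key must also cover dset_oloc_addr */
    if (num_dset_infos > 1)
        return offsetof(chunk_key_sketch_t, dset_oloc_addr) +
               sizeof(haddr_t_sketch) - offsetof(chunk_key_sketch_t, chunk_idx);

    /* One dataset: every chunk shares dset_oloc_addr, so chunk_idx suffices */
    return sizeof(hsize_t_sketch);
}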
/*
@@ -278,6 +345,7 @@ typedef struct H5D_filtered_collective_io_info_t {
typedef struct H5D_chunk_redistribute_info_t {
H5F_block_t chunk_block;
hsize_t chunk_idx;
+ haddr_t dset_oloc_addr;
int orig_owner;
int new_owner;
int num_writers;
@@ -299,11 +367,11 @@ typedef struct H5D_chunk_insert_info_t {
static herr_t H5D__piece_io(H5D_io_info_t *io_info);
static herr_t H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info,
int mpi_rank, int mpi_size);
-static herr_t H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info,
- int mpi_rank, int mpi_size);
+static herr_t H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_infos,
+ size_t num_dset_infos, int mpi_rank, int mpi_size);
static herr_t H5D__link_piece_collective_io(H5D_io_info_t *io_info, int mpi_rank);
-static herr_t H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info,
- int mpi_rank, int mpi_size);
+static herr_t H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_infos,
+ size_t num_dset_infos, int mpi_rank, int mpi_size);
static herr_t H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di,
H5S_t *file_space, H5S_t *mem_space);
static herr_t H5D__final_collective_io(H5D_io_info_t *io_info, hsize_t mpi_buf_count,
@@ -314,48 +382,44 @@ static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info, int *sum_chu
static herr_t H5D__mpio_get_sum_chunk_dset(const H5D_io_info_t *io_info, const H5D_dset_io_info_t *dset_info,
int *sum_chunkf);
static herr_t H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info,
- const H5D_dset_io_info_t *di, int mpi_rank,
+ const H5D_dset_io_info_t *di,
+ size_t num_dset_infos, int mpi_rank,
H5D_filtered_collective_io_info_t *chunk_list);
static herr_t H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_list,
const H5D_io_info_t *io_info, int mpi_rank, int mpi_size,
size_t **rank_chunks_assigned_map);
static herr_t H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list,
- size_t *num_chunks_assigned_map,
- hbool_t all_ranks_involved,
- const H5D_io_info_t *io_info, int mpi_rank,
- int mpi_size);
+ size_t *num_chunks_assigned_map,
+ bool all_ranks_involved, const H5D_io_info_t *io_info,
+ int mpi_rank, int mpi_size);
static herr_t H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list,
- H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info,
- int mpi_rank, int H5_ATTR_NDEBUG_UNUSED mpi_size,
- unsigned char ***chunk_msg_bufs,
- int *chunk_msg_bufs_len);
+ H5D_io_info_t *io_info, int mpi_rank,
+ int H5_ATTR_NDEBUG_UNUSED mpi_size,
+ unsigned char ***chunk_msg_bufs,
+ int *chunk_msg_bufs_len);
static herr_t H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list,
- const H5D_io_info_t *io_info,
- const H5D_dset_io_info_t *di, int mpi_rank);
+ const H5D_io_info_t *io_info, size_t num_dset_infos,
+ int mpi_rank);
static herr_t H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list,
unsigned char **chunk_msg_bufs,
int chunk_msg_bufs_len, const H5D_io_info_t *io_info,
- const H5D_dset_io_info_t *di,
- int H5_ATTR_NDEBUG_UNUSED mpi_rank);
+ size_t num_dset_infos, int mpi_rank);
static herr_t H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list,
- size_t *num_chunks_assigned_map,
- H5D_io_info_t *io_info,
- H5D_chk_idx_info_t *idx_info, int mpi_rank,
- int mpi_size);
+ size_t *num_chunks_assigned_map,
+ H5D_io_info_t *io_info, size_t num_dset_infos,
+ int mpi_rank, int mpi_size);
static herr_t H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list,
size_t *num_chunks_assigned_map,
- H5D_io_info_t *io_info, H5D_dset_io_info_t *di,
- H5D_chk_idx_info_t *idx_info, int mpi_rank,
- int mpi_size);
+ H5D_io_info_t *io_info, size_t num_dset_infos,
+ int mpi_rank, int mpi_size);
static herr_t H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type,
- hbool_t *contig_type_derived,
+ bool *contig_type_derived,
MPI_Datatype *resized_type,
- hbool_t *resized_type_derived);
-static herr_t H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
- MPI_Datatype *resized_type, hbool_t *resized_type_derived);
-static herr_t H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
- MPI_Datatype *resized_type,
- hbool_t *resized_type_derived);
+ bool *resized_type_derived);
+static herr_t H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, bool *contig_type_derived,
+ MPI_Datatype *resized_type, bool *resized_type_derived);
+static herr_t H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, bool *contig_type_derived,
+ MPI_Datatype *resized_type, bool *resized_type_derived);
static herr_t H5D__mpio_collective_filtered_vec_io(const H5D_filtered_collective_io_info_t *chunk_list,
H5F_shared_t *f_sh, H5D_io_op_type_t op_type);
static int H5D__cmp_piece_addr(const void *chunk_addr_info1, const void *chunk_addr_info2);
@@ -411,7 +475,7 @@ H5FL_EXTERN(H5S_sel_iter_t);
*/
static int H5D_mpio_debug_flags_s[256];
static int H5D_mpio_debug_rank_s[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
-static hbool_t H5D_mpio_debug_inited = FALSE;
+static bool H5D_mpio_debug_inited = false;
static const char *const trace_in_pre = "-> ";
static const char *const trace_out_pre = "<- ";
static int debug_indent = 0;
@@ -444,27 +508,27 @@ static FILE *debug_stream = NULL;
#define H5D_MPIO_TRACE_ENTER(rank) \
do { \
- hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \
+ bool trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \
\
if (trace_flag) { \
H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_in_pre, __func__); \
- debug_indent += (int)HDstrlen(trace_in_pre); \
+ debug_indent += (int)strlen(trace_in_pre); \
} \
} while (0)
#define H5D_MPIO_TRACE_EXIT(rank) \
do { \
- hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \
+ bool trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \
\
if (trace_flag) { \
- debug_indent -= (int)HDstrlen(trace_out_pre); \
+ debug_indent -= (int)strlen(trace_out_pre); \
H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_out_pre, __func__); \
} \
} while (0)
#define H5D_MPIO_TIME_START(rank, op_name) \
{ \
- hbool_t time_flag = H5D_mpio_debug_flags_s[(int)'c']; \
+ bool time_flag = H5D_mpio_debug_flags_s[(int)'c']; \
double start_time = 0.0, end_time = 0.0; \
const char *const op = op_name; \
\
@@ -499,7 +563,7 @@ H5D__mpio_parse_debug_str(const char *s)
int c = (int)(*s);
if (c >= (int)'0' && c <= (int)'9') {
- hbool_t range = FALSE;
+ bool range = false;
if (*(s + 1) && *(s + 2))
range = (int)*(s + 1) == '-' && (int)*(s + 2) >= (int)'0' && (int)*(s + 2) <= (int)'9';
@@ -546,14 +610,14 @@ H5D__mpio_debug_init(void)
memset(H5D_mpio_debug_flags_s, 0, sizeof(H5D_mpio_debug_flags_s));
/* Retrieve and parse the H5Dmpio debug string */
- debug_str = HDgetenv("H5D_mpio_Debug");
+ debug_str = getenv("H5D_mpio_Debug");
if (debug_str)
H5D__mpio_parse_debug_str(debug_str);
if (H5DEBUG(D))
debug_stream = H5DEBUG(D);
- H5D_mpio_debug_inited = TRUE;
+ H5D_mpio_debug_inited = true;
FUNC_LEAVE_NOAPI(ret_value)
}
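H5D__mpio_debug_init() above now reads the H5D_mpio_Debug environment string with plain getenv(). Judging only from the code visible in this diff ('t' gates tracing, 'c' gates timing, and H5D__mpio_parse_debug_str() maps digit characters to ranks), a simplified sketch of that kind of flag-string parsing; the real parser also handles rank ranges like "0-3", which this omits.

/* Simplified sketch of parsing a debug string such as "tc01"; modeled on,
 * but not identical to, H5D__mpio_parse_debug_str referenced above. */
#include <stddef.h>

static int debug_flags_sketch[256];
static int debug_ranks_sketch[8] = {-1, -1, -1, -1, -1, -1, -1, -1};

static void
parse_debug_str_sketch(const char *s)
{
    size_t next_rank = 0;

    for (; *s; s++) {
        if (*s >= '0' && *s <= '9') {
            if (next_rank < 8)
                debug_ranks_sketch[next_rank++] = *s - '0'; /* enable rank */
        }
        else
            debug_flags_sketch[(unsigned char)*s] = 1; /* e.g. 't', 'c' */
    }
}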
@@ -569,7 +633,7 @@ H5D__mpio_debug_init(void)
* This was derived from H5D__mpio_opt_possible for
* multi-dset work.
*
- * Return: Success: Non-negative: TRUE or FALSE
+ * Return: Success: Non-negative: true or false
* Failure: Negative
*
*-------------------------------------------------------------------------
@@ -587,7 +651,7 @@ H5D__mpio_opt_possible(H5D_io_info_t *io_info)
/* [1] Flag if dataset is both: H5S_ALL and small */
unsigned global_cause[2] = {0, 0}; /* Global reason(s) for breaking collective mode */
htri_t is_vl_storage; /* Whether the dataset's datatype is stored in a variable-length form */
- htri_t ret_value = TRUE; /* Return value */
+ htri_t ret_value = true; /* Return value */
FUNC_ENTER_PACKAGE
@@ -638,8 +702,8 @@ H5D__mpio_opt_possible(H5D_io_info_t *io_info)
}
/* Check whether these are both simple or scalar dataspaces */
- if (!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) ||
- H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space)) &&
+ if (!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space) ||
+ H5S_NULL == H5S_GET_EXTENT_TYPE(mem_space)) &&
(H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) ||
H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space))))
local_cause[0] |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
@@ -740,14 +804,14 @@ H5D__mpio_opt_possible(H5D_io_info_t *io_info)
/* Set read-with-rank0-and-bcast flag if possible */
if (global_cause[0] == 0 && global_cause[1] == 0) {
- H5CX_set_mpio_rank0_bcast(TRUE);
+ H5CX_set_mpio_rank0_bcast(true);
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- H5CX_test_set_mpio_coll_rank0_bcast(TRUE);
+ H5CX_test_set_mpio_coll_rank0_bcast(true);
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
} /* end if */
/* Set the return value, based on the global cause */
- ret_value = global_cause[0] > 0 ? FALSE : TRUE;
+ ret_value = global_cause[0] > 0 ? false : true;
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -863,14 +927,14 @@ H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len, c
* so, prepend a semicolon to separate the messages.
*/
if (buf_space_left && local_cause_bytes_written) {
- HDstrncat(local_cause, "; ", buf_space_left);
+ strncat(local_cause, "; ", buf_space_left);
local_cause_bytes_written += MIN(buf_space_left, 2);
buf_space_left -= MIN(buf_space_left, 2);
}
if (buf_space_left) {
- HDstrncat(local_cause, cause_str, buf_space_left);
- local_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str));
+ strncat(local_cause, cause_str, buf_space_left);
+ local_cause_bytes_written += MIN(buf_space_left, strlen(cause_str));
}
}
@@ -886,14 +950,14 @@ H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len, c
* so, prepend a semicolon to separate the messages.
*/
if (buf_space_left && global_cause_bytes_written) {
- HDstrncat(global_cause, "; ", buf_space_left);
+ strncat(global_cause, "; ", buf_space_left);
global_cause_bytes_written += MIN(buf_space_left, 2);
buf_space_left -= MIN(buf_space_left, 2);
}
if (buf_space_left) {
- HDstrncat(global_cause, cause_str, buf_space_left);
- global_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str));
+ strncat(global_cause, cause_str, buf_space_left);
+ global_cause_bytes_written += MIN(buf_space_left, strlen(cause_str));
}
}
}
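Both branches above append cause strings with strncat while tracking the space left and the bytes written. A small isolated sketch of this bounded-append pattern (append_cause and its caller are hypothetical, not the library's code):

/* Illustrative sketch of the bounded strncat append pattern used above */
#include <stdio.h>
#include <string.h>

#define MIN(a, b) (((a) < (b)) ? (a) : (b))

static void
append_cause(char *buf, size_t *space_left, size_t *written, const char *cause)
{
    /* Separate multiple messages with "; ", as the code above does */
    if (*space_left && *written) {
        strncat(buf, "; ", *space_left);
        *written    += MIN(*space_left, 2);
        *space_left -= MIN(*space_left, 2);
    }
    if (*space_left) {
        size_t n = MIN(*space_left, strlen(cause));

        strncat(buf, cause, *space_left);
        *written    += n;
        *space_left -= n;
    }
}

int
main(void)
{
    char   buf[32] = "";
    size_t space   = sizeof(buf) - 1; /* reserve room for the NUL */
    size_t written = 0;

    append_cause(buf, &space, &written, "datatype conversions needed");
    append_cause(buf, &space, &written, "not simple or scalar dataspaces");
    printf("%s\n", buf); /* the second message is safely truncated */
    return 0;
}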
@@ -1076,12 +1140,12 @@ H5D__piece_io(H5D_io_info_t *io_info)
{
H5FD_mpio_chunk_opt_t chunk_opt_mode;
#ifdef H5Dmpio_DEBUG
- hbool_t log_file_flag = FALSE;
- FILE *debug_log_file = NULL;
+ bool log_file_flag = false;
+ FILE *debug_log_file = NULL;
#endif
int io_option = H5D_MULTI_CHUNK_IO_MORE_OPT;
- hbool_t recalc_io_option = FALSE;
- hbool_t use_multi_dset = FALSE;
+ bool recalc_io_option = false;
+ bool use_multi_dset = false;
unsigned one_link_chunk_io_threshold; /* Threshold to use single collective I/O for all chunks */
int sum_chunk = -1;
int mpi_rank;
@@ -1113,7 +1177,7 @@ H5D__piece_io(H5D_io_info_t *io_info)
char debug_log_filename[1024];
time_t time_now;
- HDsnprintf(debug_log_filename, 1024, "H5Dmpio_debug.rank%d", mpi_rank);
+ snprintf(debug_log_filename, 1024, "H5Dmpio_debug.rank%d", mpi_rank);
if (NULL == (debug_log_file = fopen(debug_log_filename, "a")))
HGOTO_ERROR(H5E_IO, H5E_OPENERROR, FAIL, "couldn't open debugging log file");
@@ -1138,19 +1202,12 @@ H5D__piece_io(H5D_io_info_t *io_info)
else if (H5FD_MPIO_CHUNK_MULTI_IO == chunk_opt_mode)
io_option = H5D_MULTI_CHUNK_IO;
else
- recalc_io_option = TRUE;
+ recalc_io_option = true;
/* Check if we can and should use multi dataset path */
if (io_info->count > 1 && (io_option == H5D_ONE_LINK_CHUNK_IO || recalc_io_option)) {
/* Use multi dataset path for now */
- use_multi_dset = TRUE;
-
- /* Check for filtered datasets */
- for (i = 0; i < io_info->count; i++)
- if (io_info->dsets_info[i].dset->shared->dcpl_cache.pline.nused > 0) {
- use_multi_dset = FALSE;
- break;
- }
+ use_multi_dset = true;
/* Check if this I/O exceeds one linked chunk threshold */
if (recalc_io_option && use_multi_dset) {
@@ -1170,31 +1227,45 @@ H5D__piece_io(H5D_io_info_t *io_info)
* chunk IO. If this threshold is not exceeded for all datasets, no need to check it again
* for each individual dataset. */
if ((unsigned)sum_chunk / (unsigned)mpi_size < one_link_chunk_io_threshold) {
- recalc_io_option = FALSE;
- use_multi_dset = FALSE;
+ recalc_io_option = false;
+ use_multi_dset = false;
}
}
}
+ }
- /* Perform multi dataset I/O if appropriate */
- if (use_multi_dset) {
+ /* Perform multi dataset I/O if appropriate */
+ if (use_multi_dset) {
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- /*** Set collective chunk user-input optimization API. ***/
- if (H5D_ONE_LINK_CHUNK_IO == io_option) {
- if (H5CX_test_set_mpio_coll_chunk_link_hard(0) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "unable to set property value");
- } /* end if */
-#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+ /*** Set collective chunk user-input optimization API. ***/
+ if (H5D_ONE_LINK_CHUNK_IO == io_option) {
+ if (H5CX_test_set_mpio_coll_chunk_link_hard(0) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "unable to set property value");
+ } /* end if */
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* Process all the filtered datasets first */
+ if (io_info->filtered_count > 0) {
+ if (H5D__link_chunk_filtered_collective_io(io_info, io_info->dsets_info, io_info->count, mpi_rank,
+ mpi_size) < 0)
+ HGOTO_ERROR(H5E_IO, (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish filtered linked chunk MPI-IO");
+ }
+ /* Process all the unfiltered datasets */
+ if ((io_info->filtered_count == 0) || (io_info->filtered_count < io_info->count)) {
/* Perform unfiltered link chunk collective IO */
if (H5D__link_piece_collective_io(io_info, mpi_rank) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish linked chunk MPI-IO");
+ HGOTO_ERROR(H5E_IO, (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish linked chunk MPI-IO");
}
}
-
- if (!use_multi_dset) {
+ else {
/* Loop over datasets */
for (i = 0; i < io_info->count; i++) {
+ if (io_info->dsets_info[i].skip_io)
+ continue;
+
if (io_info->dsets_info[i].layout->type == H5D_CONTIGUOUS) {
/* Contiguous: call H5D__inter_collective_io() directly */
H5D_mpio_actual_io_mode_t actual_io_mode = H5D_MPIO_CONTIGUOUS_COLLECTIVE;
@@ -1205,7 +1276,8 @@ H5D__piece_io(H5D_io_info_t *io_info)
if (H5D__inter_collective_io(io_info, &io_info->dsets_info[i],
io_info->dsets_info[i].file_space,
io_info->dsets_info[i].mem_space) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish shared collective MPI-IO");
+ HGOTO_ERROR(H5E_IO, (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish shared collective MPI-IO");
/* Set the actual I/O mode property. internal_collective_io will not break to
* independent I/O, so we set it here.
@@ -1226,7 +1298,7 @@ H5D__piece_io(H5D_io_info_t *io_info)
/* If the threshold is 0, no need to check number of chunks */
if (one_link_chunk_io_threshold == 0) {
io_option = H5D_ONE_LINK_CHUNK_IO_MORE_OPT;
- recalc_io_option = FALSE;
+ recalc_io_option = false;
}
else {
/* Get number of chunks for all processes */
@@ -1250,28 +1322,34 @@ H5D__piece_io(H5D_io_info_t *io_info)
case H5D_ONE_LINK_CHUNK_IO_MORE_OPT:
/* Check if there are any filters in the pipeline */
if (io_info->dsets_info[i].dset->shared->dcpl_cache.pline.nused > 0) {
- if (H5D__link_chunk_filtered_collective_io(io_info, &io_info->dsets_info[i],
+ if (H5D__link_chunk_filtered_collective_io(io_info, &io_info->dsets_info[i], 1,
mpi_rank, mpi_size) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
- "couldn't finish filtered linked chunk MPI-IO");
+ HGOTO_ERROR(
+ H5E_IO,
+ (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish filtered linked chunk MPI-IO");
} /* end if */
else {
/* If there is more than one dataset we cannot make the multi dataset call here,
* fall back to multi chunk */
if (io_info->count > 1) {
io_option = H5D_MULTI_CHUNK_IO_MORE_OPT;
- recalc_io_option = TRUE;
+ recalc_io_option = true;
if (H5D__multi_chunk_collective_io(io_info, &io_info->dsets_info[i], mpi_rank,
mpi_size) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
- "couldn't finish optimized multiple chunk MPI-IO");
+ HGOTO_ERROR(
+ H5E_IO,
+ (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish optimized multiple chunk MPI-IO");
}
else {
/* Perform unfiltered link chunk collective IO */
if (H5D__link_piece_collective_io(io_info, mpi_rank) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
- "couldn't finish linked chunk MPI-IO");
+ HGOTO_ERROR(
+ H5E_IO,
+ (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish linked chunk MPI-IO");
}
}
@@ -1281,17 +1359,21 @@ H5D__piece_io(H5D_io_info_t *io_info)
default: /* multiple chunk IO via threshold */
/* Check if there are any filters in the pipeline */
if (io_info->dsets_info[i].dset->shared->dcpl_cache.pline.nused > 0) {
- if (H5D__multi_chunk_filtered_collective_io(io_info, &io_info->dsets_info[i],
+ if (H5D__multi_chunk_filtered_collective_io(io_info, &io_info->dsets_info[i], 1,
mpi_rank, mpi_size) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
- "couldn't finish optimized multiple filtered chunk MPI-IO");
+ HGOTO_ERROR(
+ H5E_IO,
+ (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish optimized multiple filtered chunk MPI-IO");
} /* end if */
else {
/* Perform unfiltered multi chunk collective IO */
if (H5D__multi_chunk_collective_io(io_info, &io_info->dsets_info[i], mpi_rank,
mpi_size) < 0)
- HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
- "couldn't finish optimized multiple chunk MPI-IO");
+ HGOTO_ERROR(
+ H5E_IO,
+ (H5D_IO_OP_READ == io_info->op_type ? H5E_READERROR : H5E_WRITEERROR),
+ FAIL, "couldn't finish optimized multiple chunk MPI-IO");
}
break;
@@ -1409,31 +1491,40 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
#endif
{
MPI_Datatype chunk_final_mtype; /* Final memory MPI datatype for all chunks with selection */
- hbool_t chunk_final_mtype_is_derived = FALSE;
+ bool chunk_final_mtype_is_derived = false;
MPI_Datatype chunk_final_ftype; /* Final file MPI datatype for all chunks with selection */
- hbool_t chunk_final_ftype_is_derived = FALSE;
+ bool chunk_final_ftype_is_derived = false;
H5D_storage_t ctg_store; /* Storage info for "fake" contiguous dataset */
MPI_Datatype *chunk_mtype = NULL;
MPI_Datatype *chunk_ftype = NULL;
MPI_Aint *chunk_file_disp_array = NULL;
MPI_Aint *chunk_mem_disp_array = NULL;
- hbool_t *chunk_mft_is_derived_array =
- NULL; /* Flags to indicate each chunk's MPI file datatype is derived */
- hbool_t *chunk_mbt_is_derived_array =
+ bool *chunk_mft_is_derived_array = NULL; /* Flags to indicate each chunk's MPI file datatype is derived */
+ bool *chunk_mbt_is_derived_array =
NULL; /* Flags to indicate each chunk's MPI memory datatype is derived */
int *chunk_mpi_file_counts = NULL; /* Count of MPI file datatype for each chunk */
int *chunk_mpi_mem_counts = NULL; /* Count of MPI memory datatype for each chunk */
int mpi_code; /* MPI return code */
H5D_mpio_actual_chunk_opt_mode_t actual_chunk_opt_mode = H5D_MPIO_LINK_CHUNK;
H5D_mpio_actual_io_mode_t actual_io_mode = 0;
- size_t i; /* Local index variable */
- herr_t ret_value = SUCCEED;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_PACKAGE
/* set actual_io_mode */
- for (i = 0; i < io_info->count; i++) {
- assert(io_info->dsets_info[i].dset->shared->dcpl_cache.pline.nused == 0);
+ for (size_t i = 0; i < io_info->count; i++) {
+ /* Skip this dataset if no I/O is being performed */
+ if (io_info->dsets_info[i].skip_io)
+ continue;
+
+ /* Filtered datasets are processed elsewhere. A contiguous dataset
+ * could possibly have filters in the DCPL pipeline, but the library
+ * will currently ignore optional filters in that case.
+ */
+ if ((io_info->dsets_info[i].dset->shared->dcpl_cache.pline.nused > 0) &&
+ (io_info->dsets_info[i].layout->type != H5D_CONTIGUOUS))
+ continue;
+
if (io_info->dsets_info[i].layout->type == H5D_CHUNKED)
actual_io_mode |= H5D_MPIO_CHUNK_COLLECTIVE;
else if (io_info->dsets_info[i].layout->type == H5D_CONTIGUOUS)
@@ -1460,8 +1551,9 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
H5_flexible_const_ptr_t base_buf_addr;
base_buf_addr.cvp = NULL;
- /* Get the number of chunks with a selection */
- num_chunk = io_info->pieces_added;
+ /* Get the number of unfiltered chunks with a selection */
+ assert(io_info->filtered_pieces_added <= io_info->pieces_added);
+ num_chunk = io_info->pieces_added - io_info->filtered_pieces_added;
H5_CHECK_OVERFLOW(num_chunk, size_t, int);
#ifdef H5Dmpio_DEBUG
@@ -1470,15 +1562,15 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
/* Set up MPI datatype for chunks selected */
if (num_chunk) {
- hbool_t need_sort = FALSE;
+ bool need_sort = false;
/* Check if sel_pieces array is sorted */
assert(io_info->sel_pieces[0]->faddr != HADDR_UNDEF);
- for (i = 1; i < num_chunk; i++) {
+ for (size_t i = 1; i < io_info->pieces_added; i++) {
assert(io_info->sel_pieces[i]->faddr != HADDR_UNDEF);
if (io_info->sel_pieces[i]->faddr < io_info->sel_pieces[i - 1]->faddr) {
- need_sort = TRUE;
+ need_sort = true;
break;
}
}
@@ -1504,18 +1596,27 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk memory counts buffer");
if (NULL == (chunk_mpi_file_counts = (int *)H5MM_calloc(num_chunk * sizeof(int))))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file counts buffer");
- if (NULL == (chunk_mbt_is_derived_array = (hbool_t *)H5MM_calloc(num_chunk * sizeof(hbool_t))))
+ if (NULL == (chunk_mbt_is_derived_array = (bool *)H5MM_calloc(num_chunk * sizeof(bool))))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
"couldn't allocate chunk memory is derived datatype flags buffer");
- if (NULL == (chunk_mft_is_derived_array = (hbool_t *)H5MM_calloc(num_chunk * sizeof(hbool_t))))
+ if (NULL == (chunk_mft_is_derived_array = (bool *)H5MM_calloc(num_chunk * sizeof(bool))))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
"couldn't allocate chunk file is derived datatype flags buffer");
- /* save lowest file address */
- ctg_store.contig.dset_addr = io_info->sel_pieces[0]->faddr;
-
- /* save base mem addr of piece for read/write */
- base_buf_addr = io_info->sel_pieces[0]->dset_info->buf;
+ /*
+ * After sorting sel_pieces according to file address, locate
+ * the first unfiltered chunk and save its file address and
+ * base memory address for read/write
+ */
+ ctg_store.contig.dset_addr = HADDR_UNDEF;
+ for (size_t i = 0; i < io_info->pieces_added; i++) {
+ if (!io_info->sel_pieces[i]->filtered_dset) {
+ ctg_store.contig.dset_addr = io_info->sel_pieces[i]->faddr;
+ base_buf_addr = io_info->sel_pieces[i]->dset_info->buf;
+ break;
+ }
+ }
+ assert(ctg_store.contig.dset_addr != HADDR_UNDEF);
#ifdef H5Dmpio_DEBUG
H5D_MPIO_DEBUG(mpi_rank, "before iterate over selected pieces\n");
@@ -1523,34 +1624,38 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
/* Obtain MPI derived datatype from all individual pieces */
/* Iterate over selected pieces for this process */
- for (i = 0; i < num_chunk; i++) {
+ for (size_t i = 0, curr_idx = 0; i < io_info->pieces_added; i++) {
hsize_t *permute_map = NULL; /* array that holds the mapping from the old,
out-of-order displacements to the in-order
displacements of the MPI datatypes of the
point selection of the file space */
- hbool_t is_permuted = FALSE;
+ bool is_permuted = false;
/* Assign convenience pointer to piece info */
piece_info = io_info->sel_pieces[i];
+ /* Skip over filtered pieces as they are processed elsewhere */
+ if (piece_info->filtered_dset)
+ continue;
+
/* Obtain disk and memory MPI derived datatype */
/* NOTE: The permute_map array can be allocated within H5S_mpio_space_type
* and will be fed into the next call to H5S_mpio_space_type
* where it will be freed.
*/
if (H5S_mpio_space_type(piece_info->fspace, piece_info->dset_info->type_info.src_type_size,
- &chunk_ftype[i], /* OUT: datatype created */
- &chunk_mpi_file_counts[i], /* OUT */
- &(chunk_mft_is_derived_array[i]), /* OUT */
- TRUE, /* this is a file space,
- so permute the
- datatype if the point
- selections are out of
- order */
- &permute_map, /* OUT: a map to indicate the
- permutation of points
- selected in case they
- are out of order */
+ &chunk_ftype[curr_idx], /* OUT: datatype created */
+ &chunk_mpi_file_counts[curr_idx], /* OUT */
+ &(chunk_mft_is_derived_array[curr_idx]), /* OUT */
+ true, /* this is a file space,
+ so permute the
+ datatype if the point
+ selections are out of
+ order */
+ &permute_map, /* OUT: a map to indicate the
+ permutation of points
+ selected in case they
+ are out of order */
&is_permuted /* OUT */) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI file type");
@@ -1558,20 +1663,20 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
if (is_permuted)
assert(permute_map);
if (H5S_mpio_space_type(piece_info->mspace, piece_info->dset_info->type_info.dst_type_size,
- &chunk_mtype[i], &chunk_mpi_mem_counts[i],
- &(chunk_mbt_is_derived_array[i]), FALSE, /* this is a memory
- space, so if the file
- space is not
- permuted, there is no
- need to permute the
- datatype if the point
- selections are out of
- order*/
- &permute_map, /* IN: the permutation map
- generated by the
- file_space selection
- and applied to the
- memory selection */
+ &chunk_mtype[curr_idx], &chunk_mpi_mem_counts[curr_idx],
+ &(chunk_mbt_is_derived_array[curr_idx]), false, /* this is a memory
+ space, so if the
+ file space is not
+ permuted, there is
+ no need to permute
+ the datatype if the
+ point selections
+ are out of order */
+ &permute_map, /* IN: the permutation map
+ generated by the
+ file_space selection
+ and applied to the
+ memory selection */
&is_permuted /* IN */) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI buf type");
/* Sanity check */
@@ -1581,16 +1686,19 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
/* Piece address relative to the first piece addr
* Assign piece address to MPI displacement
* (assume MPI_Aint big enough to hold it) */
- chunk_file_disp_array[i] = (MPI_Aint)piece_info->faddr - (MPI_Aint)ctg_store.contig.dset_addr;
+ chunk_file_disp_array[curr_idx] =
+ (MPI_Aint)piece_info->faddr - (MPI_Aint)ctg_store.contig.dset_addr;
if (io_info->op_type == H5D_IO_OP_WRITE) {
- chunk_mem_disp_array[i] =
+ chunk_mem_disp_array[curr_idx] =
(MPI_Aint)piece_info->dset_info->buf.cvp - (MPI_Aint)base_buf_addr.cvp;
}
else if (io_info->op_type == H5D_IO_OP_READ) {
- chunk_mem_disp_array[i] =
+ chunk_mem_disp_array[curr_idx] =
(MPI_Aint)piece_info->dset_info->buf.vp - (MPI_Aint)base_buf_addr.vp;
}
+
+ curr_idx++;
} /* end for */
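/* A minimal sketch, with hypothetical names, of the compaction pattern
 * used in the loop above: i walks every selected piece while curr_idx
 * advances only for unfiltered pieces, keeping the MPI type, count and
 * displacement arrays dense with exactly num_chunk entries. */
size_t curr_idx = 0;
for (size_t i = 0; i < pieces_added; i++) {
    if (sel_pieces[i]->filtered_dset)
        continue; /* filtered pieces are handled by the filtered-I/O path */

    fill_type_arrays(sel_pieces[i], curr_idx); /* hypothetical per-piece work */
    curr_idx++;
}
assert(curr_idx == num_chunk);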
/* Create final MPI derived datatype for the file */
@@ -1601,7 +1709,7 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&chunk_final_ftype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
- chunk_final_ftype_is_derived = TRUE;
+ chunk_final_ftype_is_derived = true;
/* Create final MPI derived datatype for memory */
if (MPI_SUCCESS !=
@@ -1610,10 +1718,10 @@ H5D__link_piece_collective_io(H5D_io_info_t *io_info, int H5_ATTR_UNUSED mpi_ran
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&chunk_final_mtype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
- chunk_final_mtype_is_derived = TRUE;
+ chunk_final_mtype_is_derived = true;
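/* A minimal sketch of how the dense per-chunk arrays built above feed the
 * final datatype; the array names are hypothetical, the MPI calls are the
 * standard ones. Each chunk contributes one block: a count, a byte
 * displacement relative to the base address, and an element datatype. */
MPI_Datatype final_type = MPI_BYTE;
if (MPI_SUCCESS == MPI_Type_create_struct((int)num_chunk, chunk_counts, chunk_disps, chunk_types, &final_type) &&
    MPI_SUCCESS == MPI_Type_commit(&final_type)) {
    /* ... perform the collective transfer with final_type ... */
    MPI_Type_free(&final_type);
}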
/* Free the file & memory MPI datatypes for each chunk */
- for (i = 0; i < num_chunk; i++) {
+ for (size_t i = 0; i < num_chunk; i++) {
if (chunk_mbt_is_derived_array[i])
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(chunk_mtype + i)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
@@ -1658,6 +1766,9 @@ done:
ret_value);
#endif
+ if (ret_value < 0)
+ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
+
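/* A minimal sketch of the user-visible effect of the reset above, using
 * the public H5Pget_mpio_actual_chunk_opt_mode API; dxpl_id is assumed to
 * be the transfer property list used for the collective I/O call. */
H5D_mpio_actual_chunk_opt_mode_t opt_mode;
if (H5Pget_mpio_actual_chunk_opt_mode(dxpl_id, &opt_mode) >= 0 &&
    (opt_mode == H5D_MPIO_NO_CHUNK_OPTIMIZATION)) {
    /* the collective path failed (or fell back) before optimizing chunk I/O */
}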
/* Release resources */
if (chunk_mtype)
H5MM_xfree(chunk_mtype);
@@ -1754,8 +1865,8 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info, int mpi_rank,
- int mpi_size)
+H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_infos,
+ size_t num_dset_infos, int mpi_rank, int mpi_size)
{
H5D_filtered_collective_io_info_t chunk_list = {0};
unsigned char **chunk_msg_bufs = NULL;
@@ -1763,7 +1874,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
int chunk_msg_bufs_len = 0;
herr_t ret_value = SUCCEED;
- FUNC_ENTER_PACKAGE_TAG(dset_info->dset->oloc.addr)
+ FUNC_ENTER_PACKAGE
assert(io_info);
@@ -1784,18 +1895,15 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
/* Build a list of selected chunks in the collective io operation */
- if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, dset_info, mpi_rank, &chunk_list) < 0)
+ if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, dset_infos, num_dset_infos, mpi_rank,
+ &chunk_list) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list");
if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */
- if (H5D__mpio_collective_filtered_chunk_read(&chunk_list, io_info, dset_info, mpi_rank) < 0)
+ if (H5D__mpio_collective_filtered_chunk_read(&chunk_list, io_info, num_dset_infos, mpi_rank) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks");
}
else { /* Filtered collective write */
- H5D_chk_idx_info_t index_info;
-
- H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, dset_info->dset);
-
if (mpi_size > 1) {
/* Redistribute shared chunks being written to */
if (H5D__mpio_redistribute_shared_chunks(&chunk_list, io_info, mpi_rank, mpi_size,
@@ -1803,7 +1911,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks");
/* Send any chunk modification messages for chunks this rank no longer owns */
- if (H5D__mpio_share_chunk_modification_data(&chunk_list, io_info, dset_info, mpi_rank, mpi_size,
+ if (H5D__mpio_share_chunk_modification_data(&chunk_list, io_info, mpi_rank, mpi_size,
&chunk_msg_bufs, &chunk_msg_bufs_len) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"unable to send chunk modification data between MPI ranks");
@@ -1818,7 +1926,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
* must participate.
*/
if (H5D__mpio_collective_filtered_chunk_update(&chunk_list, chunk_msg_bufs, chunk_msg_bufs_len,
- io_info, dset_info, mpi_rank) < 0)
+ io_info, num_dset_infos, mpi_rank) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks");
/* Free up resources used by chunk hash table now that we're done updating chunks */
@@ -1826,7 +1934,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
/* All ranks now collectively re-allocate file space for all chunks */
if (H5D__mpio_collective_filtered_chunk_reallocate(&chunk_list, rank_chunks_assigned_map, io_info,
- &index_info, mpi_rank, mpi_size) < 0)
+ num_dset_infos, mpi_rank, mpi_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"couldn't collectively re-allocate file space for chunks");
@@ -1846,12 +1954,15 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_
* into the chunk index
*/
if (H5D__mpio_collective_filtered_chunk_reinsert(&chunk_list, rank_chunks_assigned_map, io_info,
- dset_info, &index_info, mpi_rank, mpi_size) < 0)
+ num_dset_infos, mpi_rank, mpi_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"couldn't collectively re-insert modified chunks into chunk index");
}
done:
+ if (ret_value < 0)
+ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
+
if (chunk_msg_bufs) {
for (size_t i = 0; i < (size_t)chunk_msg_bufs_len; i++)
H5MM_free(chunk_msg_bufs[i]);
@@ -1861,6 +1972,9 @@ done:
HASH_CLEAR(hh, chunk_list.chunk_hash_table);
+ if (rank_chunks_assigned_map)
+ H5MM_free(rank_chunks_assigned_map);
+
/* Free resources used by a rank which had some selection */
if (chunk_list.chunk_infos) {
for (size_t i = 0; i < chunk_list.num_chunk_infos; i++)
@@ -1870,15 +1984,42 @@ done:
H5MM_free(chunk_list.chunk_infos);
} /* end if */
- if (rank_chunks_assigned_map)
- H5MM_free(rank_chunks_assigned_map);
+ /* Free resources used by cached dataset info */
+ if ((num_dset_infos == 1) && (chunk_list.dset_info.single_dset_info)) {
+ H5D_mpio_filtered_dset_info_t *curr_dset_info = chunk_list.dset_info.single_dset_info;
+
+ if (curr_dset_info->fb_info_init && H5D__fill_term(&curr_dset_info->fb_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't release fill buffer info");
+ if (curr_dset_info->fill_space && H5S_close(curr_dset_info->fill_space) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
+
+ H5MM_free(chunk_list.dset_info.single_dset_info);
+ chunk_list.dset_info.single_dset_info = NULL;
+ }
+ else if ((num_dset_infos > 1) && (chunk_list.dset_info.dset_info_hash_table)) {
+ H5D_mpio_filtered_dset_info_t *curr_dset_info;
+ H5D_mpio_filtered_dset_info_t *tmp;
+
+ HASH_ITER(hh, chunk_list.dset_info.dset_info_hash_table, curr_dset_info, tmp)
+ {
+ HASH_DELETE(hh, chunk_list.dset_info.dset_info_hash_table, curr_dset_info);
+
+ if (curr_dset_info->fb_info_init && H5D__fill_term(&curr_dset_info->fb_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't release fill buffer info");
+ if (curr_dset_info->fill_space && H5S_close(curr_dset_info->fill_space) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
+
+ H5MM_free(curr_dset_info);
+ curr_dset_info = NULL;
+ }
+ }
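/* A minimal sketch of the uthash drain pattern used above: HASH_ITER keeps
 * tmp pointing at the next element, so the current entry can be deleted
 * and freed safely mid-iteration. Per-entry teardown (fill buffer, fill
 * dataspace) happens before the free, as in the block above. */
H5D_mpio_filtered_dset_info_t *entry, *tmp;
HASH_ITER(hh, dset_info_hash_table, entry, tmp)
{
    HASH_DELETE(hh, dset_info_hash_table, entry);
    H5MM_free(entry);
}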
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TIME_STOP(mpi_rank);
H5D_MPIO_TRACE_EXIT(mpi_rank);
#endif
- FUNC_LEAVE_NOAPI_TAG(ret_value)
+ FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__link_chunk_filtered_collective_io() */
/*-------------------------------------------------------------------------
@@ -2082,6 +2223,9 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_
H5CX_set_mpio_actual_io_mode(actual_io_mode);
done:
+ if (ret_value < 0)
+ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
+
/* Reset collective opt mode */
if (H5CX_set_mpio_coll_opt(orig_coll_opt_mode) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't reset MPI-I/O collective_op property");
@@ -2174,20 +2318,21 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info, int mpi_rank,
- int mpi_size)
+H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_infos,
+ size_t num_dset_infos, int mpi_rank, int mpi_size)
{
H5D_filtered_collective_io_info_t chunk_list = {0};
unsigned char **chunk_msg_bufs = NULL;
- hbool_t have_chunk_to_process;
+ bool have_chunk_to_process;
size_t max_num_chunks;
int chunk_msg_bufs_len = 0;
int mpi_code;
herr_t ret_value = SUCCEED;
- FUNC_ENTER_PACKAGE_TAG(dset_info->dset->oloc.addr)
+ FUNC_ENTER_PACKAGE_TAG(dset_infos->dset->oloc.addr)
assert(io_info);
+ assert(num_dset_infos == 1); /* Currently only supported with 1 dataset at a time */
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TRACE_ENTER(mpi_rank);
@@ -2205,7 +2350,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
H5CX_set_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE);
/* Build a list of selected chunks in the collective IO operation */
- if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, dset_info, mpi_rank, &chunk_list) < 0)
+ if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, dset_infos, 1, mpi_rank, &chunk_list) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list");
/* Retrieve the maximum number of chunks selected for any rank */
@@ -2219,7 +2364,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */
for (size_t i = 0; i < max_num_chunks; i++) {
- H5D_filtered_collective_io_info_t single_chunk_list = {0};
+ H5D_filtered_collective_io_info_t single_chunk_list = chunk_list;
/* Check if this rank has a chunk to work on for this iteration */
have_chunk_to_process = (i < chunk_list.num_chunk_infos);
@@ -2239,8 +2384,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
single_chunk_list.num_chunks_to_read = 0;
}
- if (H5D__mpio_collective_filtered_chunk_read(&single_chunk_list, io_info, dset_info, mpi_rank) <
- 0)
+ if (H5D__mpio_collective_filtered_chunk_read(&single_chunk_list, io_info, 1, mpi_rank) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks");
if (have_chunk_to_process && chunk_list.chunk_infos[i].buf) {
@@ -2250,18 +2394,13 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
}
}
else { /* Filtered collective write */
- H5D_chk_idx_info_t index_info;
-
- /* Construct chunked index info */
- H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, dset_info->dset);
-
if (mpi_size > 1) {
/* Redistribute shared chunks being written to */
if (H5D__mpio_redistribute_shared_chunks(&chunk_list, io_info, mpi_rank, mpi_size, NULL) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks");
/* Send any chunk modification messages for chunks this rank no longer owns */
- if (H5D__mpio_share_chunk_modification_data(&chunk_list, io_info, dset_info, mpi_rank, mpi_size,
+ if (H5D__mpio_share_chunk_modification_data(&chunk_list, io_info, mpi_rank, mpi_size,
&chunk_msg_bufs, &chunk_msg_bufs_len) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"unable to send chunk modification data between MPI ranks");
@@ -2272,7 +2411,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
* collective re-allocation and re-insertion of chunks modified by other ranks.
*/
for (size_t i = 0; i < max_num_chunks; i++) {
- H5D_filtered_collective_io_info_t single_chunk_list = {0};
+ H5D_filtered_collective_io_info_t single_chunk_list = chunk_list;
/* Check if this rank has a chunk to work on for this iteration */
have_chunk_to_process =
@@ -2284,13 +2423,11 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
*/
if (have_chunk_to_process) {
single_chunk_list.chunk_infos = &chunk_list.chunk_infos[i];
- single_chunk_list.chunk_hash_table = chunk_list.chunk_hash_table;
single_chunk_list.num_chunk_infos = 1;
single_chunk_list.num_chunks_to_read = chunk_list.chunk_infos[i].need_read ? 1 : 0;
}
else {
single_chunk_list.chunk_infos = NULL;
- single_chunk_list.chunk_hash_table = chunk_list.chunk_hash_table;
single_chunk_list.num_chunk_infos = 0;
single_chunk_list.num_chunks_to_read = 0;
}
@@ -2300,13 +2437,13 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
* the chunks. As chunk reads are done collectively here, all ranks
* must participate.
*/
- if (H5D__mpio_collective_filtered_chunk_update(
- &single_chunk_list, chunk_msg_bufs, chunk_msg_bufs_len, io_info, dset_info, mpi_rank) < 0)
+ if (H5D__mpio_collective_filtered_chunk_update(&single_chunk_list, chunk_msg_bufs,
+ chunk_msg_bufs_len, io_info, 1, mpi_rank) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks");
/* All ranks now collectively re-allocate file space for all chunks */
- if (H5D__mpio_collective_filtered_chunk_reallocate(&single_chunk_list, NULL, io_info, &index_info,
- mpi_rank, mpi_size) < 0)
+ if (H5D__mpio_collective_filtered_chunk_reallocate(&single_chunk_list, NULL, io_info, 1, mpi_rank,
+ mpi_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"couldn't collectively re-allocate file space for chunks");
@@ -2324,14 +2461,17 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, H5D_dset_io_info
/* Participate in the collective re-insertion of all chunks modified
* in this iteration into the chunk index
*/
- if (H5D__mpio_collective_filtered_chunk_reinsert(&single_chunk_list, NULL, io_info, dset_info,
- &index_info, mpi_rank, mpi_size) < 0)
+ if (H5D__mpio_collective_filtered_chunk_reinsert(&single_chunk_list, NULL, io_info, 1, mpi_rank,
+ mpi_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
"couldn't collectively re-insert modified chunks into chunk index");
} /* end for */
}
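/* A minimal sketch of why single_chunk_list above is now initialized from
 * chunk_list instead of zeroed: the struct copy carries the chunk hash
 * table and cached dataset info along, and only the chunk-info fields are
 * narrowed to at most one entry per iteration. have_chunk is hypothetical. */
H5D_filtered_collective_io_info_t view = chunk_list; /* struct copy */
view.chunk_infos     = have_chunk ? &chunk_list.chunk_infos[i] : NULL;
view.num_chunk_infos = have_chunk ? 1 : 0;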
done:
+ if (ret_value < 0)
+ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
+
if (chunk_msg_bufs) {
for (size_t i = 0; i < (size_t)chunk_msg_bufs_len; i++)
H5MM_free(chunk_msg_bufs[i]);
@@ -2350,6 +2490,36 @@ done:
H5MM_free(chunk_list.chunk_infos);
} /* end if */
+ /* Free resources used by cached dataset info */
+ if ((num_dset_infos == 1) && (chunk_list.dset_info.single_dset_info)) {
+ H5D_mpio_filtered_dset_info_t *curr_dset_info = chunk_list.dset_info.single_dset_info;
+
+ if (curr_dset_info->fb_info_init && H5D__fill_term(&curr_dset_info->fb_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't release fill buffer info");
+ if (curr_dset_info->fill_space && H5S_close(curr_dset_info->fill_space) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
+
+ H5MM_free(chunk_list.dset_info.single_dset_info);
+ chunk_list.dset_info.single_dset_info = NULL;
+ }
+ else if ((num_dset_infos > 1) && (chunk_list.dset_info.dset_info_hash_table)) {
+ H5D_mpio_filtered_dset_info_t *curr_dset_info;
+ H5D_mpio_filtered_dset_info_t *tmp;
+
+ HASH_ITER(hh, chunk_list.dset_info.dset_info_hash_table, curr_dset_info, tmp)
+ {
+ HASH_DELETE(hh, chunk_list.dset_info.dset_info_hash_table, curr_dset_info);
+
+ if (curr_dset_info->fb_info_init && H5D__fill_term(&curr_dset_info->fb_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't release fill buffer info");
+ if (curr_dset_info->fill_space && H5S_close(curr_dset_info->fill_space) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
+
+ H5MM_free(curr_dset_info);
+ curr_dset_info = NULL;
+ }
+ }
+
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TIME_STOP(mpi_rank);
H5D_MPIO_TRACE_EXIT(mpi_rank);
@@ -2373,8 +2543,8 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di, H
H5S_t *mem_space)
{
int mpi_buf_count; /* # of MPI types */
- hbool_t mbt_is_derived = FALSE;
- hbool_t mft_is_derived = FALSE;
+ bool mbt_is_derived = false;
+ bool mft_is_derived = false;
MPI_Datatype mpi_file_type, mpi_buf_type;
int mpi_code; /* MPI return code */
#ifdef H5Dmpio_DEBUG
@@ -2400,7 +2570,7 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di, H
out-of-order displacements to the in-order
displacements of the MPI datatypes of the
point selection of the file space */
- hbool_t is_permuted = FALSE;
+ bool is_permuted = false;
assert(di);
@@ -2411,7 +2581,7 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di, H
*/
if (H5S_mpio_space_type(file_space, di->type_info.src_type_size, &mpi_file_type, &mpi_file_count,
&mft_is_derived, /* OUT: datatype created */
- TRUE, /* this is a file space, so
+ true, /* this is a file space, so
permute the datatype if the
point selection is out of
order */
@@ -2427,7 +2597,7 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di, H
assert(permute_map);
if (H5S_mpio_space_type(mem_space, di->type_info.src_type_size, &mpi_buf_type, &mpi_buf_count,
&mbt_is_derived, /* OUT: datatype created */
- FALSE, /* this is a memory space, so if
+ false, /* this is a memory space, so if
the file space is not
permuted, there is no need to
permute the datatype if the
@@ -2450,8 +2620,8 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_dset_io_info_t *di, H
mpi_buf_type = MPI_BYTE;
mpi_file_type = MPI_BYTE;
mpi_buf_count = 0;
- mbt_is_derived = FALSE;
- mft_is_derived = FALSE;
+ mbt_is_derived = false;
+ mft_is_derived = false;
} /* end else */
#ifdef H5Dmpio_DEBUG
@@ -2586,20 +2756,25 @@ H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_in
addr2 = entry2->chunk_new.offset;
/*
- * If both chunk addresses are defined, H5_addr_cmp is safe to use.
- * Otherwise, if both addresses aren't defined, compared chunk
- * entries based on their chunk index. Finally, if only one chunk
- * address is defined, return the appropriate value based on which
- * is defined.
+ * If both chunks' file addresses are defined, H5_addr_cmp is safe to use.
+ * If only one chunk's file address is defined, return the appropriate
+ * value based on which is defined. If neither chunk's file address is
+ * defined, compare chunk entries based on their dataset object header
+ * address, then by their chunk index value.
*/
if (H5_addr_defined(addr1) && H5_addr_defined(addr2)) {
ret_value = H5_addr_cmp(addr1, addr2);
}
else if (!H5_addr_defined(addr1) && !H5_addr_defined(addr2)) {
- hsize_t chunk_idx1 = entry1->index_info.chunk_idx;
- hsize_t chunk_idx2 = entry2->index_info.chunk_idx;
+ haddr_t oloc_addr1 = entry1->index_info.dset_oloc_addr;
+ haddr_t oloc_addr2 = entry2->index_info.dset_oloc_addr;
+
+ if (0 == (ret_value = H5_addr_cmp(oloc_addr1, oloc_addr2))) {
+ hsize_t chunk_idx1 = entry1->index_info.chunk_idx;
+ hsize_t chunk_idx2 = entry2->index_info.chunk_idx;
- ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+ ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+ }
}
else
ret_value = H5_addr_defined(addr1) ? 1 : -1;
@@ -2625,8 +2800,8 @@ H5D__cmp_chunk_redistribute_info(const void *_entry1, const void *_entry2)
{
const H5D_chunk_redistribute_info_t *entry1;
const H5D_chunk_redistribute_info_t *entry2;
- hsize_t chunk_index1;
- hsize_t chunk_index2;
+ haddr_t oloc_addr1;
+ haddr_t oloc_addr2;
int ret_value;
FUNC_ENTER_PACKAGE_NOERR
@@ -2634,17 +2809,26 @@ H5D__cmp_chunk_redistribute_info(const void *_entry1, const void *_entry2)
entry1 = (const H5D_chunk_redistribute_info_t *)_entry1;
entry2 = (const H5D_chunk_redistribute_info_t *)_entry2;
- chunk_index1 = entry1->chunk_idx;
- chunk_index2 = entry2->chunk_idx;
+ oloc_addr1 = entry1->dset_oloc_addr;
+ oloc_addr2 = entry2->dset_oloc_addr;
+
+ /* Sort first by dataset object header address */
+ if (0 == (ret_value = H5_addr_cmp(oloc_addr1, oloc_addr2))) {
+ hsize_t chunk_index1 = entry1->chunk_idx;
+ hsize_t chunk_index2 = entry2->chunk_idx;
+
+ /* Then by chunk index value */
+ if (chunk_index1 == chunk_index2) {
+ int orig_owner1 = entry1->orig_owner;
+ int orig_owner2 = entry2->orig_owner;
- if (chunk_index1 == chunk_index2) {
- int orig_owner1 = entry1->orig_owner;
- int orig_owner2 = entry2->orig_owner;
+ /* And finally by original owning MPI rank for the chunk */
- ret_value = (orig_owner1 > orig_owner2) - (orig_owner1 < orig_owner2);
+ ret_value = (orig_owner1 > orig_owner2) - (orig_owner1 < orig_owner2);
+ }
+ else
+ ret_value = (chunk_index1 > chunk_index2) - (chunk_index1 < chunk_index2);
}
- else
- ret_value = (chunk_index1 > chunk_index2) - (chunk_index1 < chunk_index2);
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__cmp_chunk_redistribute_info() */
@@ -2659,6 +2843,16 @@ H5D__cmp_chunk_redistribute_info(const void *_entry1, const void *_entry2)
* rank for two H5D_chunk_redistribute_info_t
* structures
*
+ * NOTE: The inner logic used in this sorting callback (inside the
+ * block where the original owners are equal) is intended to
+ * cause the given array of H5D_chunk_redistribute_info_t
+ * structures to be sorted back exactly as it was sorted
+ * before a shared chunks redistribution operation, according
+ * to the logic in H5D__cmp_filtered_collective_io_info_entry.
+ * Since the two sorting callbacks are currently tied directly
+ * to each other, both should be updated in the same way when
+ * changes are made.
+ *
* Return: -1, 0, 1
*
*-------------------------------------------------------------------------
@@ -2685,20 +2879,25 @@ H5D__cmp_chunk_redistribute_info_orig_owner(const void *_entry1, const void *_en
haddr_t addr2 = entry2->chunk_block.offset;
/*
- * If both chunk addresses are defined, H5_addr_cmp is safe to use.
- * Otherwise, if both addresses aren't defined, compared chunk
- * entries based on their chunk index. Finally, if only one chunk
- * address is defined, return the appropriate value based on which
- * is defined.
+ * If both chunks' file addresses are defined, H5_addr_cmp is safe to use.
+ * If only one chunk's file address is defined, return the appropriate
+ * value based on which is defined. If neither chunk's file address is
+ * defined, compare chunk entries based on their dataset object header
+ * address, then by their chunk index value.
*/
if (H5_addr_defined(addr1) && H5_addr_defined(addr2)) {
ret_value = H5_addr_cmp(addr1, addr2);
}
else if (!H5_addr_defined(addr1) && !H5_addr_defined(addr2)) {
- hsize_t chunk_idx1 = entry1->chunk_idx;
- hsize_t chunk_idx2 = entry2->chunk_idx;
+ haddr_t oloc_addr1 = entry1->dset_oloc_addr;
+ haddr_t oloc_addr2 = entry2->dset_oloc_addr;
- ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+ if (0 == (ret_value = H5_addr_cmp(oloc_addr1, oloc_addr2))) {
+ hsize_t chunk_idx1 = entry1->chunk_idx;
+ hsize_t chunk_idx2 = entry2->chunk_idx;
+
+ ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+ }
}
else
ret_value = H5_addr_defined(addr1) ? 1 : -1;
@@ -2756,8 +2955,8 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_dset_io_info_t *di, uint8_t as
H5SL_node_t *chunk_node;
H5D_piece_info_t *chunk_info;
H5P_coll_md_read_flag_t md_reads_file_flag;
- hbool_t md_reads_context_flag;
- hbool_t restore_md_reads_state = FALSE;
+ bool md_reads_context_flag;
+ bool restore_md_reads_state = false;
MPI_Comm comm;
int root;
size_t ic;
@@ -2826,9 +3025,9 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_dset_io_info_t *di, uint8_t as
*/
if (H5F_get_coll_metadata_reads(di->dset->oloc.file)) {
md_reads_file_flag = H5P_FORCE_FALSE;
- md_reads_context_flag = FALSE;
+ md_reads_context_flag = false;
H5F_set_coll_metadata_reads(di->dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag);
- restore_md_reads_state = TRUE;
+ restore_md_reads_state = true;
}
/* pre-computing: calculate number of processes and
@@ -2880,13 +3079,13 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_dset_io_info_t *di, uint8_t as
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
{
- hbool_t coll_op = FALSE;
+ bool coll_op = false;
for (ic = 0; ic < total_chunks; ic++)
if (assign_io_mode[ic] == H5D_CHUNK_IO_MODE_COL) {
if (H5CX_test_set_mpio_coll_chunk_multi_ratio_coll(0) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "unable to set property value");
- coll_op = TRUE;
+ coll_op = true;
break;
} /* end if */
@@ -2930,20 +3129,21 @@ done:
*/
static herr_t
H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info, const H5D_dset_io_info_t *di,
- int mpi_rank, H5D_filtered_collective_io_info_t *chunk_list)
+ size_t num_dset_infos, int mpi_rank,
+ H5D_filtered_collective_io_info_t *chunk_list)
{
- H5D_filtered_collective_chunk_info_t *local_info_array = NULL;
- H5D_chunk_ud_t udata;
- hbool_t filter_partial_edge_chunks;
- size_t num_chunks_selected;
- size_t num_chunks_to_read = 0;
- herr_t ret_value = SUCCEED;
+ H5D_filtered_collective_chunk_info_t *local_info_array = NULL;
+ H5D_mpio_filtered_dset_info_t *curr_dset_info = NULL;
+ size_t num_chunks_selected = 0;
+ size_t num_chunks_to_read = 0;
+ size_t buf_idx = 0;
+ bool need_sort = false;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_PACKAGE
assert(io_info);
assert(di);
- assert(di->layout->type == H5D_CHUNKED);
assert(chunk_list);
#ifdef H5Dmpio_DEBUG
@@ -2951,166 +3151,330 @@ H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info, const
H5D_MPIO_TIME_START(mpi_rank, "Filtered Collective I/O Setup");
#endif
- /* Each rank builds a local list of the chunks they have selected */
- if ((num_chunks_selected = H5SL_count(di->layout_io_info.chunk_map->dset_sel_pieces))) {
- H5D_piece_info_t *chunk_info;
- H5SL_node_t *chunk_node;
- hsize_t select_npoints;
- hbool_t need_sort = FALSE;
+ /* Calculate hash key length for chunk hash table */
+ if (num_dset_infos > 1) {
+ /* Just in case the structure changes... */
+ HDcompile_assert(offsetof(H5D_chunk_index_info_t, dset_oloc_addr) >
+ offsetof(H5D_chunk_index_info_t, chunk_idx));
+
+ /* Calculate key length using uthash compound key example */
+ chunk_list->chunk_hash_table_keylen = offsetof(H5D_chunk_index_info_t, dset_oloc_addr) +
+ sizeof(haddr_t) - offsetof(H5D_chunk_index_info_t, chunk_idx);
+ }
+ else
+ chunk_list->chunk_hash_table_keylen = sizeof(hsize_t);
+
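/* A minimal sketch of the compound-key convention assumed above: the key
 * region runs from chunk_idx through dset_oloc_addr, which must be
 * adjacent and zero-padded (hence the memset noted later in this diff)
 * since uthash hashes the region bytewise. A lookup passes a pointer to
 * the start of the region plus the same keylen; target is a hypothetical
 * entry holding the key being searched for. */
H5D_filtered_collective_chunk_info_t *found = NULL;
HASH_FIND(hh, chunk_list->chunk_hash_table, &target->index_info.chunk_idx,
          chunk_list->chunk_hash_table_keylen, found);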
+ chunk_list->all_dset_indices_empty = true;
+ chunk_list->no_dset_index_insert_methods = true;
+
+ /* Calculate size needed for total chunk list */
+ for (size_t dset_idx = 0; dset_idx < num_dset_infos; dset_idx++) {
+ /* Skip this dataset if no I/O is being performed */
+ if (di[dset_idx].skip_io)
+ continue;
+
+ /* Only process filtered, chunked datasets. A contiguous dataset
+ * could possibly have filters in the DCPL pipeline, but the library
+ * will currently ignore optional filters in that case.
+ */
+ if ((di[dset_idx].dset->shared->dcpl_cache.pline.nused == 0) ||
+ (di[dset_idx].layout->type == H5D_CONTIGUOUS))
+ continue;
+
+ assert(di[dset_idx].layout->type == H5D_CHUNKED);
+ assert(di[dset_idx].layout->storage.type == H5D_CHUNKED);
- /* Determine whether partial edge chunks should be filtered */
- filter_partial_edge_chunks =
- !(di->dset->shared->layout.u.chunk.flags & H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS);
+ num_chunks_selected += H5SL_count(di[dset_idx].layout_io_info.chunk_map->dset_sel_pieces);
+ }
+ if (num_chunks_selected)
if (NULL == (local_info_array = H5MM_malloc(num_chunks_selected * sizeof(*local_info_array))))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate local io info array buffer");
- chunk_node = H5SL_first(di->layout_io_info.chunk_map->dset_sel_pieces);
- for (size_t i = 0; chunk_node; i++) {
- chunk_info = (H5D_piece_info_t *)H5SL_item(chunk_node);
+ for (size_t dset_idx = 0; dset_idx < num_dset_infos; dset_idx++) {
+ H5D_chunk_ud_t udata;
+ H5O_fill_t *fill_msg;
+ haddr_t prev_tag = HADDR_UNDEF;
- /* Obtain this chunk's address */
- if (H5D__chunk_lookup(di->dset, chunk_info->scaled, &udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address");
+ /* Skip this dataset if no I/O is being performed */
+ if (di[dset_idx].skip_io)
+ continue;
- /* Initialize rank-local chunk info */
- local_info_array[i].chunk_info = chunk_info;
- local_info_array[i].chunk_buf_size = 0;
- local_info_array[i].num_writers = 0;
- local_info_array[i].orig_owner = mpi_rank;
- local_info_array[i].new_owner = mpi_rank;
- local_info_array[i].buf = NULL;
+ /* Only process filtered, chunked datasets. A contiguous dataset
+ * could possibly have filters in the DCPL pipeline, but the library
+ * will currently ignore optional filters in that case.
+ */
+ if ((di[dset_idx].dset->shared->dcpl_cache.pline.nused == 0) ||
+ (di[dset_idx].layout->type == H5D_CONTIGUOUS))
+ continue;
- select_npoints = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
- local_info_array[i].io_size = (size_t)select_npoints * di->type_info.dst_type_size;
+ assert(di[dset_idx].layout->storage.type == H5D_CHUNKED);
+ assert(di[dset_idx].layout->storage.u.chunk.idx_type != H5D_CHUNK_IDX_NONE);
- /*
- * Determine whether this chunk will need to be read from the file. If this is
- * a read operation, the chunk will be read. If this is a write operation, we
- * generally need to read a filtered chunk from the file before modifying it,
- * unless the chunk is being fully overwritten.
- *
- * TODO: Currently the full overwrite status of a chunk is only obtained on a
- * per-rank basis. This means that if the total selection in the chunk, as
- * determined by the combination of selections of all of the ranks interested in
- * the chunk, covers the entire chunk, the performance optimization of not reading
- * the chunk from the file is still valid, but is not applied in the current
- * implementation.
- *
- * To implement this case, a few approaches were considered:
- *
- * - Keep a running total (distributed to each rank) of the number of chunk
- * elements selected during chunk redistribution and compare that to the total
- * number of elements in the chunk once redistribution is finished
- *
- * - Process all incoming chunk messages before doing I/O (these are currently
- * processed AFTER doing I/O), combine the owning rank's selection in a chunk
- * with the selections received from other ranks and check to see whether that
- * combined selection covers the entire chunk
- *
- * The first approach will be dangerous if the application performs an overlapping
- * write to a chunk, as the number of selected elements can equal or exceed the
- * number of elements in the chunk without the whole chunk selection being covered.
- * While it might be considered erroneous for an application to do an overlapping
- * write, we don't explicitly disallow it.
- *
- * The second approach contains a bit of complexity in that part of the chunk
- * messages will be needed before doing I/O and part will be needed after doing I/O.
- * Since modification data from chunk messages can't be applied until after any I/O
- * is performed (otherwise, we'll overwrite any applied modification data), chunk
- * messages are currently entirely processed after I/O. However, in order to determine
- * if a chunk is being fully overwritten, we need the dataspace portion of the chunk
- * messages before doing I/O. The naive way to do this is to process chunk messages
- * twice, using just the relevant information from the message before and after I/O.
- * The better way would be to avoid processing chunk messages twice by extracting (and
- * keeping around) the dataspace portion of the message before I/O and processing the
- * rest of the chunk message after I/O. Note that the dataspace portion of each chunk
- * message is used to correctly apply chunk modification data from the message, so
- * must be kept around both before and after I/O in this case.
- */
- if (io_info->op_type == H5D_IO_OP_READ)
- local_info_array[i].need_read = TRUE;
- else {
- local_info_array[i].need_read =
- local_info_array[i].io_size < (size_t)di->dset->shared->layout.u.chunk.size;
- }
+ /*
+ * To support the multi-dataset I/O case, cache some info (chunk size,
+ * fill buffer and fill dataspace, etc.) about each dataset involved
+ * in the I/O operation for use when processing chunks. If only one
+ * dataset is involved, this information is the same for every chunk
+ * processed. Otherwise, if multiple datasets are involved, a hash
+ * table is used to quickly match a particular chunk with the cached
+ * information pertaining to the dataset it resides in.
+ */
+ if (NULL == (curr_dset_info = H5MM_malloc(sizeof(H5D_mpio_filtered_dset_info_t))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate space for dataset info");
+
+ memset(&curr_dset_info->fb_info, 0, sizeof(H5D_fill_buf_info_t));
+
+ H5D_MPIO_INIT_CHUNK_IDX_INFO(curr_dset_info->chunk_idx_info, di[dset_idx].dset);
+
+ curr_dset_info->dset_io_info = &di[dset_idx];
+ curr_dset_info->file_chunk_size = di[dset_idx].dset->shared->layout.u.chunk.size;
+ curr_dset_info->dset_oloc_addr = di[dset_idx].dset->oloc.addr;
+ curr_dset_info->fill_space = NULL;
+ curr_dset_info->fb_info_init = false;
+ curr_dset_info->index_empty = false;
+
+ /* Determine if fill values should be written to chunks */
+ fill_msg = &di[dset_idx].dset->shared->dcpl_cache.fill;
+ curr_dset_info->should_fill =
+ (fill_msg->fill_time == H5D_FILL_TIME_ALLOC) ||
+ ((fill_msg->fill_time == H5D_FILL_TIME_IFSET) && fill_msg->fill_defined);
+
+ if (curr_dset_info->should_fill) {
+ hsize_t chunk_dims[H5S_MAX_RANK];
+
+ assert(di[dset_idx].dset->shared->ndims == di[dset_idx].dset->shared->layout.u.chunk.ndims - 1);
+ for (size_t dim_idx = 0; dim_idx < di[dset_idx].dset->shared->layout.u.chunk.ndims - 1; dim_idx++)
+ chunk_dims[dim_idx] = (hsize_t)di[dset_idx].dset->shared->layout.u.chunk.dim[dim_idx];
+
+ /* Get a dataspace for filling chunk memory buffers */
+ if (NULL == (curr_dset_info->fill_space = H5S_create_simple(
+ di[dset_idx].dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create chunk fill dataspace");
+
+ /* Initialize fill value buffer */
+ if (H5D__fill_init(&curr_dset_info->fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc,
+ (void *)&di[dset_idx].dset->shared->dcpl_cache.pline,
+ (H5MM_free_t)H5D__chunk_mem_free,
+ (void *)&di[dset_idx].dset->shared->dcpl_cache.pline,
+ &di[dset_idx].dset->shared->dcpl_cache.fill, di[dset_idx].dset->shared->type,
+ di[dset_idx].dset->shared->type_id, 0, curr_dset_info->file_chunk_size) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer");
+
+ curr_dset_info->fb_info_init = true;
+ }
+
+ /*
+ * If the dataset is incrementally allocated and hasn't been written
+ * to yet, the chunk index should be empty. In this case, a collective
+ * read of its chunks is essentially a no-op, so we can avoid that read
+ * later. If all datasets have empty chunk indices, we can skip the
+ * collective read entirely.
+ */
+ if (fill_msg->alloc_time == H5D_ALLOC_TIME_INCR)
+ if (H5D__chunk_index_empty(di[dset_idx].dset, &curr_dset_info->index_empty) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty");
+
+ if ((fill_msg->alloc_time != H5D_ALLOC_TIME_INCR) || !curr_dset_info->index_empty)
+ chunk_list->all_dset_indices_empty = false;
+
+ if (curr_dset_info->chunk_idx_info.storage->ops->insert)
+ chunk_list->no_dset_index_insert_methods = false;
+
+ /*
+ * For multi-dataset I/O, use a hash table to keep a mapping between
+ * chunks and the cached info for the dataset that they're in. Otherwise,
+ * we can just use the info object directly if only one dataset is being
+ * worked on.
+ */
+ if (num_dset_infos > 1) {
+ HASH_ADD(hh, chunk_list->dset_info.dset_info_hash_table, dset_oloc_addr, sizeof(haddr_t),
+ curr_dset_info);
+ }
+ else
+ chunk_list->dset_info.single_dset_info = curr_dset_info;
+ curr_dset_info = NULL;
+
+ /*
+ * Now, each rank builds a local list of info about the chunks
+ * they have selected among the chunks in the current dataset
+ */
+
+ /* Set metadata tagging with dataset oheader addr */
+ H5AC_tag(di[dset_idx].dset->oloc.addr, &prev_tag);
+
+ if (H5SL_count(di[dset_idx].layout_io_info.chunk_map->dset_sel_pieces)) {
+ H5SL_node_t *chunk_node;
+ bool filter_partial_edge_chunks;
+
+ /* Determine whether partial edge chunks should be filtered */
+ filter_partial_edge_chunks = !(di[dset_idx].dset->shared->layout.u.chunk.flags &
+ H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS);
+
+ chunk_node = H5SL_first(di[dset_idx].layout_io_info.chunk_map->dset_sel_pieces);
+ while (chunk_node) {
+ H5D_piece_info_t *chunk_info;
+ hsize_t select_npoints;
+
+ chunk_info = (H5D_piece_info_t *)H5SL_item(chunk_node);
+ assert(chunk_info->filtered_dset);
+
+ /* Obtain this chunk's address */
+ if (H5D__chunk_lookup(di[dset_idx].dset, chunk_info->scaled, &udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address");
+
+ /* Initialize rank-local chunk info */
+ local_info_array[buf_idx].chunk_info = chunk_info;
+ local_info_array[buf_idx].chunk_buf_size = 0;
+ local_info_array[buf_idx].num_writers = 0;
+ local_info_array[buf_idx].orig_owner = mpi_rank;
+ local_info_array[buf_idx].new_owner = mpi_rank;
+ local_info_array[buf_idx].buf = NULL;
- if (local_info_array[i].need_read)
- num_chunks_to_read++;
+ select_npoints = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
+ local_info_array[buf_idx].io_size =
+ (size_t)select_npoints * di[dset_idx].type_info.dst_type_size;
- local_info_array[i].skip_filter_pline = FALSE;
- if (!filter_partial_edge_chunks) {
/*
- * If this is a partial edge chunk and the "don't filter partial edge
- * chunks" flag is set, make sure not to apply filters to the chunk.
+ * Determine whether this chunk will need to be read from the file. If this is
+ * a read operation, the chunk will be read. If this is a write operation, we
+ * generally need to read a filtered chunk from the file before modifying it,
+ * unless the chunk is being fully overwritten.
+ *
+ * TODO: Currently the full overwrite status of a chunk is only obtained on a
+ * per-rank basis. This means that if the total selection in the chunk, as
+ * determined by the combination of selections of all of the ranks interested in
+ * the chunk, covers the entire chunk, the performance optimization of not reading
+ * the chunk from the file is still valid, but is not applied in the current
+ * implementation.
+ *
+ * To implement this case, a few approaches were considered:
+ *
+ * - Keep a running total (distributed to each rank) of the number of chunk
+ * elements selected during chunk redistribution and compare that to the total
+ * number of elements in the chunk once redistribution is finished
+ *
+ * - Process all incoming chunk messages before doing I/O (these are currently
+ * processed AFTER doing I/O), combine the owning rank's selection in a chunk
+ * with the selections received from other ranks and check to see whether that
+ * combined selection covers the entire chunk
+ *
+ * The first approach will be dangerous if the application performs an overlapping
+ * write to a chunk, as the number of selected elements can equal or exceed the
+ * number of elements in the chunk without the whole chunk selection being covered.
+ * While it might be considered erroneous for an application to do an overlapping
+ * write, we don't explicitly disallow it.
+ *
+ * The second approach contains a bit of complexity in that part of the chunk
+ * messages will be needed before doing I/O and part will be needed after doing I/O.
+ * Since modification data from chunk messages can't be applied until after any I/O
+ * is performed (otherwise, we'll overwrite any applied modification data), chunk
+ * messages are currently entirely processed after I/O. However, in order to determine
+ * if a chunk is being fully overwritten, we need the dataspace portion of the chunk
+ * messages before doing I/O. The naive way to do this is to process chunk messages
+ * twice, using just the relevant information from the message before and after I/O.
+ * The better way would be to avoid processing chunk messages twice by extracting (and
+ * keeping around) the dataspace portion of the message before I/O and processing the
+ * rest of the chunk message after I/O. Note that the dataspace portion of each chunk
+ * message is used to correctly apply chunk modification data from the message, so
+ * must be kept around both before and after I/O in this case.
*/
- if (H5D__chunk_is_partial_edge_chunk(di->dset->shared->ndims,
- di->dset->shared->layout.u.chunk.dim, chunk_info->scaled,
- di->dset->shared->curr_dims))
- local_info_array[i].skip_filter_pline = TRUE;
- }
+ if (io_info->op_type == H5D_IO_OP_READ)
+ local_info_array[buf_idx].need_read = true;
+ else {
+ local_info_array[buf_idx].need_read =
+ local_info_array[buf_idx].io_size <
+ (size_t)di[dset_idx].dset->shared->layout.u.chunk.size;
+ }
- /* Initialize the chunk's shared info */
- local_info_array[i].chunk_current = udata.chunk_block;
- local_info_array[i].chunk_new = udata.chunk_block;
+ if (local_info_array[buf_idx].need_read)
+ num_chunks_to_read++;
- /*
- * Check if the list is not in ascending order of offset in the file
- * or has unallocated chunks. In either case, the list should get
- * sorted.
- */
- if (i) {
- haddr_t curr_chunk_offset = local_info_array[i].chunk_current.offset;
- haddr_t prev_chunk_offset = local_info_array[i - 1].chunk_current.offset;
+ local_info_array[buf_idx].skip_filter_pline = false;
+ if (!filter_partial_edge_chunks) {
+ /*
+ * If this is a partial edge chunk and the "don't filter partial edge
+ * chunks" flag is set, make sure not to apply filters to the chunk.
+ */
+ if (H5D__chunk_is_partial_edge_chunk(
+ di[dset_idx].dset->shared->ndims, di[dset_idx].dset->shared->layout.u.chunk.dim,
+ chunk_info->scaled, di[dset_idx].dset->shared->curr_dims))
+ local_info_array[buf_idx].skip_filter_pline = true;
+ }
+
+ /* Initialize the chunk's shared info */
+ local_info_array[buf_idx].chunk_current = udata.chunk_block;
+ local_info_array[buf_idx].chunk_new = udata.chunk_block;
+
+ /*
+ * Check if the list is not in ascending order of offset in the file
+ * or has unallocated chunks. In either case, the list should get
+ * sorted.
+ */
+ if (!need_sort && buf_idx) {
+ haddr_t curr_chunk_offset = local_info_array[buf_idx].chunk_current.offset;
+ haddr_t prev_chunk_offset = local_info_array[buf_idx - 1].chunk_current.offset;
+
+ if (!H5_addr_defined(prev_chunk_offset) || !H5_addr_defined(curr_chunk_offset) ||
+ (curr_chunk_offset < prev_chunk_offset))
+ need_sort = true;
+ }
+
+ /* Needed for proper hashing later on */
+ memset(&local_info_array[buf_idx].index_info, 0, sizeof(H5D_chunk_index_info_t));
+
+ /*
+ * Extensible arrays may calculate a chunk's index a little differently
+ * than normal when the dataset's unlimited dimension is not the
+ * slowest-changing dimension, so set the index here based on what the
+ * extensible array code calculated instead of what was calculated
+ * in the chunk file mapping.
+ */
+ if (di[dset_idx].dset->shared->layout.u.chunk.idx_type == H5D_CHUNK_IDX_EARRAY)
+ local_info_array[buf_idx].index_info.chunk_idx = udata.chunk_idx;
+ else
+ local_info_array[buf_idx].index_info.chunk_idx = chunk_info->index;
- if (!H5_addr_defined(prev_chunk_offset) || !H5_addr_defined(curr_chunk_offset) ||
- (curr_chunk_offset < prev_chunk_offset))
- need_sort = TRUE;
+ assert(H5_addr_defined(di[dset_idx].dset->oloc.addr));
+ local_info_array[buf_idx].index_info.dset_oloc_addr = di[dset_idx].dset->oloc.addr;
+
+ local_info_array[buf_idx].index_info.filter_mask = udata.filter_mask;
+ local_info_array[buf_idx].index_info.need_insert = false;
+
+ buf_idx++;
+
+ chunk_node = H5SL_next(chunk_node);
}
+ }
+ else if (H5F_get_coll_metadata_reads(di[dset_idx].dset->oloc.file)) {
+ hsize_t scaled[H5O_LAYOUT_NDIMS] = {0};
/*
- * Extensible arrays may calculate a chunk's index a little differently
- * than normal when the dataset's unlimited dimension is not the
- * slowest-changing dimension, so set the index here based on what the
- * extensible array code calculated instead of what was calculated
- * in the chunk file mapping.
+ * If this rank has no selection in the dataset and collective
+ * metadata reads are enabled, do a fake lookup of a chunk to
+ * ensure that this rank has the chunk index opened. Otherwise,
+ * only the ranks that had a selection will have opened the
+ * chunk index and they will have done so independently. Therefore,
+ * when ranks with no selection participate in later collective
+ * metadata reads, they will try to open the chunk index collectively
+ * and issues will occur since other ranks won't participate.
+ *
+ * In the future, we should consider having a chunk index "open"
+ * callback that can be used to ensure collectivity between ranks
+ * in a more natural way, but this hack should suffice for now.
*/
- if (di->dset->shared->layout.u.chunk.idx_type == H5D_CHUNK_IDX_EARRAY)
- local_info_array[i].index_info.chunk_idx = udata.chunk_idx;
- else
- local_info_array[i].index_info.chunk_idx = chunk_info->index;
-
- local_info_array[i].index_info.filter_mask = udata.filter_mask;
- local_info_array[i].index_info.need_insert = FALSE;
-
- chunk_node = H5SL_next(chunk_node);
+ if (H5D__chunk_lookup(di[dset_idx].dset, scaled, &udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address");
}
- /* Ensure the chunk list is sorted in ascending order of offset in the file */
- if (need_sort)
- qsort(local_info_array, num_chunks_selected, sizeof(H5D_filtered_collective_chunk_info_t),
- H5D__cmp_filtered_collective_io_info_entry);
+ /* Reset metadata tagging */
+ H5AC_tag(prev_tag, NULL);
}
- else if (H5F_get_coll_metadata_reads(di->dset->oloc.file)) {
- hsize_t scaled[H5O_LAYOUT_NDIMS] = {0};
- /*
- * If this rank has no selection in the dataset and collective
- * metadata reads are enabled, do a fake lookup of a chunk to
- * ensure that this rank has the chunk index opened. Otherwise,
- * only the ranks that had a selection will have opened the
- * chunk index and they will have done so independently. Therefore,
- * when ranks with no selection participate in later collective
- * metadata reads, they will try to open the chunk index collectively
- * and issues will occur since other ranks won't participate.
- *
- * In the future, we should consider having a chunk index "open"
- * callback that can be used to ensure collectivity between ranks
- * in a more natural way, but this hack should suffice for now.
- */
- if (H5D__chunk_lookup(di->dset, scaled, &udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address");
- }
+ /* Ensure the chunk list is sorted in ascending order of offset in the file */
+ if (local_info_array && need_sort)
+ qsort(local_info_array, num_chunks_selected, sizeof(H5D_filtered_collective_chunk_info_t),
+ H5D__cmp_filtered_collective_io_info_entry);
chunk_list->chunk_infos = local_info_array;
chunk_list->num_chunk_infos = num_chunks_selected;
@@ -3122,6 +3486,37 @@ H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info, const
done:
if (ret_value < 0) {
+ /* Free temporary cached dataset info object */
+ if (curr_dset_info) {
+ if (curr_dset_info->fb_info_init && H5D__fill_term(&curr_dset_info->fb_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't release fill buffer info");
+ if (curr_dset_info->fill_space && H5S_close(curr_dset_info->fill_space) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
+
+ H5MM_free(curr_dset_info);
+ curr_dset_info = NULL;
+
+ if (num_dset_infos == 1)
+ chunk_list->dset_info.single_dset_info = NULL;
+ }
+
+ /* Free resources used by cached dataset info hash table */
+ if (num_dset_infos > 1) {
+ H5D_mpio_filtered_dset_info_t *tmp;
+
+ HASH_ITER(hh, chunk_list->dset_info.dset_info_hash_table, curr_dset_info, tmp)
+ {
+ HASH_DELETE(hh, chunk_list->dset_info.dset_info_hash_table, curr_dset_info);
+ H5MM_free(curr_dset_info);
+ curr_dset_info = NULL;
+ }
+ }
+
+ if (num_dset_infos == 1)
+ chunk_list->dset_info.single_dset_info = NULL;
+ else
+ chunk_list->dset_info.dset_info_hash_table = NULL;
+
H5MM_free(local_info_array);
}
@@ -3158,10 +3553,9 @@ H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_li
const H5D_io_info_t *io_info, int mpi_rank, int mpi_size,
size_t **rank_chunks_assigned_map)
{
- hbool_t redistribute_on_all_ranks;
+ bool redistribute_on_all_ranks;
size_t *num_chunks_map = NULL;
size_t coll_chunk_list_size = 0;
- size_t i;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -3189,8 +3583,8 @@ H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_li
num_chunks_map, 1, H5_SIZE_T_AS_MPI_TYPE, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
- for (i = 0; i < (size_t)mpi_size; i++)
- coll_chunk_list_size += num_chunks_map[i];
+ for (int curr_rank = 0; curr_rank < mpi_size; curr_rank++)
+ coll_chunk_list_size += num_chunks_map[curr_rank];
/*
* Determine whether we should perform chunk redistribution on all
@@ -3244,13 +3638,13 @@ done:
* Purpose: Routine to perform redistribution of shared chunks during
* parallel writes to datasets with filters applied.
*
- * If `all_ranks_involved` is TRUE, chunk redistribution
+ * If `all_ranks_involved` is true, chunk redistribution
* occurs on all MPI ranks. This is usually done when there
* is a relatively small number of chunks involved in order to
* cut down on MPI communication overhead while increasing
* total memory usage a bit.
*
- * If `all_ranks_involved` is FALSE, only rank 0 will perform
+ * If `all_ranks_involved` is false, only rank 0 will perform
* chunk redistribution. This is usually done when there is
* a relatively large number of chunks involved in order to
* cut down on total memory usage at the cost of increased
@@ -3260,21 +3654,23 @@ done:
*
* - All MPI ranks send their list of selected chunks to the
* ranks involved in chunk redistribution. Then, the
- * involved ranks sort this new list in order of chunk
- * index.
+ * involved ranks sort this new list in order of:
+ *
+ * dataset object header address -> chunk index value ->
+ * original owning MPI rank for chunk
*
* - The involved ranks scan the list looking for matching
- * runs of chunk index values (corresponding to a shared
- * chunk which has been selected by more than one rank in
- * the I/O operation) and for each shared chunk,
- * redistribute the chunk to the MPI rank writing to the
- * chunk which currently has the least amount of chunks
- * assigned to it. This is done by modifying the "new_owner"
- * field in each of the list entries corresponding to that
- * chunk. The involved ranks then re-sort the list in order
- * of original chunk owner so that each rank's section of
- * contributed chunks is contiguous in the collective chunk
- * list.
+ * runs of (dataset object header address, chunk index value)
+ * pairs (corresponding to a shared chunk which has been
+ * selected by more than one rank in the I/O operation) and
+ * for each shared chunk, redistribute the chunk to the MPI
+ * rank writing to the chunk which currently has the fewest
+ * chunks assigned to it. This is done by modifying
+ * the "new_owner" field in each of the list entries
+ * corresponding to that chunk. The involved ranks then
+ * re-sort the list in order of original chunk owner so that
+ * each rank's section of contributed chunks is contiguous
+ * in the collective chunk list.
*
* - If chunk redistribution occurred on all ranks, each rank
* scans through the collective chunk list to find their
@@ -3291,14 +3687,13 @@ done:
*/
static herr_t
H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list,
- size_t *num_chunks_assigned_map, hbool_t all_ranks_involved,
+ size_t *num_chunks_assigned_map, bool all_ranks_involved,
const H5D_io_info_t *io_info, int mpi_rank, int mpi_size)
{
MPI_Datatype struct_type;
MPI_Datatype packed_type;
- hbool_t struct_type_derived = FALSE;
- hbool_t packed_type_derived = FALSE;
- size_t i;
+ bool struct_type_derived = false;
+ bool packed_type_derived = false;
size_t coll_chunk_list_num_entries = 0;
void *coll_chunk_list = NULL;
int *counts_disps_array = NULL;
@@ -3349,15 +3744,15 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
/* Set the receive counts from the assigned chunks map */
counts_ptr = counts_disps_array;
- for (i = 0; i < (size_t)mpi_size; i++)
- H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+ for (int curr_rank = 0; curr_rank < mpi_size; curr_rank++)
+ H5_CHECKED_ASSIGN(counts_ptr[curr_rank], int, num_chunks_assigned_map[curr_rank], size_t);
/* Set the displacements into the receive buffer for the gather operation */
displacements_ptr = &counts_disps_array[mpi_size];
*displacements_ptr = 0;
- for (i = 1; i < (size_t)mpi_size; i++)
- displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+ for (int curr_rank = 1; curr_rank < mpi_size; curr_rank++)
+ displacements_ptr[curr_rank] = displacements_ptr[curr_rank - 1] + counts_ptr[curr_rank - 1];
}
}
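
The counts/displacements setup above is the standard MPI gather recipe: per-rank
receive counts, with displacements computed as an exclusive prefix sum of the
counts. A minimal standalone sketch of the same recipe (a toy MPI program, not
HDF5 code):

    #include <mpi.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        int rank, size;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        /* Each rank contributes (rank + 1) integers */
        int  sendcount = rank + 1;
        int *sendbuf   = malloc((size_t)sendcount * sizeof(int));
        for (int i = 0; i < sendcount; i++)
            sendbuf[i] = rank;

        int *counts = NULL, *displs = NULL, *recvbuf = NULL;
        if (rank == 0) {
            /* One allocation holds both counts and displacements */
            counts = malloc(2 * (size_t)size * sizeof(int));
            displs = counts + size;

            for (int r = 0; r < size; r++)
                counts[r] = r + 1; /* known a priori in this toy example */

            /* Displacements are an exclusive prefix sum of the counts */
            displs[0] = 0;
            for (int r = 1; r < size; r++)
                displs[r] = displs[r - 1] + counts[r - 1];

            recvbuf = malloc((size_t)(displs[size - 1] + counts[size - 1]) * sizeof(int));
        }

        MPI_Gatherv(sendbuf, sendcount, MPI_INT, recvbuf, counts, displs, MPI_INT, 0,
                    MPI_COMM_WORLD);

        if (rank == 0)
            printf("gathered %d ints\n", displs[size - 1] + counts[size - 1]);

        free(sendbuf);
        free(counts);
        free(recvbuf);
        MPI_Finalize();
        return 0;
    }
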
@@ -3366,9 +3761,11 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
* necessary for MPI communication
*/
if (H5D__mpio_get_chunk_redistribute_info_types(&packed_type, &packed_type_derived, &struct_type,
- &struct_type_derived) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+ &struct_type_derived) < 0) {
+ /* Push an error, but still participate in collective gather operation */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
"can't create derived datatypes for chunk redistribution info");
+ }
/* Perform gather operation */
if (H5_mpio_gatherv_alloc(chunk_list->chunk_infos, num_chunks_int, struct_type, counts_ptr,
@@ -3392,15 +3789,14 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
if (all_ranks_involved || (mpi_rank == 0)) {
H5D_chunk_redistribute_info_t *chunk_entry;
- hsize_t curr_chunk_idx;
- size_t set_begin_index;
- int num_writers;
- int new_chunk_owner;
/* Clear the mapping from rank value -> number of assigned chunks */
memset(num_chunks_assigned_map, 0, (size_t)mpi_size * sizeof(*num_chunks_assigned_map));
- /* Sort collective chunk list according to chunk index */
+ /*
+ * Sort collective chunk list according to:
+ * dataset object header address -> chunk index value -> original owning MPI rank for chunk
+ */
qsort(coll_chunk_list, coll_chunk_list_num_entries, sizeof(H5D_chunk_redistribute_info_t),
H5D__cmp_chunk_redistribute_info);
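
The three-level sort order can be pictured with a plain qsort comparator; the
struct and function below are hypothetical stand-ins, not the real
H5D_chunk_redistribute_info_t or H5D__cmp_chunk_redistribute_info:

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct toy_redistribute_info {
        uint64_t dset_oloc_addr; /* stand-in for haddr_t */
        uint64_t chunk_idx;      /* stand-in for hsize_t */
        int      orig_owner;     /* original owning MPI rank */
    } toy_redistribute_info;

    static int
    toy_cmp_redistribute_info(const void *a, const void *b)
    {
        const toy_redistribute_info *ea = a;
        const toy_redistribute_info *eb = b;

        /* Primary key: dataset object header address */
        if (ea->dset_oloc_addr != eb->dset_oloc_addr)
            return (ea->dset_oloc_addr < eb->dset_oloc_addr) ? -1 : 1;

        /* Secondary key: chunk index within the dataset */
        if (ea->chunk_idx != eb->chunk_idx)
            return (ea->chunk_idx < eb->chunk_idx) ? -1 : 1;

        /* Tertiary key: original owning MPI rank */
        return (ea->orig_owner > eb->orig_owner) - (ea->orig_owner < eb->orig_owner);
    }

    /* Usage:
     *   qsort(list, nentries, sizeof(toy_redistribute_info),
     *         toy_cmp_redistribute_info);
     */

This ordering groups all duplicate entries for a shared chunk into one
contiguous run, which is what lets the scan below process each shared chunk as
a single set.
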
@@ -3413,21 +3809,30 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
* chunks).
*/
chunk_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[0];
- for (i = 0; i < coll_chunk_list_num_entries;) {
+ for (size_t entry_idx = 0; entry_idx < coll_chunk_list_num_entries;) {
+ haddr_t curr_oloc_addr;
+ hsize_t curr_chunk_idx;
+ size_t set_begin_index;
+ bool keep_processing;
+ int num_writers;
+ int new_chunk_owner;
+
/* Set chunk's initial new owner to its original owner */
new_chunk_owner = chunk_entry->orig_owner;
/*
- * Set the current chunk index so we know when we've processed
- * all duplicate entries for a particular shared chunk
+ * Set the current dataset object header address and chunk
+ * index value so we know when we've processed all duplicate
+ * entries for a particular shared chunk
*/
+ curr_oloc_addr = chunk_entry->dset_oloc_addr;
curr_chunk_idx = chunk_entry->chunk_idx;
/* Reset the initial number of writers to this chunk */
num_writers = 0;
/* Set index for the beginning of this section of duplicate chunk entries */
- set_begin_index = i;
+ set_begin_index = entry_idx;
/*
* Process each chunk entry in the set for the current
@@ -3448,13 +3853,21 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
num_writers++;
chunk_entry++;
- } while (++i < coll_chunk_list_num_entries && chunk_entry->chunk_idx == curr_chunk_idx);
+
+ keep_processing =
+ /* Make sure we haven't run out of chunks in the chunk list */
+ (++entry_idx < coll_chunk_list_num_entries) &&
+ /* Make sure the chunk we're looking at is in the same dataset */
+ (H5_addr_eq(chunk_entry->dset_oloc_addr, curr_oloc_addr)) &&
+ /* Make sure the chunk we're looking at is the same chunk */
+ (chunk_entry->chunk_idx == curr_chunk_idx);
+ } while (keep_processing);
/* We should never have more writers to a chunk than the number of MPI ranks */
assert(num_writers <= mpi_size);
/* Set all processed chunk entries' "new_owner" and "num_writers" fields */
- for (; set_begin_index < i; set_begin_index++) {
+ for (; set_begin_index < entry_idx; set_begin_index++) {
H5D_chunk_redistribute_info_t *entry;
entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[set_begin_index];
@@ -3488,29 +3901,32 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
}
if (all_ranks_involved) {
+ size_t entry_idx;
+
/*
* If redistribution occurred on all ranks, search for the section
* in the collective chunk list corresponding to this rank's locally
* selected chunks and update the local list after redistribution.
*/
- for (i = 0; i < coll_chunk_list_num_entries; i++)
- if (mpi_rank == ((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i].orig_owner)
+ for (entry_idx = 0; entry_idx < coll_chunk_list_num_entries; entry_idx++)
+ if (mpi_rank == ((H5D_chunk_redistribute_info_t *)coll_chunk_list)[entry_idx].orig_owner)
break;
- for (size_t j = 0; j < (size_t)num_chunks_int; j++) {
+ for (size_t info_idx = 0; info_idx < (size_t)num_chunks_int; info_idx++) {
H5D_chunk_redistribute_info_t *coll_entry;
- coll_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i++];
+ coll_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[entry_idx++];
- chunk_list->chunk_infos[j].new_owner = coll_entry->new_owner;
- chunk_list->chunk_infos[j].num_writers = coll_entry->num_writers;
+ chunk_list->chunk_infos[info_idx].new_owner = coll_entry->new_owner;
+ chunk_list->chunk_infos[info_idx].num_writers = coll_entry->num_writers;
/*
* Check if the chunk list struct's `num_chunks_to_read` field
* needs to be updated
*/
- if (chunk_list->chunk_infos[j].need_read && (chunk_list->chunk_infos[j].new_owner != mpi_rank)) {
- chunk_list->chunk_infos[j].need_read = FALSE;
+ if (chunk_list->chunk_infos[info_idx].need_read &&
+ (chunk_list->chunk_infos[info_idx].new_owner != mpi_rank)) {
+ chunk_list->chunk_infos[info_idx].need_read = false;
assert(chunk_list->num_chunks_to_read > 0);
chunk_list->num_chunks_to_read--;
@@ -3533,9 +3949,10 @@ H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chun
* their chunk list struct's `num_chunks_to_read` field since it
* may now be out of date.
*/
- for (i = 0; i < chunk_list->num_chunk_infos; i++) {
- if ((chunk_list->chunk_infos[i].new_owner != mpi_rank) && chunk_list->chunk_infos[i].need_read) {
- chunk_list->chunk_infos[i].need_read = FALSE;
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ if ((chunk_list->chunk_infos[info_idx].new_owner != mpi_rank) &&
+ chunk_list->chunk_infos[info_idx].need_read) {
+ chunk_list->chunk_infos[info_idx].need_read = false;
assert(chunk_list->num_chunks_to_read > 0);
chunk_list->num_chunks_to_read--;
@@ -3600,9 +4017,10 @@ done:
* owned by that rank, the rank sends the data it wishes to
* update the chunk with to the MPI rank that now has
* ownership of that chunk. To do this, it encodes the
- * chunk's index, its selection in the chunk and its
- * modification data into a buffer and then posts a
- * non-blocking MPI_Issend to the owning rank.
+ * chunk's index value, the dataset's object header address
+ * (only for the multi-dataset I/O case), its selection in
+ * the chunk and its modification data into a buffer and
+ * then posts a non-blocking MPI_Issend to the owning rank.
*
* Once this step is complete, all MPI ranks allocate arrays
* to hold chunk message receive buffers and MPI request
@@ -3644,9 +4062,8 @@ done:
*/
static herr_t
H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list, H5D_io_info_t *io_info,
- H5D_dset_io_info_t *dset_info, int mpi_rank,
- int H5_ATTR_NDEBUG_UNUSED mpi_size, unsigned char ***chunk_msg_bufs,
- int *chunk_msg_bufs_len)
+ int mpi_rank, int H5_ATTR_NDEBUG_UNUSED mpi_size,
+ unsigned char ***chunk_msg_bufs, int *chunk_msg_bufs_len)
{
#if H5_CHECK_MPI_VERSION(3, 0)
H5D_filtered_collective_chunk_info_t *chunk_table = NULL;
@@ -3656,14 +4073,14 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
MPI_Request *send_requests = NULL;
MPI_Request *recv_requests = NULL;
MPI_Request ibarrier = MPI_REQUEST_NULL;
- hbool_t mem_iter_init = FALSE;
- hbool_t ibarrier_posted = FALSE;
+ bool mem_iter_init = false;
+ bool ibarrier_posted = false;
size_t send_bufs_nalloc = 0;
size_t num_send_requests = 0;
size_t num_recv_requests = 0;
size_t num_msgs_incoming = 0;
+ size_t hash_keylen = 0;
size_t last_assigned_idx;
- size_t i;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -3671,7 +4088,6 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
assert(chunk_list);
assert(io_info);
- assert(dset_info);
assert(mpi_size > 1);
assert(chunk_msg_bufs);
assert(chunk_msg_bufs_len);
@@ -3685,6 +4101,9 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
H5CX_set_libver_bounds(NULL);
if (chunk_list->num_chunk_infos > 0) {
+ hash_keylen = chunk_list->chunk_hash_table_keylen;
+ assert(hash_keylen > 0);
+
/* Allocate a selection iterator for iterating over chunk dataspaces */
if (NULL == (mem_iter = H5FL_MALLOC(H5S_sel_iter_t)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate dataspace selection iterator");
@@ -3716,8 +4135,9 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
* synchronous sends to send the data this rank is writing to
* the rank that does own the chunk.
*/
- for (i = 0, last_assigned_idx = 0; i < chunk_list->num_chunk_infos; i++) {
- H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[i];
+ last_assigned_idx = 0;
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[info_idx];
if (mpi_rank == chunk_entry->new_owner) {
num_msgs_incoming += (size_t)(chunk_entry->num_writers - 1);
@@ -3727,19 +4147,24 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
* does own, since it has sent the necessary data and is no longer
* interested in the chunks it doesn't own.
*/
- chunk_list->chunk_infos[last_assigned_idx] = chunk_list->chunk_infos[i];
+ chunk_list->chunk_infos[last_assigned_idx] = chunk_list->chunk_infos[info_idx];
/*
* Since, at large scale, a chunk's index value may be larger than
* the maximum value that can be stored in an int, we cannot rely
* on using a chunk's index value as the tag for the MPI messages
- * sent/received for a chunk. Therefore, add this chunk to a hash
- * table with the chunk's index as a key so that we can quickly find
- * the chunk when processing chunk messages that were received. The
- * message itself will contain the chunk's index so we can update
- * the correct chunk with the received data.
+ * sent/received for a chunk. Further, to support the multi-dataset
+ * I/O case, we can't rely on being able to distinguish between
+ * chunks by their chunk index value alone since two chunks from
+ * different datasets could have the same chunk index value.
+ * Therefore, add this chunk to a hash table with the dataset's
+ * object header address + the chunk's index value as a key so that
+ * we can quickly find the chunk when processing chunk messages that
+ * were received. The message itself will contain the dataset's
+ * object header address and the chunk's index value so we can
+ * update the correct chunk with the received data.
*/
- HASH_ADD(hh, chunk_table, index_info.chunk_idx, sizeof(hsize_t),
+ HASH_ADD(hh, chunk_table, index_info.chunk_idx, hash_keylen,
&chunk_list->chunk_infos[last_assigned_idx]);
last_assigned_idx++;
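
uthash's HASH_ADD/HASH_FIND key off a contiguous run of bytes starting at a
named field, which is what lets a single key length cover either the chunk
index alone (single-dataset case) or the index plus the dataset object header
address (multi-dataset case). A standalone sketch of the composite-key idea
(hypothetical struct; it assumes the two key fields sit adjacent with no
padding, since uthash compares keys bytewise):

    #include <stddef.h>
    #include <stdint.h>
    #include "uthash.h"

    typedef struct toy_chunk_entry {
        /* Key fields: must be contiguous in memory, no padding between them */
        uint64_t chunk_idx;      /* stand-in for hsize_t */
        uint64_t dset_oloc_addr; /* stand-in for haddr_t */

        int            new_owner;
        UT_hash_handle hh;
    } toy_chunk_entry;

    static toy_chunk_entry *toy_table = NULL;

    /* keylen selects the key: sizeof(chunk_idx) alone, or both key fields */
    static void
    toy_add_chunk(toy_chunk_entry *entry, size_t keylen)
    {
        HASH_ADD(hh, toy_table, chunk_idx, keylen, entry);
    }

    static toy_chunk_entry *
    toy_find_chunk(const void *key_buf, size_t keylen)
    {
        toy_chunk_entry *found = NULL;

        HASH_FIND(hh, toy_table, key_buf, keylen, found);
        return found;
    }
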
@@ -3751,8 +4176,8 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
size_t mod_data_size = 0;
size_t space_size = 0;
- /* Add the size of the chunk index to the encoded size */
- mod_data_size += sizeof(hsize_t);
+ /* Add the size of the chunk hash table key to the encoded size */
+ mod_data_size += hash_keylen;
/* Determine size of serialized chunk file dataspace */
if (H5S_encode(chunk_info->fspace, &mod_data_p, &space_size) < 0)
@@ -3763,7 +4188,7 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
H5_CHECK_OVERFLOW(iter_nelmts, hsize_t, size_t);
- mod_data_size += (size_t)iter_nelmts * dset_info->type_info.src_type_size;
+ mod_data_size += (size_t)iter_nelmts * chunk_info->dset_info->type_info.src_type_size;
if (NULL == (msg_send_bufs[num_send_requests] = H5MM_malloc(mod_data_size)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
@@ -3771,23 +4196,28 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
mod_data_p = msg_send_bufs[num_send_requests];
- /* Store the chunk's index into the buffer */
- memcpy(mod_data_p, &chunk_entry->index_info.chunk_idx, sizeof(hsize_t));
- mod_data_p += sizeof(hsize_t);
+ /*
+ * Add the chunk hash table key (chunk index value + possibly
+ * dataset object header address) into the buffer
+ */
+ H5MM_memcpy(mod_data_p, &chunk_entry->index_info.chunk_idx, hash_keylen);
+ mod_data_p += hash_keylen;
/* Serialize the chunk's file dataspace into the buffer */
if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace");
/* Initialize iterator for memory selection */
- if (H5S_select_iter_init(mem_iter, chunk_info->mspace, dset_info->type_info.src_type_size,
+ if (H5S_select_iter_init(mem_iter, chunk_info->mspace,
+ chunk_info->dset_info->type_info.src_type_size,
H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
"unable to initialize memory selection information");
- mem_iter_init = TRUE;
+ mem_iter_init = true;
/* Collect the modification data into the buffer */
- if (0 == H5D__gather_mem(dset_info->buf.cvp, mem_iter, (size_t)iter_nelmts, mod_data_p))
+ if (0 ==
+ H5D__gather_mem(chunk_info->dset_info->buf.cvp, mem_iter, (size_t)iter_nelmts, mod_data_p))
HGOTO_ERROR(H5E_IO, H5E_CANTGATHER, FAIL, "couldn't gather from write buffer");
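
The finished message is a byte-packed triple: hash table key, serialized file
dataspace, raw modification data. A sketch of that layout in plain C (the
dataspace encoding is faked here as a length-prefixed blob; the real code uses
H5S_encode, and all names below are hypothetical):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    static unsigned char *
    toy_pack_chunk_msg(const void *key, size_t keylen,
                       const void *space_blob, uint32_t space_size,
                       const void *mod_data, size_t data_size,
                       size_t *msg_size_out)
    {
        size_t         msg_size = keylen + sizeof(uint32_t) + space_size + data_size;
        unsigned char *msg      = malloc(msg_size);
        unsigned char *p        = msg;

        if (!msg)
            return NULL;

        memcpy(p, key, keylen); /* hash table key */
        p += keylen;

        memcpy(p, &space_size, sizeof(uint32_t)); /* stand-in for H5S_encode */
        p += sizeof(uint32_t);
        memcpy(p, space_blob, space_size);
        p += space_size;

        memcpy(p, mod_data, data_size); /* gathered write data */

        *msg_size_out = msg_size;
        return msg;
    }
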
/*
@@ -3830,7 +4260,7 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release memory selection iterator");
- mem_iter_init = FALSE;
+ mem_iter_init = false;
}
}
@@ -3930,14 +4360,14 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
/* Post non-blocking barrier */
if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(io_info->comm, &ibarrier)))
HMPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code)
- ibarrier_posted = TRUE;
+ ibarrier_posted = true;
/*
* Now that all send requests have completed, free up the
* send buffers used in the non-blocking operations
*/
if (msg_send_bufs) {
- for (i = 0; i < num_send_requests; i++) {
+ for (size_t i = 0; i < num_send_requests; i++) {
if (msg_send_bufs[i])
H5MM_free(msg_send_bufs[i]);
}
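
The Issend-then-Ibarrier logic above follows the nonblocking-consensus (NBX)
pattern: each rank posts synchronous nonblocking sends, keeps draining incoming
messages, joins a nonblocking barrier once its own sends complete, and stops
receiving when the barrier completes, at which point no message can still be in
flight. A simplified standalone sketch (one int per destination, no error
handling, not the HDF5 implementation):

    #include <mpi.h>
    #include <stdbool.h>
    #include <stdlib.h>

    static void
    toy_nbx_exchange(MPI_Comm comm, const int *dests, int ndests, int payload)
    {
        MPI_Request *sends          = malloc((size_t)ndests * sizeof(MPI_Request));
        MPI_Request  ibarrier       = MPI_REQUEST_NULL;
        bool         barrier_posted = false;
        bool         done           = false;

        /* Synchronous nonblocking sends: completion implies the receive matched */
        for (int i = 0; i < ndests; i++)
            MPI_Issend(&payload, 1, MPI_INT, dests[i], 0, comm, &sends[i]);

        while (!done) {
            int        flag;
            MPI_Status status;

            /* Drain any message that has arrived */
            MPI_Iprobe(MPI_ANY_SOURCE, 0, comm, &flag, &status);
            if (flag) {
                int msg;

                MPI_Recv(&msg, 1, MPI_INT, status.MPI_SOURCE, 0, comm,
                         MPI_STATUS_IGNORE);
                /* ... process msg ... */
            }

            if (!barrier_posted) {
                int sends_done;

                /* Once all local sends complete, join the nonblocking barrier */
                MPI_Testall(ndests, sends, &sends_done, MPI_STATUSES_IGNORE);
                if (sends_done) {
                    MPI_Ibarrier(comm, &ibarrier);
                    barrier_posted = true;
                }
            }
            else {
                int barrier_done;

                /* Barrier completion: every rank's sends were matched */
                MPI_Test(&ibarrier, &barrier_done, MPI_STATUS_IGNORE);
                if (barrier_done)
                    done = true;
            }
        }

        free(sends);
    }
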
@@ -3972,7 +4402,7 @@ H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk
/* Set the new number of locally-selected chunks */
chunk_list->num_chunk_infos = last_assigned_idx;
- /* Set chunk hash table pointer for future use */
+ /* Set chunk hash table information for future use */
chunk_list->chunk_hash_table = chunk_table;
/* Return chunk message buffers if any were received */
@@ -3988,19 +4418,19 @@ done:
}
if (num_send_requests) {
- for (i = 0; i < num_send_requests; i++) {
+ for (size_t i = 0; i < num_send_requests; i++) {
MPI_Cancel(&send_requests[i]);
}
}
if (recv_requests) {
- for (i = 0; i < num_recv_requests; i++) {
+ for (size_t i = 0; i < num_recv_requests; i++) {
MPI_Cancel(&recv_requests[i]);
}
}
if (msg_recv_bufs) {
- for (i = 0; i < num_recv_requests; i++) {
+ for (size_t i = 0; i < num_recv_requests; i++) {
H5MM_free(msg_recv_bufs[i]);
}
@@ -4016,7 +4446,7 @@ done:
H5MM_free(send_requests);
if (msg_send_bufs) {
- for (i = 0; i < num_send_requests; i++) {
+ for (size_t i = 0; i < num_send_requests; i++) {
if (msg_send_bufs[i])
H5MM_free(msg_send_bufs[i]);
}
@@ -4059,26 +4489,16 @@ done:
*/
static herr_t
H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list,
- const H5D_io_info_t *io_info, const H5D_dset_io_info_t *di,
- int mpi_rank)
+ const H5D_io_info_t *io_info, size_t num_dset_infos, int mpi_rank)
{
- H5D_fill_buf_info_t fb_info;
- H5Z_EDC_t err_detect; /* Error detection info */
- H5Z_cb_t filter_cb; /* I/O filter callback function */
- hsize_t file_chunk_size = 0;
- hsize_t iter_nelmts; /* Number of points to iterate over for the chunk IO operation */
- hbool_t should_fill = FALSE;
- hbool_t fb_info_init = FALSE;
- hbool_t index_empty = FALSE;
- H5S_t *fill_space = NULL;
- void *base_read_buf = NULL;
- herr_t ret_value = SUCCEED;
+ H5Z_EDC_t err_detect; /* Error detection info */
+ H5Z_cb_t filter_cb; /* I/O filter callback function */
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_PACKAGE
assert(chunk_list);
assert(io_info);
- assert(di);
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TRACE_ENTER(mpi_rank);
@@ -4087,22 +4507,6 @@ H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chun
(void)mpi_rank;
#endif
- if (chunk_list->num_chunk_infos) {
- /* Retrieve filter settings from API context */
- if (H5CX_get_err_detect(&err_detect) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info");
- if (H5CX_get_filter_cb(&filter_cb) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function");
-
- /* Set size of full chunks in dataset */
- file_chunk_size = di->dset->shared->layout.u.chunk.size;
-
- /* Determine if fill values should be "read" for unallocated chunks */
- should_fill = (di->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) ||
- ((di->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) &&
- di->dset->shared->dcpl_cache.fill.fill_defined);
- }
-
/*
* Allocate memory buffers for all chunks being read. Chunk data buffers are of
* the largest size between the chunk's current filtered size and the chunk's true
@@ -4116,29 +4520,61 @@ H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chun
* size; reading into a (smaller) buffer of size equal to the unfiltered
* chunk size would of course be bad.
*/
- for (size_t i = 0; i < chunk_list->num_chunk_infos; i++) {
- H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[i];
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[info_idx];
+ H5D_mpio_filtered_dset_info_t *cached_dset_info;
+ hsize_t file_chunk_size;
assert(chunk_entry->need_read);
+ /* Find the cached dataset info for the dataset this chunk is in */
+ if (num_dset_infos > 1) {
+ HASH_FIND(hh, chunk_list->dset_info.dset_info_hash_table, &chunk_entry->index_info.dset_oloc_addr,
+ sizeof(haddr_t), cached_dset_info);
+ if (cached_dset_info == NULL) {
+ if (chunk_list->all_dset_indices_empty)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ break;
+ }
+ }
+ }
+ else
+ cached_dset_info = chunk_list->dset_info.single_dset_info;
+ assert(cached_dset_info);
+
+ file_chunk_size = cached_dset_info->file_chunk_size;
+
chunk_entry->chunk_buf_size = MAX(chunk_entry->chunk_current.length, file_chunk_size);
if (NULL == (chunk_entry->buf = H5MM_malloc(chunk_entry->chunk_buf_size))) {
- /* Push an error, but participate in collective read */
- HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
- break;
+ if (chunk_list->all_dset_indices_empty)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
+ break;
+ }
}
/*
- * Check if chunk is currently allocated. If not, don't try to
- * read it from the file. Instead, just fill the chunk buffer
- * with the fill value if necessary.
+ * Check whether the chunk needs to be read from the file, based
+ * on whether the dataset's chunk index is empty or the chunk has
+ * a defined address in the file. If the chunk doesn't need to be
+ * read from the file, just fill the chunk buffer with the fill
+ * value if necessary.
*/
- if (H5_addr_defined(chunk_entry->chunk_current.offset)) {
- /* Set first read buffer */
- if (!base_read_buf)
- base_read_buf = chunk_entry->buf;
+ if (cached_dset_info->index_empty || !H5_addr_defined(chunk_entry->chunk_current.offset)) {
+ chunk_entry->need_read = false;
+
+ /* Update field keeping track of number of chunks to read */
+ assert(chunk_list->num_chunks_to_read > 0);
+ chunk_list->num_chunks_to_read--;
+ }
+ if (chunk_entry->need_read) {
/* Set chunk's new length for eventual filter pipeline calls */
if (chunk_entry->skip_filter_pline)
chunk_entry->chunk_new.length = file_chunk_size;
@@ -4146,77 +4582,58 @@ H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chun
chunk_entry->chunk_new.length = chunk_entry->chunk_current.length;
}
else {
- chunk_entry->need_read = FALSE;
-
- /* Update field keeping track of number of chunks to read */
- assert(chunk_list->num_chunks_to_read > 0);
- chunk_list->num_chunks_to_read--;
-
/* Set chunk's new length for eventual filter pipeline calls */
chunk_entry->chunk_new.length = file_chunk_size;
- if (should_fill) {
- /* Initialize fill value buffer if not already initialized */
- if (!fb_info_init) {
- hsize_t chunk_dims[H5S_MAX_RANK];
-
- assert(di->dset->shared->ndims == di->dset->shared->layout.u.chunk.ndims - 1);
- for (size_t j = 0; j < di->dset->shared->layout.u.chunk.ndims - 1; j++)
- chunk_dims[j] = (hsize_t)di->dset->shared->layout.u.chunk.dim[j];
-
- /* Get a dataspace for filling chunk memory buffers */
- if (NULL == (fill_space = H5S_create_simple(di->dset->shared->layout.u.chunk.ndims - 1,
- chunk_dims, NULL)))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create chunk fill dataspace");
-
- /* Initialize fill value buffer */
- if (H5D__fill_init(
- &fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc,
- (void *)&di->dset->shared->dcpl_cache.pline, (H5MM_free_t)H5D__chunk_mem_free,
- (void *)&di->dset->shared->dcpl_cache.pline, &di->dset->shared->dcpl_cache.fill,
- di->dset->shared->type, di->dset->shared->type_id, 0, file_chunk_size) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer");
-
- fb_info_init = TRUE;
- }
+ /* Determine if fill values should be "read" for this unallocated chunk */
+ if (cached_dset_info->should_fill) {
+ assert(cached_dset_info->fb_info_init);
+ assert(cached_dset_info->fb_info.fill_buf);
/* Write fill value to memory buffer */
- assert(fb_info.fill_buf);
- if (H5D__fill(fb_info.fill_buf, di->dset->shared->type, chunk_entry->buf,
- di->type_info.mem_type, fill_space) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
- "couldn't fill chunk buffer with fill value");
+ if (H5D__fill(cached_dset_info->fb_info.fill_buf,
+ cached_dset_info->dset_io_info->type_info.dset_type, chunk_entry->buf,
+ cached_dset_info->dset_io_info->type_info.mem_type,
+ cached_dset_info->fill_space) < 0) {
+ if (chunk_list->all_dset_indices_empty)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+ "couldn't fill chunk buffer with fill value");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+ "couldn't fill chunk buffer with fill value");
+ break;
+ }
+ }
}
}
}
- /*
- * If dataset is incrementally allocated and hasn't been written to
- * yet, the chunk index should be empty. In this case, a collective
- * read of chunks is essentially a no-op, so avoid it here.
- */
- index_empty = FALSE;
- if (di->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR)
- if (H5D__chunk_index_empty(di->dset, &index_empty) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty");
-
- if (!index_empty) {
- /* Perform collective vector read */
+ /* Perform collective vector read if necessary */
+ if (!chunk_list->all_dset_indices_empty)
if (H5D__mpio_collective_filtered_vec_io(chunk_list, io_info->f_sh, H5D_IO_OP_READ) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't perform vector I/O on filtered chunks");
+
+ if (chunk_list->num_chunk_infos) {
+ /* Retrieve filter settings from API context */
+ if (H5CX_get_err_detect(&err_detect) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info");
+ if (H5CX_get_filter_cb(&filter_cb) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function");
}
/*
* Iterate through all the read chunks, unfiltering them and scattering their
* data out to the application's read buffer.
*/
- for (size_t i = 0; i < chunk_list->num_chunk_infos; i++) {
- H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[i];
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[info_idx];
H5D_piece_info_t *chunk_info = chunk_entry->chunk_info;
+ hsize_t iter_nelmts;
/* Unfilter the chunk, unless we didn't read it from the file */
if (chunk_entry->need_read && !chunk_entry->skip_filter_pline) {
- if (H5Z_pipeline(&di->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
+ if (H5Z_pipeline(&chunk_info->dset_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
&(chunk_entry->index_info.filter_mask), err_detect, filter_cb,
(size_t *)&chunk_entry->chunk_new.length, &chunk_entry->chunk_buf_size,
&chunk_entry->buf) < 0)
@@ -4226,26 +4643,21 @@ H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chun
/* Scatter the chunk data to the read buffer */
iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
- if (H5D_select_io_mem(di->buf.vp, chunk_info->mspace, chunk_entry->buf, chunk_info->fspace,
- di->type_info.src_type_size, (size_t)iter_nelmts) < 0)
+ if (H5D_select_io_mem(chunk_info->dset_info->buf.vp, chunk_info->mspace, chunk_entry->buf,
+ chunk_info->fspace, chunk_info->dset_info->type_info.src_type_size,
+ (size_t)iter_nelmts) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't copy chunk data to read buffer");
}
done:
/* Free all resources used by entries in the chunk list */
- for (size_t i = 0; i < chunk_list->num_chunk_infos; i++) {
- if (chunk_list->chunk_infos[i].buf) {
- H5MM_free(chunk_list->chunk_infos[i].buf);
- chunk_list->chunk_infos[i].buf = NULL;
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ if (chunk_list->chunk_infos[info_idx].buf) {
+ H5MM_free(chunk_list->chunk_infos[info_idx].buf);
+ chunk_list->chunk_infos[info_idx].buf = NULL;
}
}
- /* Release the fill buffer info, if it's been initialized */
- if (fb_info_init && H5D__fill_term(&fb_info) < 0)
- HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info");
- if (fill_space && (H5S_close(fill_space) < 0))
- HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
-
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TIME_STOP(mpi_rank);
H5D_MPIO_TRACE_EXIT(mpi_rank);
@@ -4269,58 +4681,27 @@ done:
static herr_t
H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list,
unsigned char **chunk_msg_bufs, int chunk_msg_bufs_len,
- const H5D_io_info_t *io_info, const H5D_dset_io_info_t *di,
- int H5_ATTR_NDEBUG_UNUSED mpi_rank)
+ const H5D_io_info_t *io_info, size_t num_dset_infos, int mpi_rank)
{
- const H5D_type_info_t *type_info = NULL;
- H5D_fill_buf_info_t fb_info;
- H5S_sel_iter_t *sel_iter = NULL; /* Dataspace selection iterator for H5D__scatter_mem */
- H5Z_EDC_t err_detect; /* Error detection info */
- H5Z_cb_t filter_cb; /* I/O filter callback function */
- hsize_t file_chunk_size = 0;
- hsize_t iter_nelmts; /* Number of points to iterate over for the chunk IO operation */
- hbool_t should_fill = FALSE;
- hbool_t fb_info_init = FALSE;
- hbool_t sel_iter_init = FALSE;
- hbool_t index_empty = FALSE;
- size_t i;
- H5S_t *dataspace = NULL;
- H5S_t *fill_space = NULL;
- void *base_read_buf = NULL;
- herr_t ret_value = SUCCEED;
+ H5S_sel_iter_t *sel_iter = NULL; /* Dataspace selection iterator for H5D__scatter_mem */
+ H5Z_EDC_t err_detect; /* Error detection info */
+ H5Z_cb_t filter_cb; /* I/O filter callback function */
+ uint8_t *key_buf = NULL;
+ H5S_t *dataspace = NULL;
+ bool sel_iter_init = false;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_PACKAGE
assert(chunk_list);
assert((chunk_msg_bufs && chunk_list->chunk_hash_table) || 0 == chunk_msg_bufs_len);
assert(io_info);
- assert(di);
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TRACE_ENTER(mpi_rank);
H5D_MPIO_TIME_START(mpi_rank, "Filtered collective chunk update");
#endif
- /* Set convenience pointers */
- type_info = &(di->type_info);
- assert(type_info);
-
- if (chunk_list->num_chunk_infos > 0) {
- /* Retrieve filter settings from API context */
- if (H5CX_get_err_detect(&err_detect) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info");
- if (H5CX_get_filter_cb(&filter_cb) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function");
-
- /* Set size of full chunks in dataset */
- file_chunk_size = di->dset->shared->layout.u.chunk.size;
-
- /* Determine if fill values should be written to chunks */
- should_fill = (di->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) ||
- ((di->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) &&
- di->dset->shared->dcpl_cache.fill.fill_defined);
- }
-
/*
* Allocate memory buffers for all owned chunks. Chunk data buffers are of the
* largest size between the chunk's current filtered size and the chunk's true
@@ -4340,11 +4721,33 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
* size; reading into a (smaller) buffer of size equal to the unfiltered
* chunk size would of course be bad.
*/
- for (i = 0; i < chunk_list->num_chunk_infos; i++) {
- H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[i];
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[info_idx];
+ H5D_mpio_filtered_dset_info_t *cached_dset_info;
+ hsize_t file_chunk_size;
assert(mpi_rank == chunk_entry->new_owner);
+ /* Find the cached dataset info for the dataset this chunk is in */
+ if (num_dset_infos > 1) {
+ HASH_FIND(hh, chunk_list->dset_info.dset_info_hash_table, &chunk_entry->index_info.dset_oloc_addr,
+ sizeof(haddr_t), cached_dset_info);
+ if (cached_dset_info == NULL) {
+ if (chunk_list->all_dset_indices_empty)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ break;
+ }
+ }
+ }
+ else
+ cached_dset_info = chunk_list->dset_info.single_dset_info;
+ assert(cached_dset_info);
+
+ file_chunk_size = cached_dset_info->file_chunk_size;
+
chunk_entry->chunk_buf_size = MAX(chunk_entry->chunk_current.length, file_chunk_size);
/*
@@ -4352,29 +4755,41 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
* out fill values to it, make sure to 0-fill its memory buffer
* so we don't use uninitialized memory.
*/
- if (!H5_addr_defined(chunk_entry->chunk_current.offset) && !should_fill)
+ if (!H5_addr_defined(chunk_entry->chunk_current.offset) && !cached_dset_info->should_fill)
chunk_entry->buf = H5MM_calloc(chunk_entry->chunk_buf_size);
else
chunk_entry->buf = H5MM_malloc(chunk_entry->chunk_buf_size);
if (NULL == chunk_entry->buf) {
- /* Push an error, but participate in collective read */
- HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
- break;
+ if (chunk_list->all_dset_indices_empty)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer");
+ break;
+ }
}
- /* Set chunk's new length for eventual filter pipeline calls */
- if (chunk_entry->need_read) {
+ if (!chunk_entry->need_read)
+ /* Set chunk's new length for eventual filter pipeline calls */
+ chunk_entry->chunk_new.length = file_chunk_size;
+ else {
/*
- * Check if chunk is currently allocated. If not, don't try to
- * read it from the file. Instead, just fill the chunk buffer
- * with the fill value if fill values are to be written.
+ * Check whether the chunk needs to be read from the file, based
+ * on whether the dataset's chunk index is empty or the chunk has
+ * a defined address in the file. If the chunk doesn't need to be
+ * read from the file, just fill the chunk buffer with the fill
+ * value if necessary.
*/
- if (H5_addr_defined(chunk_entry->chunk_current.offset)) {
- /* Set first read buffer */
- if (!base_read_buf)
- base_read_buf = chunk_entry->buf;
+ if (cached_dset_info->index_empty || !H5_addr_defined(chunk_entry->chunk_current.offset)) {
+ chunk_entry->need_read = false;
+ /* Update field keeping track of number of chunks to read */
+ assert(chunk_list->num_chunks_to_read > 0);
+ chunk_list->num_chunks_to_read--;
+ }
+
+ if (chunk_entry->need_read) {
/* Set chunk's new length for eventual filter pipeline calls */
if (chunk_entry->skip_filter_pline)
chunk_entry->chunk_new.length = file_chunk_size;
@@ -4382,81 +4797,57 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
chunk_entry->chunk_new.length = chunk_entry->chunk_current.length;
}
else {
- chunk_entry->need_read = FALSE;
-
- /* Update field keeping track of number of chunks to read */
- assert(chunk_list->num_chunks_to_read > 0);
- chunk_list->num_chunks_to_read--;
-
/* Set chunk's new length for eventual filter pipeline calls */
chunk_entry->chunk_new.length = file_chunk_size;
- if (should_fill) {
- /* Initialize fill value buffer if not already initialized */
- if (!fb_info_init) {
- hsize_t chunk_dims[H5S_MAX_RANK];
-
- assert(di->dset->shared->ndims == di->dset->shared->layout.u.chunk.ndims - 1);
- for (size_t j = 0; j < di->dset->shared->layout.u.chunk.ndims - 1; j++)
- chunk_dims[j] = (hsize_t)di->dset->shared->layout.u.chunk.dim[j];
+ /* Determine if fill values should be "read" for this unallocated chunk */
+ if (cached_dset_info->should_fill) {
+ assert(cached_dset_info->fb_info_init);
+ assert(cached_dset_info->fb_info.fill_buf);
- /* Get a dataspace for filling chunk memory buffers */
- if (NULL == (fill_space = H5S_create_simple(
- di->dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL)))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
- "unable to create chunk fill dataspace");
-
- /* Initialize fill value buffer */
- if (H5D__fill_init(&fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc,
- (void *)&di->dset->shared->dcpl_cache.pline,
- (H5MM_free_t)H5D__chunk_mem_free,
- (void *)&di->dset->shared->dcpl_cache.pline,
- &di->dset->shared->dcpl_cache.fill, di->dset->shared->type,
- di->dset->shared->type_id, 0, file_chunk_size) < 0)
+ /* Write fill value to memory buffer */
+ if (H5D__fill(cached_dset_info->fb_info.fill_buf,
+ cached_dset_info->dset_io_info->type_info.dset_type, chunk_entry->buf,
+ cached_dset_info->dset_io_info->type_info.mem_type,
+ cached_dset_info->fill_space) < 0) {
+ if (chunk_list->all_dset_indices_empty)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
- "can't initialize fill value buffer");
-
- fb_info_init = TRUE;
+ "couldn't fill chunk buffer with fill value");
+ else {
+ /* Push an error, but participate in collective read */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+ "couldn't fill chunk buffer with fill value");
+ break;
+ }
}
-
- /* Write fill value to memory buffer */
- assert(fb_info.fill_buf);
- if (H5D__fill(fb_info.fill_buf, di->dset->shared->type, chunk_entry->buf,
- type_info->mem_type, fill_space) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
- "couldn't fill chunk buffer with fill value");
}
}
}
- else
- chunk_entry->chunk_new.length = file_chunk_size;
}
- /*
- * If dataset is incrementally allocated and hasn't been written to
- * yet, the chunk index should be empty. In this case, a collective
- * read of chunks is essentially a no-op, so avoid it here.
- */
- index_empty = FALSE;
- if (di->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR)
- if (H5D__chunk_index_empty(di->dset, &index_empty) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty");
-
- if (!index_empty) {
- /* Perform collective vector read */
+ /* Perform collective vector read if necessary */
+ if (!chunk_list->all_dset_indices_empty)
if (H5D__mpio_collective_filtered_vec_io(chunk_list, io_info->f_sh, H5D_IO_OP_READ) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't perform vector I/O on filtered chunks");
- }
/*
* Now that all owned chunks have been read, update the chunks
* with modification data from the owning rank and other ranks.
*/
+ if (chunk_list->num_chunk_infos > 0) {
+ /* Retrieve filter settings from API context */
+ if (H5CX_get_err_detect(&err_detect) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info");
+ if (H5CX_get_filter_cb(&filter_cb) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function");
+ }
+
/* Process all chunks with data from the owning rank first */
- for (i = 0; i < chunk_list->num_chunk_infos; i++) {
- H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[i];
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ H5D_filtered_collective_chunk_info_t *chunk_entry = &chunk_list->chunk_infos[info_idx];
H5D_piece_info_t *chunk_info = chunk_entry->chunk_info;
+ hsize_t iter_nelmts;
assert(mpi_rank == chunk_entry->new_owner);
@@ -4465,7 +4856,7 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
* the file, so we need to unfilter it
*/
if (chunk_entry->need_read && !chunk_entry->skip_filter_pline) {
- if (H5Z_pipeline(&di->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
+ if (H5Z_pipeline(&chunk_info->dset_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
&(chunk_entry->index_info.filter_mask), err_detect, filter_cb,
(size_t *)&chunk_entry->chunk_new.length, &chunk_entry->chunk_buf_size,
&chunk_entry->buf) < 0)
@@ -4474,28 +4865,35 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
- if (H5D_select_io_mem(chunk_entry->buf, chunk_info->fspace, di->buf.cvp, chunk_info->mspace,
- type_info->dst_type_size, (size_t)iter_nelmts) < 0)
+ if (H5D_select_io_mem(chunk_entry->buf, chunk_info->fspace, chunk_info->dset_info->buf.cvp,
+ chunk_info->mspace, chunk_info->dset_info->type_info.dst_type_size,
+ (size_t)iter_nelmts) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't copy chunk data to write buffer");
}
/* Allocate iterator for memory selection */
- if (NULL == (sel_iter = H5FL_MALLOC(H5S_sel_iter_t)))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator");
+ if (chunk_msg_bufs_len > 0) {
+ assert(chunk_list->chunk_hash_table_keylen > 0);
+ if (NULL == (key_buf = H5MM_malloc(chunk_list->chunk_hash_table_keylen)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate hash table key buffer");
+
+ if (NULL == (sel_iter = H5FL_MALLOC(H5S_sel_iter_t)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator");
+ }
/* Now process all received chunk message buffers */
- for (i = 0; i < (size_t)chunk_msg_bufs_len; i++) {
+ for (size_t buf_idx = 0; buf_idx < (size_t)chunk_msg_bufs_len; buf_idx++) {
H5D_filtered_collective_chunk_info_t *chunk_entry = NULL;
- const unsigned char *msg_ptr = chunk_msg_bufs[i];
- hsize_t chunk_idx;
+ const unsigned char *msg_ptr = chunk_msg_bufs[buf_idx];
if (msg_ptr) {
- /* Retrieve the chunk's index value */
- memcpy(&chunk_idx, msg_ptr, sizeof(hsize_t));
- msg_ptr += sizeof(hsize_t);
+ /* Retrieve the chunk hash table key from the chunk message buffer */
+ H5MM_memcpy(key_buf, msg_ptr, chunk_list->chunk_hash_table_keylen);
+ msg_ptr += chunk_list->chunk_hash_table_keylen;
- /* Find the chunk entry according to its chunk index */
- HASH_FIND(hh, chunk_list->chunk_hash_table, &chunk_idx, sizeof(hsize_t), chunk_entry);
+ /* Find the chunk entry according to its chunk hash table key */
+ HASH_FIND(hh, chunk_list->chunk_hash_table, key_buf, chunk_list->chunk_hash_table_keylen,
+ chunk_entry);
if (chunk_entry == NULL)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find chunk entry");
if (mpi_rank != chunk_entry->new_owner)
@@ -4510,15 +4908,18 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
if (!chunk_entry->buf)
continue;
else {
+ hsize_t iter_nelmts;
+
/* Decode the chunk file dataspace from the message */
if (NULL == (dataspace = H5S_decode(&msg_ptr)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace");
- if (H5S_select_iter_init(sel_iter, dataspace, type_info->dst_type_size,
+ if (H5S_select_iter_init(sel_iter, dataspace,
+ chunk_entry->chunk_info->dset_info->type_info.dst_type_size,
H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
"unable to initialize memory selection information");
- sel_iter_init = TRUE;
+ sel_iter_init = true;
iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace);
@@ -4528,7 +4929,7 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
if (H5S_SELECT_ITER_RELEASE(sel_iter) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator");
- sel_iter_init = FALSE;
+ sel_iter_init = false;
if (dataspace) {
if (H5S_close(dataspace) < 0)
@@ -4536,50 +4937,49 @@ H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *ch
dataspace = NULL;
}
- H5MM_free(chunk_msg_bufs[i]);
- chunk_msg_bufs[i] = NULL;
+ H5MM_free(chunk_msg_bufs[buf_idx]);
+ chunk_msg_bufs[buf_idx] = NULL;
}
}
}
/* Finally, filter all the chunks */
- for (i = 0; i < chunk_list->num_chunk_infos; i++) {
- if (!chunk_list->chunk_infos[i].skip_filter_pline) {
- if (H5Z_pipeline(&di->dset->shared->dcpl_cache.pline, 0,
- &(chunk_list->chunk_infos[i].index_info.filter_mask), err_detect, filter_cb,
- (size_t *)&chunk_list->chunk_infos[i].chunk_new.length,
- &chunk_list->chunk_infos[i].chunk_buf_size, &chunk_list->chunk_infos[i].buf) < 0)
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ if (!chunk_list->chunk_infos[info_idx].skip_filter_pline) {
+ if (H5Z_pipeline(
+ &chunk_list->chunk_infos[info_idx].chunk_info->dset_info->dset->shared->dcpl_cache.pline,
+ 0, &(chunk_list->chunk_infos[info_idx].index_info.filter_mask), err_detect, filter_cb,
+ (size_t *)&chunk_list->chunk_infos[info_idx].chunk_new.length,
+ &chunk_list->chunk_infos[info_idx].chunk_buf_size,
+ &chunk_list->chunk_infos[info_idx].buf) < 0)
HGOTO_ERROR(H5E_PLINE, H5E_CANTFILTER, FAIL, "output pipeline failed");
}
#if H5_SIZEOF_SIZE_T > 4
/* Check for the chunk expanding too much to encode in a 32-bit value */
- if (chunk_list->chunk_infos[i].chunk_new.length > ((size_t)0xffffffff))
+ if (chunk_list->chunk_infos[info_idx].chunk_new.length > ((size_t)0xffffffff))
HGOTO_ERROR(H5E_DATASET, H5E_BADRANGE, FAIL, "chunk too large for 32-bit length");
#endif
}
done:
+ if (dataspace && (H5S_close(dataspace) < 0))
+ HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace");
+
if (sel_iter) {
if (sel_iter_init && H5S_SELECT_ITER_RELEASE(sel_iter) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator");
sel_iter = H5FL_FREE(H5S_sel_iter_t, sel_iter);
}
- if (dataspace && (H5S_close(dataspace) < 0))
- HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace");
- if (fill_space && (H5S_close(fill_space) < 0))
- HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space");
- /* Release the fill buffer info, if it's been initialized */
- if (fb_info_init && H5D__fill_term(&fb_info) < 0)
- HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info");
+ H5MM_free(key_buf);
/* On failure, try to free all resources used by entries in the chunk list */
if (ret_value < 0) {
- for (i = 0; i < chunk_list->num_chunk_infos; i++) {
- if (chunk_list->chunk_infos[i].buf) {
- H5MM_free(chunk_list->chunk_infos[i].buf);
- chunk_list->chunk_infos[i].buf = NULL;
+ for (size_t info_idx = 0; info_idx < chunk_list->num_chunk_infos; info_idx++) {
+ if (chunk_list->chunk_infos[info_idx].buf) {
+ H5MM_free(chunk_list->chunk_infos[info_idx].buf);
+ chunk_list->chunk_infos[info_idx].buf = NULL;
}
}
}
@@ -4608,21 +5008,20 @@ done:
static herr_t
H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list,
size_t *num_chunks_assigned_map, H5D_io_info_t *io_info,
- H5D_chk_idx_info_t *idx_info, int mpi_rank, int mpi_size)
+ size_t num_dset_infos, int mpi_rank, int mpi_size)
{
H5D_chunk_alloc_info_t *collective_list = NULL;
MPI_Datatype send_type;
MPI_Datatype recv_type;
- hbool_t send_type_derived = FALSE;
- hbool_t recv_type_derived = FALSE;
- hbool_t need_sort = FALSE;
+ bool send_type_derived = false;
+ bool recv_type_derived = false;
+ bool need_sort = false;
size_t collective_num_entries = 0;
size_t num_local_chunks_processed = 0;
- size_t i;
- void *gathered_array = NULL;
- int *counts_disps_array = NULL;
- int *counts_ptr = NULL;
- int *displacements_ptr = NULL;
+ void *gathered_array = NULL;
+ int *counts_disps_array = NULL;
+ int *counts_ptr = NULL;
+ int *displacements_ptr = NULL;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -4630,8 +5029,6 @@ H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t
assert(chunk_list);
assert(io_info);
- assert(idx_info);
- assert(idx_info->storage->idx_type != H5D_CHUNK_IDX_NONE);
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TRACE_ENTER(mpi_rank);
@@ -4670,20 +5067,20 @@ H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t
/* Set the receive counts from the assigned chunks map */
counts_ptr = counts_disps_array;
- for (i = 0; i < (size_t)mpi_size; i++)
- H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+ for (int curr_rank = 0; curr_rank < mpi_size; curr_rank++)
+ H5_CHECKED_ASSIGN(counts_ptr[curr_rank], int, num_chunks_assigned_map[curr_rank], size_t);
/* Set the displacements into the receive buffer for the gather operation */
displacements_ptr = &counts_disps_array[mpi_size];
*displacements_ptr = 0;
- for (i = 1; i < (size_t)mpi_size; i++)
- displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+ for (int curr_rank = 1; curr_rank < mpi_size; curr_rank++)
+ displacements_ptr[curr_rank] = displacements_ptr[curr_rank - 1] + counts_ptr[curr_rank - 1];
}
/* Perform gather operation */
if (H5_mpio_gatherv_alloc(chunk_list->chunk_infos, (int)chunk_list->num_chunk_infos, send_type,
- counts_ptr, displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank,
+ counts_ptr, displacements_ptr, recv_type, true, 0, io_info->comm, mpi_rank,
mpi_size, &gathered_array, &collective_num_entries) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
"can't gather chunk file space info to/from ranks");
@@ -4697,21 +5094,34 @@ H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t
* the 'simple' MPI_Allgatherv wrapper for this.
*/
if (H5_mpio_gatherv_alloc_simple(chunk_list->chunk_infos, (int)chunk_list->num_chunk_infos, send_type,
- recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size,
+ recv_type, true, 0, io_info->comm, mpi_rank, mpi_size,
&gathered_array, &collective_num_entries) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
"can't gather chunk file space info to/from ranks");
}
/* Collectively re-allocate the modified chunks (from each rank) in the file */
- collective_list = (H5D_chunk_alloc_info_t *)gathered_array;
- for (i = 0, num_local_chunks_processed = 0; i < collective_num_entries; i++) {
- H5D_chunk_alloc_info_t *coll_entry = &collective_list[i];
- hbool_t need_insert;
- hbool_t update_local_chunk;
-
- if (H5D__chunk_file_alloc(idx_info, &coll_entry->chunk_current, &coll_entry->chunk_new, &need_insert,
- NULL) < 0)
+ collective_list = (H5D_chunk_alloc_info_t *)gathered_array;
+ num_local_chunks_processed = 0;
+ for (size_t entry_idx = 0; entry_idx < collective_num_entries; entry_idx++) {
+ H5D_mpio_filtered_dset_info_t *cached_dset_info;
+ H5D_chunk_alloc_info_t *coll_entry = &collective_list[entry_idx];
+ bool need_insert;
+ bool update_local_chunk;
+
+ /* Find the cached dataset info for the dataset this chunk is in */
+ if (num_dset_infos > 1) {
+ HASH_FIND(hh, chunk_list->dset_info.dset_info_hash_table, &coll_entry->dset_oloc_addr,
+ sizeof(haddr_t), cached_dset_info);
+ if (cached_dset_info == NULL)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ }
+ else
+ cached_dset_info = chunk_list->dset_info.single_dset_info;
+ assert(cached_dset_info);
+
+ if (H5D__chunk_file_alloc(&cached_dset_info->chunk_idx_info, &coll_entry->chunk_current,
+ &coll_entry->chunk_new, &need_insert, NULL) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk");
/*
@@ -4719,9 +5129,12 @@ H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t
* rank, make sure to update the chunk entry in the local
* chunk list
*/
- update_local_chunk = (num_local_chunks_processed < chunk_list->num_chunk_infos) &&
- (coll_entry->chunk_idx ==
- chunk_list->chunk_infos[num_local_chunks_processed].index_info.chunk_idx);
+ update_local_chunk =
+ (num_local_chunks_processed < chunk_list->num_chunk_infos) &&
+ (coll_entry->dset_oloc_addr ==
+ chunk_list->chunk_infos[num_local_chunks_processed].index_info.dset_oloc_addr) &&
+ (coll_entry->chunk_idx ==
+ chunk_list->chunk_infos[num_local_chunks_processed].index_info.chunk_idx);
if (update_local_chunk) {
H5D_filtered_collective_chunk_info_t *local_chunk;
@@ -4747,7 +5160,7 @@ H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t
assert(H5_addr_defined(prev_chunk_offset) && H5_addr_defined(curr_chunk_offset));
if (curr_chunk_offset < prev_chunk_offset)
- need_sort = TRUE;
+ need_sort = true;
}
num_local_chunks_processed++;
@@ -4801,38 +5214,35 @@ done:
static herr_t
H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list,
size_t *num_chunks_assigned_map, H5D_io_info_t *io_info,
- H5D_dset_io_info_t *di, H5D_chk_idx_info_t *idx_info,
- int mpi_rank, int mpi_size)
+ size_t num_dset_infos, int mpi_rank, int mpi_size)
{
- H5D_chunk_ud_t chunk_ud;
- MPI_Datatype send_type;
- MPI_Datatype recv_type;
- hbool_t send_type_derived = FALSE;
- hbool_t recv_type_derived = FALSE;
- hsize_t scaled_coords[H5O_LAYOUT_NDIMS];
- size_t collective_num_entries = 0;
- size_t i;
- void *gathered_array = NULL;
- int *counts_disps_array = NULL;
- int *counts_ptr = NULL;
- int *displacements_ptr = NULL;
- int mpi_code;
- herr_t ret_value = SUCCEED;
+ MPI_Datatype send_type;
+ MPI_Datatype recv_type;
+ size_t collective_num_entries = 0;
+ bool send_type_derived = false;
+ bool recv_type_derived = false;
+ void *gathered_array = NULL;
+ int *counts_disps_array = NULL;
+ int *counts_ptr = NULL;
+ int *displacements_ptr = NULL;
+ int mpi_code;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_PACKAGE
assert(chunk_list);
assert(io_info);
- assert(di);
- assert(idx_info);
#ifdef H5Dmpio_DEBUG
H5D_MPIO_TRACE_ENTER(mpi_rank);
H5D_MPIO_TIME_START(mpi_rank, "Reinsertion of modified chunks into chunk index");
#endif
- /* Only re-insert chunks if index has an insert method */
- if (!idx_info->storage->ops->insert)
+ /*
+ * If no datasets involved have a chunk index 'insert'
+ * operation, this function is a no-op
+ */
+ if (chunk_list->no_dset_index_insert_methods)
HGOTO_DONE(SUCCEED);
/*
@@ -4867,20 +5277,20 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
/* Set the receive counts from the assigned chunks map */
counts_ptr = counts_disps_array;
- for (i = 0; i < (size_t)mpi_size; i++)
- H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+ for (int curr_rank = 0; curr_rank < mpi_size; curr_rank++)
+ H5_CHECKED_ASSIGN(counts_ptr[curr_rank], int, num_chunks_assigned_map[curr_rank], size_t);
/* Set the displacements into the receive buffer for the gather operation */
displacements_ptr = &counts_disps_array[mpi_size];
*displacements_ptr = 0;
- for (i = 1; i < (size_t)mpi_size; i++)
- displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+ for (int curr_rank = 1; curr_rank < mpi_size; curr_rank++)
+ displacements_ptr[curr_rank] = displacements_ptr[curr_rank - 1] + counts_ptr[curr_rank - 1];
}
/* Perform gather operation */
if (H5_mpio_gatherv_alloc(chunk_list->chunk_infos, (int)chunk_list->num_chunk_infos, send_type,
- counts_ptr, displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank,
+ counts_ptr, displacements_ptr, recv_type, true, 0, io_info->comm, mpi_rank,
mpi_size, &gathered_array, &collective_num_entries) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
"can't gather chunk index re-insertion info to/from ranks");
@@ -4894,17 +5304,18 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
* the 'simple' MPI_Allgatherv wrapper for this.
*/
if (H5_mpio_gatherv_alloc_simple(chunk_list->chunk_infos, (int)chunk_list->num_chunk_infos, send_type,
- recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size,
+ recv_type, true, 0, io_info->comm, mpi_rank, mpi_size,
&gathered_array, &collective_num_entries) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
"can't gather chunk index re-insertion info to/from ranks");
}
- /* Initialize static chunk udata fields from chunk index info */
- H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, idx_info);
-
- for (i = 0; i < collective_num_entries; i++) {
- H5D_chunk_insert_info_t *coll_entry = &((H5D_chunk_insert_info_t *)gathered_array)[i];
+ for (size_t entry_idx = 0; entry_idx < collective_num_entries; entry_idx++) {
+ H5D_mpio_filtered_dset_info_t *cached_dset_info;
+ H5D_chunk_insert_info_t *coll_entry = &((H5D_chunk_insert_info_t *)gathered_array)[entry_idx];
+ H5D_chunk_ud_t chunk_ud;
+ haddr_t prev_tag = HADDR_UNDEF;
+ hsize_t scaled_coords[H5O_LAYOUT_NDIMS];
/*
* We only need to reinsert this chunk if we had to actually
@@ -4913,13 +5324,28 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
if (!coll_entry->index_info.need_insert)
continue;
- chunk_ud.chunk_block = coll_entry->chunk_block;
- chunk_ud.chunk_idx = coll_entry->index_info.chunk_idx;
- chunk_ud.filter_mask = coll_entry->index_info.filter_mask;
- chunk_ud.common.scaled = scaled_coords;
+ /* Find the cached dataset info for the dataset this chunk is in */
+ if (num_dset_infos > 1) {
+ HASH_FIND(hh, chunk_list->dset_info.dset_info_hash_table, &coll_entry->index_info.dset_oloc_addr,
+ sizeof(haddr_t), cached_dset_info);
+ if (cached_dset_info == NULL)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "unable to find cached dataset info entry");
+ }
+ else
+ cached_dset_info = chunk_list->dset_info.single_dset_info;
+ assert(cached_dset_info);
+
+ chunk_ud.common.layout = cached_dset_info->chunk_idx_info.layout;
+ chunk_ud.common.storage = cached_dset_info->chunk_idx_info.storage;
+ chunk_ud.common.scaled = scaled_coords;
+
+ chunk_ud.chunk_block = coll_entry->chunk_block;
+ chunk_ud.chunk_idx = coll_entry->index_info.chunk_idx;
+ chunk_ud.filter_mask = coll_entry->index_info.filter_mask;
/* Calculate scaled coordinates for the chunk */
- if (idx_info->layout->idx_type == H5D_CHUNK_IDX_EARRAY && idx_info->layout->u.earray.unlim_dim > 0) {
+ if (cached_dset_info->chunk_idx_info.layout->idx_type == H5D_CHUNK_IDX_EARRAY &&
+ cached_dset_info->chunk_idx_info.layout->u.earray.unlim_dim > 0) {
/*
* Extensible arrays where the unlimited dimension is not
* the slowest-changing dimension "swizzle" the coordinates
@@ -4933,17 +5359,20 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
* callback that accepts a chunk index and provides the
* caller with the scaled coordinates for that chunk.
*/
- H5VM_array_calc_pre(chunk_ud.chunk_idx, di->dset->shared->ndims,
- idx_info->layout->u.earray.swizzled_down_chunks, scaled_coords);
+ H5VM_array_calc_pre(chunk_ud.chunk_idx, cached_dset_info->dset_io_info->dset->shared->ndims,
+ cached_dset_info->chunk_idx_info.layout->u.earray.swizzled_down_chunks,
+ scaled_coords);
- H5VM_unswizzle_coords(hsize_t, scaled_coords, idx_info->layout->u.earray.unlim_dim);
+ H5VM_unswizzle_coords(hsize_t, scaled_coords,
+ cached_dset_info->chunk_idx_info.layout->u.earray.unlim_dim);
}
else {
- H5VM_array_calc_pre(chunk_ud.chunk_idx, di->dset->shared->ndims,
- di->dset->shared->layout.u.chunk.down_chunks, scaled_coords);
+ H5VM_array_calc_pre(chunk_ud.chunk_idx, cached_dset_info->dset_io_info->dset->shared->ndims,
+ cached_dset_info->dset_io_info->dset->shared->layout.u.chunk.down_chunks,
+ scaled_coords);
}
- scaled_coords[di->dset->shared->ndims] = 0;
+ scaled_coords[cached_dset_info->dset_io_info->dset->shared->ndims] = 0;
#ifndef NDEBUG
/*
@@ -4955,10 +5384,18 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
* they match.
*/
for (size_t dbg_idx = 0; dbg_idx < chunk_list->num_chunk_infos; dbg_idx++) {
- if (coll_entry->index_info.chunk_idx == chunk_list->chunk_infos[dbg_idx].index_info.chunk_idx) {
- hbool_t coords_match =
+ bool same_chunk;
+
+ /* Chunks must have the same index and reside in the same dataset */
+ same_chunk = (0 == H5_addr_cmp(coll_entry->index_info.dset_oloc_addr,
+ chunk_list->chunk_infos[dbg_idx].index_info.dset_oloc_addr));
+ same_chunk = same_chunk && (coll_entry->index_info.chunk_idx ==
+ chunk_list->chunk_infos[dbg_idx].index_info.chunk_idx);
+
+ if (same_chunk) {
+ bool coords_match =
!memcmp(scaled_coords, chunk_list->chunk_infos[dbg_idx].chunk_info->scaled,
- di->dset->shared->ndims * sizeof(hsize_t));
+ cached_dset_info->dset_io_info->dset->shared->ndims * sizeof(hsize_t));
assert(coords_match && "Calculated scaled coordinates for chunk didn't match "
"chunk's actual scaled coordinates!");
@@ -4967,8 +5404,15 @@ H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *
}
#endif
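
(The scaled-coordinate computation above is a linear-index-to-N-D decomposition: H5VM_array_calc_pre divides the chunk index by precomputed "down" products, where down[i] is the number of chunks in all dimensions faster than dimension i. A rough, illustrative sketch of that step, not the HDF5 routine itself:

    #include <stdint.h>

    static void
    index_to_coords(uint64_t idx, unsigned ndims, const uint64_t *down, uint64_t *coords)
    {
        for (unsigned i = 0; i < ndims; i++) {
            coords[i] = idx / down[i]; /* how many full slabs of faster dims fit */
            idx      %= down[i];       /* remainder indexes within that slab */
        }
    }

    /* Example: a 4 x 3 chunk grid has down = {3, 1}; linear index 7
     * decomposes to coords = {2, 1}. */

For the extensible-array case, the "swizzled" down products are used and the result is unswizzled afterwards, which is why the two branches differ only in which products they feed to the decomposition.)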
- if ((idx_info->storage->ops->insert)(idx_info, &chunk_ud, di->dset) < 0)
+ /* Set metadata tagging with dataset oheader addr */
+ H5AC_tag(cached_dset_info->dset_io_info->dset->oloc.addr, &prev_tag);
+
+ if ((cached_dset_info->chunk_idx_info.storage->ops->insert)(
+ &cached_dset_info->chunk_idx_info, &chunk_ud, cached_dset_info->dset_io_info->dset) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk address into index");
+
+ /* Reset metadata tagging */
+ H5AC_tag(prev_tag, NULL);
}
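
(The paired H5AC_tag calls bracket each index update with metadata cache tagging: new cache entries created during the insert are attributed to the owning dataset's object header address, and the previously active tag is restored afterwards. A sketch of the save/restore idiom, with set_tag as a hypothetical stand-in for H5AC_tag:

    #include <stdint.h>

    static uint64_t active_tag; /* the tag applied to newly created metadata */

    static void
    set_tag(uint64_t tag, uint64_t *prev)
    {
        if (prev)
            *prev = active_tag; /* remember the caller's tag for restoration */
        active_tag = tag;
    }

    static void
    tagged_insert(uint64_t dset_oheader_addr)
    {
        uint64_t prev;

        set_tag(dset_oheader_addr, &prev); /* attribute new metadata to this dataset */
        /* ... update the chunk index here ... */
        set_tag(prev, NULL);               /* restore the previously active tag */
    }

Restoring inside the loop, rather than once at the end, matters here because consecutive chunks may belong to different datasets.)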
done:
@@ -5017,16 +5461,16 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
- MPI_Datatype *resized_type, hbool_t *resized_type_derived)
+H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, bool *contig_type_derived,
+ MPI_Datatype *resized_type, bool *resized_type_derived)
{
MPI_Datatype struct_type = MPI_DATATYPE_NULL;
- hbool_t struct_type_derived = FALSE;
+ bool struct_type_derived = false;
MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL;
- hbool_t chunk_block_type_derived = FALSE;
- MPI_Datatype types[5];
- MPI_Aint displacements[5];
- int block_lengths[5];
+ bool chunk_block_type_derived = false;
+ MPI_Datatype types[6];
+ MPI_Aint displacements[6];
+ int block_lengths[6];
int field_count;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -5038,72 +5482,78 @@ H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, hbool_t *
assert(resized_type);
assert(resized_type_derived);
- *contig_type_derived = FALSE;
- *resized_type_derived = FALSE;
+ *contig_type_derived = false;
+ *resized_type_derived = false;
/* Create struct type for the inner H5F_block_t structure */
- if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+ if (H5F_mpi_get_file_block_type(false, &chunk_block_type, &chunk_block_type_derived) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description");
- field_count = 5;
+ field_count = 6;
assert(field_count == (sizeof(types) / sizeof(MPI_Datatype)));
/*
* Create structure type to pack chunk H5F_block_t structure
- * next to chunk_idx, orig_owner, new_owner and num_writers
- * fields
+ * next to chunk_idx, dset_oloc_addr, orig_owner, new_owner
+ * and num_writers fields
*/
block_lengths[0] = 1;
block_lengths[1] = 1;
block_lengths[2] = 1;
block_lengths[3] = 1;
block_lengths[4] = 1;
+ block_lengths[5] = 1;
displacements[0] = offsetof(H5D_chunk_redistribute_info_t, chunk_block);
displacements[1] = offsetof(H5D_chunk_redistribute_info_t, chunk_idx);
- displacements[2] = offsetof(H5D_chunk_redistribute_info_t, orig_owner);
- displacements[3] = offsetof(H5D_chunk_redistribute_info_t, new_owner);
- displacements[4] = offsetof(H5D_chunk_redistribute_info_t, num_writers);
+ displacements[2] = offsetof(H5D_chunk_redistribute_info_t, dset_oloc_addr);
+ displacements[3] = offsetof(H5D_chunk_redistribute_info_t, orig_owner);
+ displacements[4] = offsetof(H5D_chunk_redistribute_info_t, new_owner);
+ displacements[5] = offsetof(H5D_chunk_redistribute_info_t, num_writers);
types[0] = chunk_block_type;
types[1] = HSIZE_AS_MPI_TYPE;
- types[2] = MPI_INT;
+ types[2] = HADDR_AS_MPI_TYPE;
types[3] = MPI_INT;
types[4] = MPI_INT;
+ types[5] = MPI_INT;
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- *contig_type_derived = TRUE;
+ *contig_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
- /* Create struct type to extract the chunk_current, chunk_idx, orig_owner,
- * new_owner and num_writers fields from a H5D_filtered_collective_chunk_info_t
- * structure
+ /* Create struct type to extract the chunk_current, chunk_idx,
+ * dset_oloc_addr, orig_owner, new_owner and num_writers fields
+ * from a H5D_filtered_collective_chunk_info_t structure
*/
block_lengths[0] = 1;
block_lengths[1] = 1;
block_lengths[2] = 1;
block_lengths[3] = 1;
block_lengths[4] = 1;
+ block_lengths[5] = 1;
displacements[0] = offsetof(H5D_filtered_collective_chunk_info_t, chunk_current);
displacements[1] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.chunk_idx);
- displacements[2] = offsetof(H5D_filtered_collective_chunk_info_t, orig_owner);
- displacements[3] = offsetof(H5D_filtered_collective_chunk_info_t, new_owner);
- displacements[4] = offsetof(H5D_filtered_collective_chunk_info_t, num_writers);
+ displacements[2] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.dset_oloc_addr);
+ displacements[3] = offsetof(H5D_filtered_collective_chunk_info_t, orig_owner);
+ displacements[4] = offsetof(H5D_filtered_collective_chunk_info_t, new_owner);
+ displacements[5] = offsetof(H5D_filtered_collective_chunk_info_t, num_writers);
types[0] = chunk_block_type;
types[1] = HSIZE_AS_MPI_TYPE;
- types[2] = MPI_INT;
+ types[2] = HADDR_AS_MPI_TYPE;
types[3] = MPI_INT;
types[4] = MPI_INT;
+ types[5] = MPI_INT;
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- struct_type_derived = TRUE;
+ struct_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
struct_type, 0, sizeof(H5D_filtered_collective_chunk_info_t), resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
- *resized_type_derived = TRUE;
+ *resized_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
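
(This function and the two type-builders after it follow the same derived-datatype recipe, so one sketch covers them: describe the struct's fields to MPI with offsetof() displacements, then use MPI_Type_create_resized to force the extent to the full struct size so MPI strides correctly over arrays of the struct, trailing padding included. HADDR_AS_MPI_TYPE above plays the role of a fixed-width integer type such as MPI_UINT64_T. The struct here is illustrative, not an HDF5 type:

    #include <stddef.h>
    #include <mpi.h>

    typedef struct example_rec {
        double value;
        int    owner;
        int    nwriters;
    } example_rec;

    static int
    build_example_type(MPI_Datatype *out)
    {
        MPI_Datatype tmp;
        int          blocklens[3] = {1, 1, 1};
        MPI_Aint     displs[3]    = {offsetof(example_rec, value),
                                     offsetof(example_rec, owner),
                                     offsetof(example_rec, nwriters)};
        MPI_Datatype types[3]     = {MPI_DOUBLE, MPI_INT, MPI_INT};

        if (MPI_SUCCESS != MPI_Type_create_struct(3, blocklens, displs, types, &tmp))
            return -1;

        /* Force the extent to the C struct size (covers trailing padding) */
        if (MPI_SUCCESS != MPI_Type_create_resized(tmp, 0, (MPI_Aint)sizeof(example_rec), out))
            return -1;
        MPI_Type_free(&tmp);

        return (MPI_SUCCESS == MPI_Type_commit(out)) ? 0 : -1;
    }

The "extract" types built in each function use the same trick with displacements into the larger H5D_filtered_collective_chunk_info_t, resized to that struct's size, so the same fields can be pulled straight out of the chunk list without a staging copy.)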
@@ -5122,12 +5572,12 @@ done:
if (*resized_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *resized_type_derived = FALSE;
+ *resized_type_derived = false;
}
if (*contig_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *contig_type_derived = FALSE;
+ *contig_type_derived = false;
}
}
@@ -5158,16 +5608,16 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
- MPI_Datatype *resized_type, hbool_t *resized_type_derived)
+H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, bool *contig_type_derived,
+ MPI_Datatype *resized_type, bool *resized_type_derived)
{
MPI_Datatype struct_type = MPI_DATATYPE_NULL;
- hbool_t struct_type_derived = FALSE;
+ bool struct_type_derived = false;
MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL;
- hbool_t chunk_block_type_derived = FALSE;
- MPI_Datatype types[3];
- MPI_Aint displacements[3];
- int block_lengths[3];
+ bool chunk_block_type_derived = false;
+ MPI_Datatype types[4];
+ MPI_Aint displacements[4];
+ int block_lengths[4];
int field_count;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -5179,59 +5629,66 @@ H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_
assert(resized_type);
assert(resized_type_derived);
- *contig_type_derived = FALSE;
- *resized_type_derived = FALSE;
+ *contig_type_derived = false;
+ *resized_type_derived = false;
/* Create struct type for the inner H5F_block_t structure */
- if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+ if (H5F_mpi_get_file_block_type(false, &chunk_block_type, &chunk_block_type_derived) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description");
- field_count = 3;
+ field_count = 4;
assert(field_count == (sizeof(types) / sizeof(MPI_Datatype)));
/*
* Create structure type to pack both chunk H5F_block_t structures
- * next to chunk_idx field
+ * next to chunk_idx and dset_oloc_addr fields
*/
block_lengths[0] = 1;
block_lengths[1] = 1;
block_lengths[2] = 1;
+ block_lengths[3] = 1;
displacements[0] = offsetof(H5D_chunk_alloc_info_t, chunk_current);
displacements[1] = offsetof(H5D_chunk_alloc_info_t, chunk_new);
displacements[2] = offsetof(H5D_chunk_alloc_info_t, chunk_idx);
+ displacements[3] = offsetof(H5D_chunk_alloc_info_t, dset_oloc_addr);
types[0] = chunk_block_type;
types[1] = chunk_block_type;
types[2] = HSIZE_AS_MPI_TYPE;
+ types[3] = HADDR_AS_MPI_TYPE;
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- *contig_type_derived = TRUE;
+ *contig_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
/*
- * Create struct type to extract the chunk_current, chunk_new and chunk_idx
- * fields from a H5D_filtered_collective_chunk_info_t structure
+ * Create struct type to extract the chunk_current, chunk_new, chunk_idx
+ * and dset_oloc_addr fields from a H5D_filtered_collective_chunk_info_t
+ * structure
*/
block_lengths[0] = 1;
block_lengths[1] = 1;
block_lengths[2] = 1;
+ block_lengths[3] = 1;
displacements[0] = offsetof(H5D_filtered_collective_chunk_info_t, chunk_current);
displacements[1] = offsetof(H5D_filtered_collective_chunk_info_t, chunk_new);
displacements[2] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.chunk_idx);
+ displacements[3] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.dset_oloc_addr);
types[0] = chunk_block_type;
types[1] = chunk_block_type;
types[2] = HSIZE_AS_MPI_TYPE;
+ types[3] = HADDR_AS_MPI_TYPE;
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- struct_type_derived = TRUE;
+ struct_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
struct_type, 0, sizeof(H5D_filtered_collective_chunk_info_t), resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
- *resized_type_derived = TRUE;
+ *resized_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
@@ -5250,12 +5707,12 @@ done:
if (*resized_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *resized_type_derived = FALSE;
+ *resized_type_derived = false;
}
if (*contig_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *contig_type_derived = FALSE;
+ *contig_type_derived = false;
}
}
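
(The done: blocks in all three type-builders share a small cleanup idiom: each output type carries a *_derived flag so the error path frees exactly the types that were successfully created, and nothing else. Sketched with illustrative names:

    #include <stdbool.h>
    #include <mpi.h>

    static void
    cleanup_type_on_error(MPI_Datatype *type, bool *type_derived)
    {
        if (*type_derived) {
            MPI_Type_free(type); /* also resets *type to MPI_DATATYPE_NULL */
            *type_derived = false;
        }
    }

Clearing the flag after the free keeps the cleanup idempotent if the error path is reached more than once.)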
@@ -5288,17 +5745,17 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
- MPI_Datatype *resized_type, hbool_t *resized_type_derived)
+H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, bool *contig_type_derived,
+ MPI_Datatype *resized_type, bool *resized_type_derived)
{
MPI_Datatype struct_type = MPI_DATATYPE_NULL;
- hbool_t struct_type_derived = FALSE;
+ bool struct_type_derived = false;
MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL;
- hbool_t chunk_block_type_derived = FALSE;
+ bool chunk_block_type_derived = false;
MPI_Aint contig_type_extent;
- MPI_Datatype types[4];
- MPI_Aint displacements[4];
- int block_lengths[4];
+ MPI_Datatype types[5];
+ MPI_Aint displacements[5];
+ int block_lengths[5];
int field_count;
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -5310,14 +5767,14 @@ H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig
assert(resized_type);
assert(resized_type_derived);
- *contig_type_derived = FALSE;
- *resized_type_derived = FALSE;
+ *contig_type_derived = false;
+ *resized_type_derived = false;
/* Create struct type for an H5F_block_t structure */
- if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+ if (H5F_mpi_get_file_block_type(false, &chunk_block_type, &chunk_block_type_derived) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description");
- field_count = 4;
+ field_count = 5;
assert(field_count == (sizeof(types) / sizeof(MPI_Datatype)));
/*
@@ -5330,29 +5787,32 @@ H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig
block_lengths[1] = 1;
block_lengths[2] = 1;
block_lengths[3] = 1;
+ block_lengths[4] = 1;
displacements[0] = offsetof(H5D_chunk_insert_info_t, chunk_block);
displacements[1] = offsetof(H5D_chunk_insert_info_t, index_info.chunk_idx);
- displacements[2] = offsetof(H5D_chunk_insert_info_t, index_info.filter_mask);
- displacements[3] = offsetof(H5D_chunk_insert_info_t, index_info.need_insert);
+ displacements[2] = offsetof(H5D_chunk_insert_info_t, index_info.dset_oloc_addr);
+ displacements[3] = offsetof(H5D_chunk_insert_info_t, index_info.filter_mask);
+ displacements[4] = offsetof(H5D_chunk_insert_info_t, index_info.need_insert);
types[0] = chunk_block_type;
types[1] = HSIZE_AS_MPI_TYPE;
- types[2] = MPI_UNSIGNED;
- types[3] = MPI_C_BOOL;
+ types[2] = HADDR_AS_MPI_TYPE;
+ types[3] = MPI_UNSIGNED;
+ types[4] = MPI_C_BOOL;
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- struct_type_derived = TRUE;
+ struct_type_derived = true;
contig_type_extent = (MPI_Aint)(sizeof(H5F_block_t) + sizeof(H5D_chunk_index_info_t));
if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(struct_type, 0, contig_type_extent, contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
- *contig_type_derived = TRUE;
+ *contig_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
- struct_type_derived = FALSE;
+ struct_type_derived = false;
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
@@ -5363,17 +5823,18 @@ H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig
*/
displacements[0] = offsetof(H5D_filtered_collective_chunk_info_t, chunk_new);
displacements[1] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.chunk_idx);
- displacements[2] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.filter_mask);
- displacements[3] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.need_insert);
+ displacements[2] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.dset_oloc_addr);
+ displacements[3] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.filter_mask);
+ displacements[4] = offsetof(H5D_filtered_collective_chunk_info_t, index_info.need_insert);
if (MPI_SUCCESS !=
(mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
- struct_type_derived = TRUE;
+ struct_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
struct_type, 0, sizeof(H5D_filtered_collective_chunk_info_t), resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
- *resized_type_derived = TRUE;
+ *resized_type_derived = true;
if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
@@ -5392,12 +5853,12 @@ done:
if (*resized_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *resized_type_derived = FALSE;
+ *resized_type_derived = false;
}
if (*contig_type_derived) {
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- *contig_type_derived = FALSE;
+ *contig_type_derived = false;
}
}
@@ -5571,6 +6032,8 @@ H5D__mpio_dump_collective_filtered_chunk_list(H5D_filtered_collective_io_info_t
chunk_rank < 3 ? 0 : chunk_entry->chunk_info->scaled[2],
chunk_rank < 4 ? 0 : chunk_entry->chunk_info->scaled[3]);
H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk Index: %" PRIuHSIZE, chunk_entry->index_info.chunk_idx);
+ H5D_MPIO_DEBUG_VA(mpi_rank, " Dataset Object Header Address: %" PRIuHADDR,
+ chunk_entry->index_info.dset_oloc_addr);
H5D_MPIO_DEBUG_VA(mpi_rank, " Filter Mask: %u", chunk_entry->index_info.filter_mask);
H5D_MPIO_DEBUG_VA(mpi_rank, " Need Insert: %s",
chunk_entry->index_info.need_insert ? "YES" : "NO");
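
(The dump routine relies on rank-prefixed debug output so interleaved lines from collective operations can be attributed to their emitting rank. A minimal sketch of such a macro, illustrative rather than HDF5's actual H5D_MPIO_DEBUG_VA:

    #include <stdio.h>

    /* Hypothetical macro: print a formatted debug line tagged with the
     * emitting MPI rank */
    #define MPIO_DEBUG_VA(rank, fmt, ...) \
        fprintf(stderr, "[rank %d] " fmt "\n", (rank), __VA_ARGS__)

    /* Usage: MPIO_DEBUG_VA(mpi_rank, "Chunk Index: %llu",
     *                      (unsigned long long)chunk_idx); */

The new "Dataset Object Header Address" line extends this per-chunk dump so that, with multiple datasets in one collective operation, each listed chunk can be traced back to its owning dataset.)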