diff options
-rw-r--r-- | examples/h5rados_dset_create.c | 84 | ||||
-rw-r--r-- | examples/h5rados_dset_open.c | 126 | ||||
-rw-r--r-- | examples/h5rados_dset_read.c | 86 | ||||
-rw-r--r-- | examples/h5rados_dset_rpartial.c | 142 | ||||
-rw-r--r-- | examples/h5rados_dset_rss.c | 134 | ||||
-rw-r--r-- | examples/h5rados_dset_wpartial.c | 129 | ||||
-rw-r--r-- | examples/h5rados_dset_write.c | 81 | ||||
-rw-r--r-- | examples/h5rados_dset_wss.c | 135 | ||||
-rw-r--r-- | examples/h5rados_example.h | 14 | ||||
-rw-r--r-- | examples/h5rados_file_create.c | 54 | ||||
-rw-r--r-- | examples/h5rados_file_open.c | 54 | ||||
-rw-r--r-- | examples/h5rados_group_create.c | 63 | ||||
-rw-r--r-- | examples/h5rados_group_open.c | 63 | ||||
-rw-r--r-- | src/H5VLrados.c | 4553 | ||||
-rw-r--r-- | src/H5VLrados.h | 123 | ||||
-rw-r--r-- | src/H5VLrados_public.h | 46 |
16 files changed, 5887 insertions, 0 deletions
diff --git a/examples/h5rados_dset_create.c b/examples/h5rados_dset_create.c new file mode 100644 index 0000000..0e14a7f --- /dev/null +++ b/examples/h5rados_dset_create.c @@ -0,0 +1,84 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, space = -1, fapl = -1, dcpl = H5P_DEFAULT; + hsize_t dims[2] = {4, 6}; + hsize_t cdims[2]; + + (void)MPI_Init(&argc, &argv); + + if((argc != 3) && (argc != 5)) + PRINTF_ERROR("argc is not 3 or 5\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Set up DCPL */ + if(argc == 5) { + cdims[0] = (hsize_t)atoi(argv[3]); + cdims[1] = (hsize_t)atoi(argv[4]); + if((dcpl = H5Pcreate(H5P_DATASET_CREATE)) < 0) + ERROR; + if(H5Pset_chunk(dcpl, 2, cdims) < 0) + ERROR; + } /* end if */ + + /* Set up dataspace */ + if((space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + printf("Creating dataset\n"); + + /* Create dataset */ + if((dset = H5Dcreate2(file, argv[2], H5T_NATIVE_INT, space, H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0) + ERROR; + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Sclose(space) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + if((dcpl != H5P_DEFAULT) && (H5Pclose(dcpl) < 0)) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Sclose(space); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_open.c 
b/examples/h5rados_dset_open.c new file mode 100644 index 0000000..19e29ed --- /dev/null +++ b/examples/h5rados_dset_open.c @@ -0,0 +1,126 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, fapl = -1, space = -1, type = -1, dcpl = -1, dapl = -1, def_dcpl = -1, def_dapl = -1; + int ndims; + hsize_t dims[2]; + htri_t tri_ret; + + (void)MPI_Init(&argc, &argv); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDONLY, fapl)) < 0) + ERROR; + + printf("Opening dataset\n"); + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + /* Check dataset dataspace */ + if((space = H5Dget_space(dset)) < 0) + ERROR; + if((ndims = H5Sget_simple_extent_ndims(space)) < 0) + ERROR; + if(ndims != 2) + PRINTF_ERROR("ndims == %d, expected 2\n", ndims); + if(H5Sget_simple_extent_dims(space, dims, NULL) < 0) + ERROR; + if(dims[0] != 4) + PRINTF_ERROR("dims[0] == %d, expected 4\n", (int)dims[0]); + if(dims[1] != 6) + PRINTF_ERROR("dims[1] == %d, expected 6\n", (int)dims[1]); + + /* Check dataset datatype */ + if((type = H5Dget_type(dset)) < 0) + ERROR; + if((tri_ret = H5Tequal(type, H5T_NATIVE_INT)) < 0) + ERROR; + if(!tri_ret) + PRINTF_ERROR("datatype does not equal H5T_NATIVE_INT\n"); + + /* Check DCPL */ + if((dcpl = H5Dget_create_plist(dset)) < 0) + ERROR; + if((def_dcpl = H5Pcreate(H5P_DATASET_CREATE)) < 0) + ERROR; + if((tri_ret = H5Pequal(dcpl, def_dcpl)) < 0) + ERROR; + if(!tri_ret) + PRINTF_ERROR("DCPL does not 
equal default\n"); + + /* Check DAPL */ + if((dapl = H5Dget_access_plist(dset)) < 0) + ERROR; + if((def_dapl = H5Pcreate(H5P_DATASET_ACCESS)) < 0) + ERROR; + if((tri_ret = H5Pequal(dapl, def_dapl)) < 0) + ERROR; + if(!tri_ret) + PRINTF_ERROR("DAPL does not equal default\n"); + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + if(H5Sclose(space) < 0) + ERROR; + if(H5Tclose(type) < 0) + ERROR; + if(H5Pclose(dcpl) < 0) + ERROR; + if(H5Pclose(dapl) < 0) + ERROR; + if(H5Pclose(def_dcpl) < 0) + ERROR; + if(H5Pclose(def_dapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Pclose(fapl); + H5Sclose(space); + H5Tclose(type); + H5Pclose(dcpl); + H5Pclose(dapl); + H5Pclose(def_dcpl); + H5Pclose(def_dapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_read.c b/examples/h5rados_dset_read.c new file mode 100644 index 0000000..fc7292c --- /dev/null +++ b/examples/h5rados_dset_read.c @@ -0,0 +1,86 @@ +#include "h5rados_example.h" +#include <time.h> + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, fapl = -1; + int buf[4][6]; + int i, j; + + (void)MPI_Init(&argc, &argv); + + /* Seed random number generator */ + srand(time(NULL)); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDONLY, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], 
H5P_DEFAULT)) < 0) + ERROR; + + printf("Reading dataset\n"); + + /* Initialize buffer */ + for(i = 0; i < 4; i++) + for(j = 0; j < 6; j++) + buf[i][j] = -1; + + /* Read data */ + if(H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf) < 0) + ERROR; + + /* Print buffer */ + printf("Successfully read data. Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) + printf("%d ", buf[i][j]); + printf("\n"); + } + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_rpartial.c b/examples/h5rados_dset_rpartial.c new file mode 100644 index 0000000..622b961 --- /dev/null +++ b/examples/h5rados_dset_rpartial.c @@ -0,0 +1,142 @@ +#include "h5rados_example.h" +#include <time.h> + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, file_space = -1, mem_space = -1, fapl = -1; + hsize_t dims[2] = {4, 6}; + hsize_t start[2], count[2]; + int buf[4][6]; + int rank, mpi_size; + char *file_sel_str[2] = {"XXX...", "...XXX"}; + int i, j; + + (void)MPI_Init(&argc, &argv); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + + if(mpi_size > 2) + PRINTF_ERROR("mpi_size > 2\n"); + + /* Seed random number generator */ + srand(time(NULL)); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + 
+ /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + if(rank == 1) + MPI_Barrier(MPI_COMM_WORLD); + + printf("---------------Rank %d---------------\n", rank); + printf("Selecting elements denoted with X\n"); + printf("Memory File\n"); + printf("...... %s\n", file_sel_str[rank]); + for(i = 1; i < 4; i++) + printf(".XXXX. %s\n", file_sel_str[rank]); + + if(rank == 0) + MPI_Barrier(MPI_COMM_WORLD); + else + printf("Reading dataset\n"); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Set up dataspaces */ + if((file_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if((mem_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + start[0] = 0; + start[1] = 3 * rank; + count[0] = 4; + count[1] = 3; + if(H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start, NULL, count, NULL) < 0) + ERROR; + start[0] = 1; + start[1] = 1; + count[0] = 3; + count[1] = 4; + if(H5Sselect_hyperslab(mem_space, H5S_SELECT_SET, start, NULL, count, NULL) < 0) + ERROR; + + /* Initialize buffer */ + for(i = 0; i < 4; i++) + for(j = 0; j < 6; j++) + buf[i][j] = -1; + + /* Read data */ + if(H5Dread(dset, H5T_NATIVE_INT, mem_space, file_space, H5P_DEFAULT, buf) < 0) + ERROR; + + if(rank == 1) + MPI_Barrier(MPI_COMM_WORLD); + + /* Print buffer (it was filled by the read above) */ + printf("---------------Rank %d---------------\n", rank); + printf("Successfully read data. 
Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) + printf("%d ", buf[i][j]); + printf("\n"); + } + + if(rank == 0) + MPI_Barrier(MPI_COMM_WORLD); + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Sclose(file_space) < 0) + ERROR; + if(H5Sclose(mem_space) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Sclose(file_space); + H5Sclose(mem_space); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_rss.c b/examples/h5rados_dset_rss.c new file mode 100644 index 0000000..456da0c --- /dev/null +++ b/examples/h5rados_dset_rss.c @@ -0,0 +1,134 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, file_space = -1, mem_space = -1, fapl = -1; + hsize_t dims[2] = {4, 6}; + hsize_t fstart[2], count[2], mstart[2]; + int buf[4][6]; + int i, j; + + (void)MPI_Init(&argc, &argv); + + + if(argc != 9) + PRINTF_ERROR("argc != 9\n"); + + fstart[0] = (hsize_t)atoi(argv[3]); + fstart[1] = (hsize_t)atoi(argv[4]); + count[0] = (hsize_t)atoi(argv[5]); + count[1] = (hsize_t)atoi(argv[6]); + mstart[0] = (hsize_t)atoi(argv[7]); + mstart[1] = (hsize_t)atoi(argv[8]); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + printf("Selecting elements 
denoted with X\n"); + printf("File:\n"); + for(i = 0; i < 4; i++) { + if((i >= fstart[0]) && (i < fstart[0] + count[0])) { + for(j = 0; j < 6; j++) + if((j >= fstart[1]) && (j < fstart[1] + count[1])) + printf("X"); + else + printf("."); + printf("\n"); + } + else + printf("......\n"); + } + printf("\nMemory:\n"); + for(i = 0; i < 4; i++) { + if((i >= mstart[0]) && (i < mstart[0] + count[0])) { + for(j = 0; j < 6; j++) + if((j >= mstart[1]) && (j < mstart[1] + count[1])) + printf("X"); + else + printf("."); + printf("\n"); + } + else + printf("......\n"); + } + + /* Set up dataspaces */ + if((file_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if((mem_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if(H5Sselect_hyperslab(file_space, H5S_SELECT_SET, fstart, NULL, count, NULL) < 0) + ERROR; + if(H5Sselect_hyperslab(mem_space, H5S_SELECT_SET, mstart, NULL, count, NULL) < 0) + ERROR; + + /* Initialize buffer */ + for(i = 0; i < 4; i++) + for(j = 0; j < 6; j++) + buf[i][j] = -1; + + /* Read data */ + if(H5Dread(dset, H5T_NATIVE_INT, mem_space, file_space, H5P_DEFAULT, buf) < 0) + ERROR; + + /* Fill and print buffer */ + printf("Successfully read data. 
Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) + printf("%d ", buf[i][j]); + printf("\n"); + } + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Sclose(file_space) < 0) + ERROR; + if(H5Sclose(mem_space) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Sclose(file_space); + H5Sclose(mem_space); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_wpartial.c b/examples/h5rados_dset_wpartial.c new file mode 100644 index 0000000..bb53982 --- /dev/null +++ b/examples/h5rados_dset_wpartial.c @@ -0,0 +1,129 @@ +#include "h5rados_example.h" +#include <time.h> + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, file_space = -1, mem_space = -1, fapl = -1; + hsize_t dims[2] = {4, 6}; + hsize_t start[2], count[2]; + int buf[4][6]; + int rank, mpi_size; + char *file_sel_str[2] = {"XXX...", "...XXX"}; + int i, j; + + (void)MPI_Init(&argc, &argv); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + + if(mpi_size != 2) + PRINTF_ERROR("mpi_size != 2\n"); + + /* Seed random number generator */ + srand(time(NULL)); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + if(rank == 1) + 
MPI_Barrier(MPI_COMM_WORLD); + + printf("---------------Rank %d---------------\n", rank); + printf("Selecting elements denoted with X\n"); + printf("Memory File\n"); + printf("...... %s\n", file_sel_str[rank]); + for(i = 1; i < 4; i++) + printf(".XXXX. %s\n", file_sel_str[rank]); + + /* Fill and print buffer */ + printf("Writing data. Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) { + buf[i][j] = rand() % 10; + printf("%d ", buf[i][j]); + } + printf("\n"); + } + + if(rank == 0) + MPI_Barrier(MPI_COMM_WORLD); + + + /* Set up dataspaces */ + if((file_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if((mem_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + start[0] = 0; + start[1] = 3 * rank; + count[0] = 4; + count[1] = 3; + if(H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start, NULL, count, NULL) < 0) + ERROR; + start[0] = 1; + start[1] = 1; + count[0] = 3; + count[1] = 4; + if(H5Sselect_hyperslab(mem_space, H5S_SELECT_SET, start, NULL, count, NULL) < 0) + ERROR; + + /* Write data */ + if(H5Dwrite(dset, H5T_NATIVE_INT, mem_space, file_space, H5P_DEFAULT, buf) < 0) + ERROR; + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Sclose(file_space) < 0) + ERROR; + if(H5Sclose(mem_space) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Sclose(file_space); + H5Sclose(mem_space); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_write.c b/examples/h5rados_dset_write.c new file mode 100644 index 0000000..dad96fa --- /dev/null +++ b/examples/h5rados_dset_write.c @@ -0,0 +1,81 @@ +#include "h5rados_example.h" +#include <time.h> + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, fapl = -1; + int buf[4][6]; + int i, j; + + (void)MPI_Init(&argc, &argv); + 
+ /* Seed random number generator */ + srand(time(NULL)); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + /* Fill and print buffer */ + printf("Writing data. Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) { + buf[i][j] = rand() % 10; + printf("%d ", buf[i][j]); + } + printf("\n"); + } + + /* Write data */ + if(H5Dwrite(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf) < 0) + ERROR; + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_dset_wss.c b/examples/h5rados_dset_wss.c new file mode 100644 index 0000000..cefc1ed --- /dev/null +++ b/examples/h5rados_dset_wss.c @@ -0,0 +1,135 @@ +#include "h5rados_example.h" +#include <time.h> + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, dset = -1, file_space = -1, mem_space = -1, fapl = -1; + hsize_t dims[2] = {4, 6}; + hsize_t mstart[2], count[2], fstart[2]; + int buf[4][6]; + int i, j; + + (void)MPI_Init(&argc, &argv); + + /* Seed random number generator */ + srand(time(NULL)); + + if(argc != 9) + PRINTF_ERROR("argc != 9\n"); + + mstart[0] = (hsize_t)atoi(argv[3]); + mstart[1] = 
(hsize_t)atoi(argv[4]); + count[0] = (hsize_t)atoi(argv[5]); + count[1] = (hsize_t)atoi(argv[6]); + fstart[0] = (hsize_t)atoi(argv[7]); + fstart[1] = (hsize_t)atoi(argv[8]); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + /* Open dataset */ + if((dset = H5Dopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + printf("Selecting elements denoted with X\n"); + printf("Memory:\n"); + for(i = 0; i < 4; i++) { + if((i >= mstart[0]) && (i < mstart[0] + count[0])) { + for(j = 0; j < 6; j++) + if((j >= mstart[1]) && (j < mstart[1] + count[1])) + printf("X"); + else + printf("."); + printf("\n"); + } + else + printf("......\n"); + } + printf("\nFile:\n"); + for(i = 0; i < 4; i++) { + if((i >= fstart[0]) && (i < fstart[0] + count[0])) { + for(j = 0; j < 6; j++) + if((j >= fstart[1]) && (j < fstart[1] + count[1])) + printf("X"); + else + printf("."); + printf("\n"); + } + else + printf("......\n"); + } + + /* Fill and print buffer */ + printf("Writing data. 
Buffer is:\n"); + for(i = 0; i < 4; i++) { + for(j = 0; j < 6; j++) { + buf[i][j] = rand() % 10; + printf("%d ", buf[i][j]); + } + printf("\n"); + } + + + /* Set up dataspaces */ + if((file_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if((mem_space = H5Screate_simple(2, dims, NULL)) < 0) + ERROR; + if(H5Sselect_hyperslab(file_space, H5S_SELECT_SET, fstart, NULL, count, NULL) < 0) + ERROR; + if(H5Sselect_hyperslab(mem_space, H5S_SELECT_SET, mstart, NULL, count, NULL) < 0) + ERROR; + + /* Write data */ + if(H5Dwrite(dset, H5T_NATIVE_INT, mem_space, file_space, H5P_DEFAULT, buf) < 0) + ERROR; + + /* Close */ + if(H5Dclose(dset) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Sclose(file_space) < 0) + ERROR; + if(H5Sclose(mem_space) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Dclose(dset); + H5Fclose(file); + H5Sclose(file_space); + H5Sclose(mem_space); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_example.h b/examples/h5rados_example.h new file mode 100644 index 0000000..4516b15 --- /dev/null +++ b/examples/h5rados_example.h @@ -0,0 +1,14 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <mpi.h> +#include <hdf5.h> +#include <rados/librados.h> +#include <H5VLrados_public.h> + +/* Macros for printing standard messages and issuing errors */ +#define AT() printf (" at %s:%d in %s()...\n", __FILE__, __LINE__, __FUNCTION__) +#define FAILED() do {puts("*FAILED*");fflush(stdout);} while(0) +#define ERROR do {FAILED(); AT(); goto error;} while(0) +#define PRINTF_ERROR(...) 
do {FAILED(); AT(); printf(" " __VA_ARGS__); printf("\n"); goto error;} while(0) + diff --git a/examples/h5rados_file_create.c b/examples/h5rados_file_create.c new file mode 100644 index 0000000..493be18 --- /dev/null +++ b/examples/h5rados_file_create.c @@ -0,0 +1,54 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, fapl = -1; + + (void)MPI_Init(&argc, &argv); + + if(argc != 2) + PRINTF_ERROR("argc != 2\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Create file */ + if((file = H5Fcreate(argv[1], H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0) + ERROR; + + /* Close */ + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_file_open.c b/examples/h5rados_file_open.c new file mode 100644 index 0000000..176d26e --- /dev/null +++ b/examples/h5rados_file_open.c @@ -0,0 +1,54 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, fapl = -1; + + (void)MPI_Init(&argc, &argv); + + if(argc != 2) + PRINTF_ERROR("argc != 2\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) 
< 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file (read-only) */ + if((file = H5Fopen(argv[1], H5F_ACC_RDONLY, fapl)) < 0) + ERROR; + + /* Close */ + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_group_create.c b/examples/h5rados_group_create.c new file mode 100644 index 0000000..fcfbbc0 --- /dev/null +++ b/examples/h5rados_group_create.c @@ -0,0 +1,63 @@ +#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, fapl = -1, grp = -1; + + (void)MPI_Init(&argc, &argv); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDWR, fapl)) < 0) + ERROR; + + printf("Creating group\n"); + + /* Create group */ + if((grp = H5Gcreate2(file, argv[2], H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + ERROR; + + /* Close */ + if(H5Gclose(grp) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Gclose(grp); + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/examples/h5rados_group_open.c b/examples/h5rados_group_open.c new file mode 100644 index 0000000..3339480 --- /dev/null +++ b/examples/h5rados_group_open.c @@ -0,0 +1,63 @@ 
+#include "h5rados_example.h" + +int main(int argc, char *argv[]) { + rados_t cluster; + char *pool = "mypool"; + hid_t file = -1, fapl = -1, grp = -1; + + (void)MPI_Init(&argc, &argv); + + if(argc != 3) + PRINTF_ERROR("argc != 3\n"); + + if(rados_create(&cluster, NULL) < 0) + ERROR; + if(rados_conf_read_file(cluster, "ceph.conf") < 0) + ERROR; + + /* Initialize VOL */ + if(H5VLrados_init(cluster, pool) < 0) + ERROR; + + /* Set up FAPL */ + if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + ERROR; + if(H5Pset_fapl_rados(fapl, MPI_COMM_WORLD, MPI_INFO_NULL) < 0) + ERROR; + if(H5Pset_all_coll_metadata_ops(fapl, true) < 0) + ERROR; + + /* Open file */ + if((file = H5Fopen(argv[1], H5F_ACC_RDONLY, fapl)) < 0) + ERROR; + + printf("Opening group\n"); + + /* Open group */ + if((grp = H5Gopen2(file, argv[2], H5P_DEFAULT)) < 0) + ERROR; + + /* Close */ + if(H5Gclose(grp) < 0) + ERROR; + if(H5Fclose(file) < 0) + ERROR; + if(H5Pclose(fapl) < 0) + ERROR; + + printf("Success\n"); + + (void)MPI_Finalize(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Gclose(grp); + H5Fclose(file); + H5Pclose(fapl); + } H5E_END_TRY; + + (void)MPI_Finalize(); + return 1; +} + diff --git a/src/H5VLrados.c b/src/H5VLrados.c new file mode 100644 index 0000000..76a865e --- /dev/null +++ b/src/H5VLrados.c @@ -0,0 +1,4553 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. 
It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Neil Fortner <nfortne2@hdfgroup.org> + * December, 2017 + * + * Purpose: The RADOS VOL plugin where access is forwarded to the RADOS API + */ + +#define H5O_FRIEND /* Suppress error about including H5Opkg */ + +#include "H5private.h" /* Generic Functions */ +#include "H5Dprivate.h" /* Datasets */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fprivate.h" /* Files */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5Gprivate.h" /* Groups */ +#include "H5Iprivate.h" /* IDs */ +#include "H5MMprivate.h" /* Memory management */ +#include "H5Opkg.h" /* Objects */ +#include "H5Pprivate.h" /* Property lists */ +#include "H5Sprivate.h" /* Dataspaces */ +#include "H5VLprivate.h" /* VOL plugins */ +#include "H5VLrados.h" /* RADOS plugin */ + +hid_t H5VL_RADOS_g = -1; + +/* + * Macros + */ +/* Stack allocation sizes */ +#define H5VL_RADOS_FOI_BUF_SIZE 1024 +#define H5VL_RADOS_LINK_VAL_BUF_SIZE 256 +#define H5VL_RADOS_GINFO_BUF_SIZE 256 +#define H5VL_RADOS_DINFO_BUF_SIZE 1024 +#define H5VL_RADOS_SEQ_LIST_LEN 128 + +/* Definitions for building oids */ +#define H5VL_RADOS_IDX_MASK 0x3fffffffffffffffull +#define H5VL_RADOS_TYPE_MASK 0xc000000000000000ull +#define H5VL_RADOS_TYPE_GRP 0x0000000000000000ull +#define H5VL_RADOS_TYPE_DSET 0x4000000000000000ull +#define H5VL_RADOS_TYPE_DTYPE 0x8000000000000000ull + +/* Definitions for chunking code */ +#define H5VL_RADOS_DEFAULT_NUM_SEL_CHUNKS 64 + +/* + * Typedefs + */ +/* RADOS-specific file access properties */ +typedef struct H5VL_rados_fapl_t { + MPI_Comm comm; /*communicator */ + MPI_Info info; /*file information */ +} H5VL_rados_fapl_t; + +/* Enum to indicate if the supplied read buffer can be used as a type conversion + * or background 
buffer */ +typedef enum { + H5VL_RADOS_TCONV_REUSE_NONE, /* Cannot reuse buffer */ + H5VL_RADOS_TCONV_REUSE_TCONV, /* Use buffer as type conversion buffer */ + H5VL_RADOS_TCONV_REUSE_BKG /* Use buffer as background buffer */ +} H5VL_rados_tconv_reuse_t; + +/* Udata type for H5Dscatter callback */ +typedef struct H5VL_rados_scatter_cb_ud_t { + void *buf; + size_t len; +} H5VL_rados_scatter_cb_ud_t; + +#if 0 +/* Udata type for memory space H5Diterate callback */ +typedef struct { + daos_iod_t *iods; + daos_sg_list_t *sgls; + daos_iov_t *sg_iovs; + hbool_t is_vl_str; + size_t base_type_size; + uint64_t offset; + uint64_t idx; +} H5VL_rados_vl_mem_ud_t; + +/* Udata type for file space H5Diterate callback */ +typedef struct { + uint8_t **akeys; + daos_iod_t *iods; + uint64_t idx; +} H5VL_rados_vl_file_ud_t; +#endif + +/* Information about a singular selected chunk during a Dataset read/write */ +typedef struct H5VL_rados_select_chunk_info_t { + uint64_t chunk_coords[H5S_MAX_RANK]; /* The starting coordinates ("upper left corner") of the chunk */ + hid_t mspace_id; /* The memory space corresponding to the + selection in the chunk in memory */ + hid_t fspace_id; /* The file space corresponding to the + selection in the chunk in the file */ +} H5VL_rados_select_chunk_info_t; + +/* + * Prototypes + */ +static void *H5VL_rados_fapl_copy(const void *_old_fa); +static herr_t H5VL_rados_fapl_free(void *_fa); +static herr_t H5VL_rados_term(hid_t vtpl_id); + +/* File callbacks */ +static void *H5VL_rados_file_create(const char *name, unsigned flags, + hid_t fcpl_id, hid_t fapl_id, hid_t dxpl_id, void **req); +static void *H5VL_rados_file_open(const char *name, unsigned flags, + hid_t fapl_id, hid_t dxpl_id, void **req); +//static herr_t H5VL_iod_file_get(void *file, H5VL_file_get_t get_type, hid_t dxpl_id, void **req, va_list arguments); +static herr_t H5VL_rados_file_specific(void *_item, + H5VL_file_specific_t specific_type, hid_t dxpl_id, void **req, + va_list arguments); 
+static herr_t H5VL_rados_file_close(void *_file, hid_t dxpl_id, void **req); + +/* Link callbacks */ +/*static herr_t H5VL_rados_link_create(H5VL_link_create_type_t create_type, + void *_item, H5VL_loc_params_t loc_params, hid_t lcpl_id, hid_t lapl_id, + hid_t dxpl_id, void **req); +static herr_t H5VL_rados_link_specific(void *_item, + H5VL_loc_params_t loc_params, H5VL_link_specific_t specific_type, + hid_t dxpl_id, void **req, va_list arguments);*/ + +/* Group callbacks */ +static void *H5VL_rados_group_create(void *_item, H5VL_loc_params_t loc_params, + const char *name, hid_t gcpl_id, hid_t gapl_id, hid_t dxpl_id, void **req); +static void *H5VL_rados_group_open(void *_item, H5VL_loc_params_t loc_params, + const char *name, hid_t gapl_id, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_group_close(void *_grp, hid_t dxpl_id, void **req); + +/* Dataset callbacks */ +static void *H5VL_rados_dataset_create(void *_item, + H5VL_loc_params_t loc_params, const char *name, hid_t dcpl_id, + hid_t dapl_id, hid_t dxpl_id, void **req); +static void *H5VL_rados_dataset_open(void *_item, H5VL_loc_params_t loc_params, + const char *name, hid_t dapl_id, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_dataset_read(void *_dset, hid_t mem_type_id, + hid_t mem_space_id, hid_t file_space_id, hid_t dxpl_id, void *buf, + void **req); +static herr_t H5VL_rados_dataset_write(void *_dset, hid_t mem_type_id, + hid_t mem_space_id, hid_t file_space_id, hid_t dxpl_id, const void *buf, + void **req); +/*static herr_t H5VL_rados_dataset_specific(void *_dset, H5VL_dataset_specific_t specific_type, + hid_t dxpl_id, void **req, va_list arguments);*/ +static herr_t H5VL_rados_dataset_get(void *_dset, H5VL_dataset_get_t get_type, + hid_t dxpl_id, void **req, va_list arguments); +static herr_t H5VL_rados_dataset_close(void *_dset, hid_t dxpl_id, void **req); + +/* Datatype callbacks */ +/*static void *H5VL_rados_datatype_commit(void *obj, H5VL_loc_params_t loc_params, + const char 
*name, hid_t type_id, hid_t lcpl_id, hid_t tcpl_id, + hid_t tapl_id, hid_t dxpl_id, void **req); +static void *H5VL_rados_datatype_open(void *_item, H5VL_loc_params_t loc_params, + const char *name, hid_t tapl_id, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_datatype_get(void *obj, H5VL_datatype_get_t get_type, + hid_t dxpl_id, void **req, va_list arguments);*/ + +/* Object callbacks */ +/*static void *H5VL_rados_object_open(void *_item, H5VL_loc_params_t loc_params, + H5I_type_t *opened_type, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_object_optional(void *_item, hid_t dxpl_id, void **req, + va_list arguments);*/ + +/* Attribute callbacks */ +/*static void *H5VL_rados_attribute_create(void *_obj, + H5VL_loc_params_t loc_params, const char *name, hid_t acpl_id, + hid_t aapl_id, hid_t dxpl_id, void **req); +static void *H5VL_rados_attribute_open(void *_obj, H5VL_loc_params_t loc_params, + const char *name, hid_t aapl_id, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_attribute_read(void *_attr, hid_t mem_type_id, + void *buf, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_attribute_write(void *_attr, hid_t mem_type_id, + const void *buf, hid_t dxpl_id, void **req); +static herr_t H5VL_rados_attribute_get(void *_item, H5VL_attr_get_t get_type, + hid_t dxpl_id, void **req, va_list arguments); +static herr_t H5VL_rados_attribute_specific(void *_item, + H5VL_loc_params_t loc_params, H5VL_attr_specific_t specific_type, + hid_t dxpl_id, void **req, va_list arguments); +static herr_t H5VL_rados_attribute_close(void *_attr, hid_t dxpl_id, + void **req);*/ + +/* Helper routines */ +static herr_t H5VL_rados_write_max_oid(H5VL_rados_file_t *file); +static herr_t H5VL_rados_file_flush(H5VL_rados_file_t *file); +static herr_t H5VL_rados_file_close_helper(H5VL_rados_file_t *file, + hid_t dxpl_id, void **req); + +static herr_t H5VL_rados_link_read(H5VL_rados_group_t *grp, const char *name, + H5VL_rados_link_val_t *val); +static herr_t 
H5VL_rados_link_write(H5VL_rados_group_t *grp, const char *name, + H5VL_rados_link_val_t *val); +static herr_t H5VL_rados_link_follow(H5VL_rados_group_t *grp, const char *name, + hid_t dxpl_id, void **req, uint64_t *oid); +static herr_t H5VL_rados_link_follow_comp(H5VL_rados_group_t *grp, char *name, + size_t name_len, hid_t dxpl_id, void **req, uint64_t *oid); + +static H5VL_rados_group_t *H5VL_rados_group_traverse(H5VL_rados_item_t *item, + char *path, hid_t dxpl_id, void **req, char **obj_name, + void **gcpl_buf_out, uint64_t *gcpl_len_out); +static H5VL_rados_group_t *H5VL_rados_group_traverse_const( + H5VL_rados_item_t *item, const char *path, hid_t dxpl_id, void **req, + const char **obj_name, void **gcpl_buf_out, uint64_t *gcpl_len_out); +static void *H5VL_rados_group_create_helper(H5VL_rados_file_t *file, + hid_t gcpl_id, hid_t gapl_id, hid_t dxpl_id, void **req, + H5VL_rados_group_t *parent_grp, const char *name, hbool_t collective); +static void *H5VL_rados_group_open_helper(H5VL_rados_file_t *file, + uint64_t oid, hid_t gapl_id, hid_t dxpl_id, void **req, void **gcpl_buf_out, + uint64_t *gcpl_len_out); +static void *H5VL_rados_group_reconstitute(H5VL_rados_file_t *file, + uint64_t oid, uint8_t *gcpl_buf, hid_t gapl_id, hid_t dxpl_id, void **req); + +static htri_t H5VL_rados_need_bkg(hid_t src_type_id, hid_t dst_type_id, + size_t *dst_type_size, hbool_t *fill_bkg); +static herr_t H5VL_rados_tconv_init(hid_t src_type_id, size_t *src_type_size, + hid_t dst_type_id, size_t *dst_type_size, hbool_t *_types_equal, + H5VL_rados_tconv_reuse_t *reuse, hbool_t *_need_bkg, hbool_t *fill_bkg); +static herr_t H5VL_rados_get_selected_chunk_info(hid_t dcpl_id, + hid_t file_space_id, hid_t mem_space_id, + H5VL_rados_select_chunk_info_t **chunk_info, size_t *chunk_info_len); +static herr_t H5VL_rados_build_io_op_merge(H5S_t *mem_space, H5S_t *file_space, + size_t type_size, size_t tot_nelem, void *rbuf, const void *wbuf, + rados_read_op_t read_op, rados_write_op_t 
write_op); +static herr_t H5VL_rados_build_io_op_match(H5S_t *file_space, size_t type_size, + size_t tot_nelem, void *rbuf, const void *wbuf, rados_read_op_t read_op, + rados_write_op_t write_op); +static herr_t H5VL_rados_build_io_op_contig(H5S_t *file_space, size_t type_size, + size_t tot_nelem, void *rbuf, const void *wbuf, rados_read_op_t read_op, + rados_write_op_t write_op); +static herr_t H5VL_rados_scatter_cb(const void **src_buf, + size_t *src_buf_bytes_used, void *_udata); +/*static herr_t H5VL_rados_dataset_mem_vl_rd_cb(void *_elem, hid_t type_id, + unsigned ndim, const hsize_t *point, void *_udata); +static herr_t H5VL_rados_dataset_file_vl_cb(void *_elem, hid_t type_id, + unsigned ndim, const hsize_t *point, void *_udata); +static herr_t H5VL_rados_dataset_mem_vl_wr_cb(void *_elem, hid_t type_id, + unsigned ndim, const hsize_t *point, void *_udata); + +static herr_t H5VL_rados_datatype_close(void *_dtype, hid_t dxpl_id, + void **req); + +static herr_t H5VL_rados_object_close(void *_obj, hid_t dxpl_id, void **req);*/ + +/* Free list definitions */ +H5FL_DEFINE(H5VL_rados_file_t); +H5FL_DEFINE(H5VL_rados_group_t); +H5FL_DEFINE(H5VL_rados_dset_t); +/*H5FL_DEFINE(H5VL_rados_dtype_t); +H5FL_DEFINE(H5VL_rados_attr_t);*/ + +/* The RADOS VOL plugin struct */ +static H5VL_class_t H5VL_rados_g = { + HDF5_VOL_RADOS_VERSION_1, /* Version number */ + H5_VOL_RADOS, /* Plugin value */ + "rados", /* name */ + NULL, /* initialize */ + H5VL_rados_term, /* terminate */ + sizeof(H5VL_rados_fapl_t), /*fapl_size */ + H5VL_rados_fapl_copy, /*fapl_copy */ + H5VL_rados_fapl_free, /*fapl_free */ + { /* attribute_cls */ + NULL,//H5VL_rados_attribute_create, /* create */ + NULL,//H5VL_rados_attribute_open, /* open */ + NULL,//H5VL_rados_attribute_read, /* read */ + NULL,//H5VL_rados_attribute_write, /* write */ + NULL,//H5VL_rados_attribute_get, /* get */ + NULL,//H5VL_rados_attribute_specific, /* specific */ + NULL, /* optional */ + NULL,//H5VL_rados_attribute_close /* close */ 
+ }, + { /* dataset_cls */ + H5VL_rados_dataset_create, /* create */ + H5VL_rados_dataset_open, /* open */ + H5VL_rados_dataset_read, /* read */ + H5VL_rados_dataset_write, /* write */ + H5VL_rados_dataset_get, /* get */ + NULL,//H5VL_iod_dataset_specific, /* specific */ + NULL, /* optional */ + H5VL_rados_dataset_close /* close */ + }, + { /* datatype_cls */ + NULL,//H5VL_rados_datatype_commit, /* commit */ + NULL,//H5VL_rados_datatype_open, /* open */ + NULL,//H5VL_rados_datatype_get, /* get */ + NULL, /* specific */ + NULL, /* optional */ + NULL,//H5VL_rados_datatype_close /* close */ + }, + { /* file_cls */ + H5VL_rados_file_create, /* create */ + H5VL_rados_file_open, /* open */ + NULL,//H5VL_iod_file_get, /* get */ + H5VL_rados_file_specific, /* specific */ + NULL, /* optional */ + H5VL_rados_file_close /* close */ + }, + { /* group_cls */ + H5VL_rados_group_create, /* create */ + H5VL_rados_group_open, /* open */ + NULL,//H5VL_iod_group_get, /* get */ + NULL, /* specific */ + NULL, /* optional */ + H5VL_rados_group_close /* close */ + }, + { /* link_cls */ + NULL,//H5VL_rados_link_create, /* create */ + NULL,//H5VL_iod_link_copy, /* copy */ + NULL,//H5VL_iod_link_move, /* move */ + NULL,//H5VL_iod_link_get, /* get */ + NULL,//H5VL_rados_link_specific, /* specific */ + NULL /* optional */ + }, + { /* object_cls */ + NULL,//H5VL_rados_object_open, /* open */ + NULL, /* copy */ + NULL, /* get */ + NULL,//H5VL_iod_object_specific, /* specific */ + NULL,//H5VL_rados_object_optional /* optional */ + }, + { + NULL,//H5VL_iod_cancel, + NULL,//H5VL_iod_test, + NULL,//H5VL_iod_wait + }, + NULL +}; + +/* The RADOS cluster */ +rados_t cluster_g; +hbool_t cluster_init_g = FALSE; + +/* The RADOS IO context */ +rados_ioctx_t ioctx_g; +hbool_t ioctx_init_g = FALSE; + + +/* Create a RADOS string oid given the file name and binary oid */ +static herr_t +H5VL_rados_oid_create_string(const H5VL_rados_file_t *file, uint64_t bin_oid, + char **oid) +{ + char *tmp_oid = NULL; + 
herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Allocate space for oid */ + if(NULL == (tmp_oid = (char *)H5MM_malloc(2 + file->file_name_len + 16 + 1))) + HGOTO_ERROR(H5E_VOL, H5E_CANTALLOC, FAIL, "can't allocate RADOS object id") + + /* Encode file name and binary oid into string oid */ + if(HDsnprintf(tmp_oid, 2 + file->file_name_len + 16 + 1, "ob%s%016llX", + file->file_name, (long long unsigned)bin_oid) + != 2 + (int)file->file_name_len + 16) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't encode string object id") + + /* Return oid string value */ + *oid = tmp_oid; + tmp_oid = NULL; + +done: + H5MM_xfree(tmp_oid); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_oid_create_string() */ + + +/* Create a RADOS string oid for a data chunk given the file name, binary oid, + * dataset rank, and chunk location. If *oid is not NULL, it is assumed to be a + * buffer large enough, i.e. one previously returned by this function with the + * same file and rank */ +static herr_t +H5VL_rados_oid_create_chunk(const H5VL_rados_file_t *file, uint64_t bin_oid, + int rank, uint64_t *chunk_loc, char **oid) +{ + char *tmp_buf = NULL; + char *enc_buf = NULL; + size_t oid_len; + size_t oid_off; + int i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert((rank >= 0) && (rank <= 99)); + + /* Calculate space needed for oid */ + oid_len = 2 + file->file_name_len + 16 + ((size_t)rank * 16) + 1; + + /* Assign encoding buffer and allocate buffer, if needed */ + if(*oid) + enc_buf = *oid; + else { + if(NULL == (tmp_buf = (char *)H5MM_malloc(oid_len))) + HGOTO_ERROR(H5E_VOL, H5E_CANTALLOC, FAIL, "can't allocate RADOS object id") + enc_buf = tmp_buf; + } /* end else */ + + /* Encode file name and binary oid into string oid */ + if(HDsnprintf(enc_buf, oid_len, "%02d%s%016llX", rank, file->file_name, + (long long unsigned)bin_oid) != 2 + (int)file->file_name_len + 16) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't encode string object id") + oid_off = 
2 + file->file_name_len + 16; + + /* Encode chunk location */ + for(i = 0; i < rank; i++) { + if(HDsnprintf(enc_buf + oid_off, oid_len - oid_off, "%016llX", (long long unsigned)chunk_loc[i]) + != 16) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't encode string object id") + oid_off += 16; + } /* end for */ + + /* Return oid string value */ + if(!*oid) { + *oid = tmp_buf; + tmp_buf = NULL; + } /* end if */ + +done: + H5MM_xfree(tmp_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_oid_create_chunk() */ + + +/* Create a binary RADOS oid given the object type and a 64 bit index (top 2 + * bits are ignored) */ +static void +H5VL_rados_oid_create_binary(uint64_t idx, H5I_type_t obj_type, + uint64_t *bin_oid) +{ + /* Initialize bin_oid */ + *bin_oid = idx & H5VL_RADOS_IDX_MASK; + + /* Set type_bits */ + if(obj_type == H5I_GROUP) + *bin_oid |= H5VL_RADOS_TYPE_GRP; + else if(obj_type == H5I_DATASET) + *bin_oid |= H5VL_RADOS_TYPE_DSET; + else { + HDassert(obj_type == H5I_DATATYPE); + *bin_oid |= H5VL_RADOS_TYPE_DTYPE; + } /* end else */ + + return; +} /* end H5VL_rados_oid_create_binary() */ + + +/* Create a RADOS oid given the file name, object type and a 64 bit index (top 2 + * bits are ignored) */ +static herr_t +H5VL_rados_oid_create(const H5VL_rados_file_t *file, uint64_t idx, + H5I_type_t obj_type, uint64_t *bin_oid, char **oid) +{ + uint64_t tmp_bin_oid = *bin_oid; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Create binary oid */ + H5VL_rados_oid_create_binary(idx, obj_type, &tmp_bin_oid); + + /* Create sting oid */ + if(H5VL_rados_oid_create_string(file, tmp_bin_oid, oid) < 0) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't encode string object id") + + /* Return oid binary value (string already returned) */ + *bin_oid = tmp_bin_oid; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_oid_create() */ + + +/* Retrieve the 64 bit object index from a RADOS oid */ +static uint64_t +H5VL_rados_oid_to_idx(uint64_t bin_oid) +{ + 
return bin_oid & H5VL_RADOS_IDX_MASK; +} /* end H5VL_rados_oid_to_idx() */ + + +/*------------------------------------------------------------------------- + * Function: H5VLrados_init + * + * Purpose: Initialize this vol plugin by connecting to the cluster + * and registering the driver with the library. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5VLrados_init(rados_t rados_cluster, const char *rados_pool) +{ + int ret; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + + /* Check if already initialized */ + if(H5VL_RADOS_g >= 0) + HGOTO_DONE(SUCCEED) + + /* Register the RADOS VOL, if it isn't already */ + if(H5VL_rados_init() < 0) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "unable to initialize RADOS VOL plugin") + + /* Save cluster */ + cluster_g = rados_cluster; + + /* Connect to cluster */ + if((ret = rados_connect(rados_cluster)) < 0) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't connect to cluster: %s", strerror(-ret)) + cluster_init_g = TRUE; + + /* Create IO context */ + if((ret = rados_ioctx_create(cluster_g, rados_pool, &ioctx_g)) < 0) + HGOTO_ERROR(H5E_VOL, H5E_CANTINIT, FAIL, "can't create IO context: %s", strerror(-ret)) + ioctx_init_g = TRUE; + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5VLrados_init() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_init + * + * Purpose: Initialize this vol plugin by registering the driver with the + * library. 
+ * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5VL_rados_init(void) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Register interfaces that might not be initialized in time (for example if + * we open an object without knowing its type first, H5Oopen will not + * initialize that type) */ + if(H5G_init() < 0) + HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "unable to initialize group interface") + if(H5D_init() < 0) + HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "unable to initialize dataset interface") + if(H5T_init() < 0) + HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "unable to initialize datatype interface") + + /* Register the RADOS VOL, if it isn't already */ + if(NULL == H5I_object_verify(H5VL_RADOS_g, H5I_VOL)) { + if((H5VL_RADOS_g = H5VL_register((const H5VL_class_t *)&H5VL_rados_g, + sizeof(H5VL_class_t), TRUE)) < 0) + HGOTO_ERROR(H5E_ATOM, H5E_CANTINSERT, FAIL, "can't create ID for RADOS plugin") + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_init() */ + + +/*------------------------------------------------------------------------- + * Function: H5VLrados_term + * + * Purpose: Shut down the RADOS VOL + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * January, 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5VLrados_term(void) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE0("e",""); + + /* Terminate the plugin */ + if(H5VL_rados_term(-1) < 0) + HGOTO_ERROR(H5E_VOL, H5E_CLOSEERROR, FAIL, "can't close RADOS VOL plugin") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5VLrados_term() */ + + +/*--------------------------------------------------------------------------- + * Function: H5VL_rados_term + * + * 
Purpose: Shut down the RADOS VOL + * + * Returns: Non-negative on success/Negative on failure + * + *--------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_term(hid_t H5_ATTR_UNUSED vtpl_id) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + if(H5VL_RADOS_g >= 0) { + /* Terminate RADOS */ + if(ioctx_init_g) { + rados_ioctx_destroy(ioctx_g); + ioctx_init_g = FALSE; + } /* end if */ + if(cluster_init_g) { + rados_shutdown(cluster_g); + cluster_init_g = FALSE; + } /* end if */ + + /* "Forget" plugin id. This should normally be called by the library + * when it is closing the id, so no need to close it here. */ + H5VL_RADOS_g = -1; + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_term() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pset_fapl_rados + * + * Purpose: Modify the file access property list to use the H5VL_RADOS + * plugin defined in this source file. file_comm and + * file_info identify the communicator and info object used + * to coordinate actions on file create, open, flush, and + * close. 
+ * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * January, 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pset_fapl_rados(hid_t fapl_id, MPI_Comm file_comm, MPI_Info file_info) +{ + H5VL_rados_fapl_t fa; + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value; + + FUNC_ENTER_API(FAIL) + H5TRACE3("e", "iMcMi", fapl_id, file_comm, file_info); + + if(H5VL_RADOS_g < 0) + HGOTO_ERROR(H5E_VOL, H5E_UNINITIALIZED, FAIL, "RADOS VOL plugin not initialized") + + if(fapl_id == H5P_DEFAULT) + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list") + + if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") + + if(MPI_COMM_NULL == file_comm) + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a valid communicator") + + /* Initialize driver specific properties */ + fa.comm = file_comm; + fa.info = file_info; + + ret_value = H5P_set_vol(plist, H5VL_RADOS_g, &fa); + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5Pset_fapl_rados() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_fapl_copy + * + * Purpose: Copies the rados-specific file access properties. 
+ * + * Return: Success: Ptr to a new property list + * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_fapl_copy(const void *_old_fa) +{ + const H5VL_rados_fapl_t *old_fa = (const H5VL_rados_fapl_t*)_old_fa; + H5VL_rados_fapl_t *new_fa = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + if(NULL == (new_fa = (H5VL_rados_fapl_t *)H5MM_malloc(sizeof(H5VL_rados_fapl_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed") + + /* Copy the general information */ + HDmemcpy(new_fa, old_fa, sizeof(H5VL_rados_fapl_t)); + + /* Clear allocated fields, so they aren't freed if something goes wrong. No + * need to clear info since it is only freed if comm is not null. */ + new_fa->comm = MPI_COMM_NULL; + + /* Duplicate communicator and Info object. */ + if(FAIL == H5FD_mpi_comm_info_dup(old_fa->comm, old_fa->info, &new_fa->comm, &new_fa->info)) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") + + ret_value = new_fa; + +done: + if (NULL == ret_value) { + /* cleanup */ + if(new_fa && H5VL_rados_fapl_free(new_fa) < 0) + HDONE_ERROR(H5E_PLIST, H5E_CANTFREE, NULL, "can't free fapl") + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_fapl_copy() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_fapl_free + * + * Purpose: Frees the rados-specific file access properties. 
+ * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_fapl_free(void *_fa) +{ + herr_t ret_value = SUCCEED; + H5VL_rados_fapl_t *fa = (H5VL_rados_fapl_t*)_fa; + + FUNC_ENTER_NOAPI_NOINIT + + assert(fa); + + /* Free the internal communicator and INFO object */ + if(fa->comm != MPI_COMM_NULL) + if(H5FD_mpi_comm_info_free(&fa->comm, &fa->info) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "Communicator/Info free failed") + + /* free the struct */ + H5MM_xfree(fa); + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_fapl_free() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_create + * + * Purpose: Creates a file as a rados HDF5 file. + * + * Return: Success: the file id. + * Failure: NULL + * + * Programmer: Neil Fortner + * January, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_file_create(const char *name, unsigned flags, hid_t fcpl_id, + hid_t fapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_fapl_t *fa = NULL; + H5P_genplist_t *plist = NULL; /* Property list pointer */ + H5VL_rados_file_t *file = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* + * Adjust bit flags by turning on the creation bit and making sure that + * the EXCL or TRUNC bit is set. All newly-created files are opened for + * reading and writing. 
+ */ + if(0==(flags & (H5F_ACC_EXCL|H5F_ACC_TRUNC))) + flags |= H5F_ACC_EXCL; /*default*/ + flags |= H5F_ACC_RDWR | H5F_ACC_CREAT; + + /* Get information from the FAPL */ + if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list") + if(NULL == (fa = (H5VL_rados_fapl_t *)H5P_get_vol_info(plist))) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get RADOS info struct") + + /* allocate the file object that is returned to the user */ + if(NULL == (file = H5FL_CALLOC(H5VL_rados_file_t))) + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, NULL, "can't allocate RADOS file struct") + file->glob_md_oid = NULL; + file->root_grp = NULL; + file->fcpl_id = FAIL; + file->fapl_id = FAIL; + + /* Fill in fields of file we know */ + file->item.type = H5I_FILE; + file->item.file = file; + file->item.rc = 1; + if(NULL == (file->file_name = HDstrdup(name))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't copy file name") + file->file_name_len = HDstrlen(name); + file->flags = flags; + file->max_oid = 0; + if(H5VL_rados_oid_create_string(file, file->max_oid, &file->glob_md_oid) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't create oid for globabl metadata object") + file->max_oid_dirty = FALSE; + if((file->fcpl_id = H5Pcopy(fcpl_id)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTCOPY, NULL, "failed to copy fcpl") + if((file->fapl_id = H5Pcopy(fapl_id)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTCOPY, NULL, "failed to copy fapl") + + /* Duplicate communicator and Info object. 
*/ + if(FAIL == H5FD_mpi_comm_info_dup(fa->comm, fa->info, &file->comm, &file->info)) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") + + /* Obtain the process rank and size from the communicator attached to the + * fapl ID */ + MPI_Comm_rank(fa->comm, &file->my_rank); + MPI_Comm_size(fa->comm, &file->num_procs); + + /* Determine if we requested collective object ops for the file */ + if(H5Pget_all_coll_metadata_ops(fapl_id, &file->collective) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get collective access property") + + /* Create root group */ + if(NULL == (file->root_grp = (H5VL_rados_group_t *)H5VL_rados_group_create_helper(file, fcpl_id, H5P_GROUP_ACCESS_DEFAULT, dxpl_id, req, NULL, NULL, TRUE))) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't create root group") + + /* Create root group oid */ + HDassert(H5VL_rados_oid_to_idx(file->root_grp->obj.bin_oid) == (uint64_t)1); + + ret_value = (void *)file; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) { + /* Close file */ + if(file && H5VL_rados_file_close_helper(file, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "can't close file") + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_create() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_open + * + * Purpose: Opens a file as a RADOS HDF5 file. + * + * Return: Success: the file id. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_file_open(const char *name, unsigned flags, hid_t fapl_id, + hid_t dxpl_id, void **req) +{ + H5VL_rados_fapl_t *fa = NULL; + H5P_genplist_t *plist = NULL; /* Property list pointer */ + H5VL_rados_file_t *file = NULL; + char foi_buf_static[H5VL_RADOS_FOI_BUF_SIZE]; + char *foi_buf_dyn = NULL; + char *foi_buf = foi_buf_static; + void *gcpl_buf = NULL; + uint64_t gcpl_len; + uint64_t root_grp_oid; + hbool_t must_bcast = FALSE; + uint8_t *p; + int ret; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Get information from the FAPL */ + if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list") + if(NULL == (fa = (H5VL_rados_fapl_t *)H5P_get_vol_info(plist))) + HGOTO_ERROR(H5E_SYM, H5E_CANTGET, NULL, "can't get RADOS info struct") + + /* allocate the file object that is returned to the user */ + if(NULL == (file = H5FL_CALLOC(H5VL_rados_file_t))) + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, NULL, "can't allocate RADOS file struct") + //file->glob_md_oh = DAOS_HDL_INVAL; + file->root_grp = NULL; + file->fcpl_id = FAIL; + file->fapl_id = FAIL; + + /* Fill in fields of file we know */ + file->item.type = H5I_FILE; + file->item.file = file; + file->item.rc = 1; + if(NULL == (file->file_name = HDstrdup(name))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't copy file name") + file->file_name_len = HDstrlen(name); + file->flags = flags; + if(H5VL_rados_oid_create_string(file, file->max_oid, &file->glob_md_oid) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't create oid for globabl metadata object") + if((file->fapl_id = H5Pcopy(fapl_id)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTCOPY, NULL, "failed to copy fapl") + + /* Duplicate communicator and Info object. 
*/ + if(FAIL == H5FD_mpi_comm_info_dup(fa->comm, fa->info, &file->comm, &file->info)) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") + + /* Obtain the process rank and size from the communicator attached to the + * fapl ID */ + MPI_Comm_rank(fa->comm, &file->my_rank); + MPI_Comm_size(fa->comm, &file->num_procs); + + /* Generate root group oid */ + H5VL_rados_oid_create_binary((uint64_t)1, H5I_GROUP, &root_grp_oid); + + /* Determine if we requested collective object ops for the file */ + if(H5Pget_all_coll_metadata_ops(fapl_id, &file->collective) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get collective access property") + + if(file->my_rank == 0) { + /* If there are other processes and we fail we must bcast anyways so they + * don't hang */ + if(file->num_procs > 1) + must_bcast = TRUE; + + /* Read max oid directly to foi_buf */ + /* Check for does not exist here and assume 0? -NAF */ + if((ret = rados_read(ioctx_g, file->glob_md_oid, foi_buf, 8, 0)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, NULL, "can't read metadata from dataset: %s", strerror(-ret)) + + /* Decode max oid */ + p = (uint8_t *)foi_buf; + UINT64DECODE(p, file->max_oid) + + /* Open root group */ + if(NULL == (file->root_grp = (H5VL_rados_group_t *)H5VL_rados_group_open_helper(file, root_grp_oid, H5P_GROUP_ACCESS_DEFAULT, dxpl_id, req, (file->num_procs > 1) ? 
&gcpl_buf : NULL, &gcpl_len))) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't open root group") + + /* Bcast global handles if there are other processes */ + if(file->num_procs > 1) { + /* Check if the file open info won't fit into the static buffer */ + if(gcpl_len + 2 * sizeof(uint64_t) > sizeof(foi_buf_static)) { + /* Allocate dynamic buffer */ + if(NULL == (foi_buf_dyn = (char *)H5MM_malloc(gcpl_len + 2 * sizeof(uint64_t)))) + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, NULL, "can't allocate space for global container handle") + + /* Use dynamic buffer */ + foi_buf = foi_buf_dyn; + + /* Copy max oid from static buffer */ + HDmemcpy(foi_buf, foi_buf_static, sizeof(uint64_t)); + } /* end if */ + + /* Max oid already encoded (read in encoded form from rados) */ + HDassert(p == ((uint8_t *)foi_buf) + sizeof(uint64_t)); + + /* Encode GCPL length */ + UINT64ENCODE(p, gcpl_len) + + /* Copy GCPL buffer */ + HDmemcpy(p, gcpl_buf, gcpl_len); + + /* We are about to bcast so we no longer need to bcast on failure */ + must_bcast = FALSE; + + /* MPI_Bcast foi_buf */ + if(MPI_SUCCESS != MPI_Bcast(foi_buf, (int)sizeof(foi_buf_static), MPI_BYTE, 0, fa->comm)) + HGOTO_ERROR(H5E_FILE, H5E_MPI, NULL, "can't bcast global container handle") + + /* Need a second bcast if we had to allocate a dynamic buffer */ + if(foi_buf == foi_buf_dyn) + if(MPI_SUCCESS != MPI_Bcast((char *)p, (int)(gcpl_len), MPI_BYTE, 0, fa->comm)) + HGOTO_ERROR(H5E_FILE, H5E_MPI, NULL, "can't bcast file open info (second bcast)") + } /* end if */ + } /* end if */ + else { + HDassert(sizeof(foi_buf_static) >= 2 * sizeof(uint64_t)); + + /* Receive file open info */ + if(MPI_SUCCESS != MPI_Bcast(foi_buf, (int)sizeof(foi_buf_static), MPI_BYTE, 0, fa->comm)) + HGOTO_ERROR(H5E_FILE, H5E_MPI, NULL, "can't bcast global container handle") + + /* Decode max OID */ + p = (uint8_t *)foi_buf; + UINT64DECODE(p, file->max_oid) + + /* Decode GCPL length */ + UINT64DECODE(p, gcpl_len) + + /* Check for gcpl_len set to 0 - indicates 
failure */ + if(gcpl_len == 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "lead process failed to open file") + + /* Check if we need to perform another bcast */ + if(gcpl_len + 2 * sizeof(uint64_t) > sizeof(foi_buf_static)) { + /* Check if we need to allocate a dynamic buffer */ + if(gcpl_len > sizeof(foi_buf_static)) { + /* Allocate dynamic buffer */ + if(NULL == (foi_buf_dyn = (char *)H5MM_malloc(gcpl_len))) + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, NULL, "can't allocate space for global pool handle") + foi_buf = foi_buf_dyn; + } /* end if */ + + /* Receive info buffer */ + if(MPI_SUCCESS != MPI_Bcast(foi_buf_dyn, (int)(gcpl_len), MPI_BYTE, 0, fa->comm)) + HGOTO_ERROR(H5E_FILE, H5E_MPI, NULL, "can't bcast global container handle (second bcast)") + + p = (uint8_t *)foi_buf; + } /* end if */ + + /* Reconstitute root group from revieved GCPL */ + if(NULL == (file->root_grp = (H5VL_rados_group_t *)H5VL_rados_group_reconstitute(file, root_grp_oid, p, H5P_GROUP_ACCESS_DEFAULT, dxpl_id, req))) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't reconstitute root group") + } /* end else */ + + /* FCPL was stored as root group's GCPL (as GCPL is the parent of FCPL). + * Point to it. */ + file->fcpl_id = file->root_grp->gcpl_id; + if(H5Iinc_ref(file->fcpl_id) < 0) + HGOTO_ERROR(H5E_ATOM, H5E_CANTINC, NULL, "can't increment FCPL ref count") + + ret_value = (void *)file; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) { + /* Bcast bcast_buf_64 as '0' if necessary - this will trigger failures + * in the other processes so we do not need to do the second bcast. 
*/ + if(must_bcast) { + HDmemset(foi_buf_static, 0, sizeof(foi_buf_static)); + if(MPI_SUCCESS != MPI_Bcast(foi_buf_static, sizeof(foi_buf_static), MPI_BYTE, 0, fa->comm)) + HDONE_ERROR(H5E_FILE, H5E_MPI, NULL, "can't bcast global handle sizes") + } /* end if */ + + /* Close file */ + if(file && H5VL_rados_file_close_helper(file, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "can't close file") + } /* end if */ + + /* Clean up buffers */ + foi_buf_dyn = (char *)H5MM_xfree(foi_buf_dyn); + gcpl_buf = H5MM_xfree(gcpl_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_open() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_flush + * + * Purpose: Flushes a RADOS file. Currently just writes the max oid. + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_file_flush(H5VL_rados_file_t *file) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + /* Nothing to do if no write intent */ + if(!(file->flags & H5F_ACC_RDWR)) + HGOTO_DONE(SUCCEED) + + /* Write max oid */ + if(H5VL_rados_write_max_oid(file) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "can't write max OID") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_flush() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_specific + * + * Purpose: Perform an operation + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_file_specific(void *item, H5VL_file_specific_t specific_type, + hid_t H5_ATTR_UNUSED dxpl_id, void H5_ATTR_UNUSED **req, + va_list H5_ATTR_UNUSED arguments) +{ + H5VL_rados_file_t *file = 
((H5VL_rados_item_t *)item)->file; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + switch (specific_type) { + /* H5Fflush` */ + case H5VL_FILE_FLUSH: + if(H5VL_rados_file_flush(file) < 0) + HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "can't flush file") + + break; + /* H5Fmount */ + case H5VL_FILE_MOUNT: + /* H5Fmount */ + case H5VL_FILE_UNMOUNT: + /* H5Fis_accessible */ + case H5VL_FILE_IS_ACCESSIBLE: + default: + HGOTO_ERROR(H5E_VOL, H5E_UNSUPPORTED, FAIL, "invalid or unsupported specific operation") + } /* end switch */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_specific() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_close_helper + * + * Purpose: Closes a RADOS HDF5 file. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_file_close_helper(H5VL_rados_file_t *file, hid_t dxpl_id, void **req) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(file); + + /* Free file data structures */ + if(file->file_name) + HDfree(file->file_name); + file->glob_md_oid = H5MM_xfree(file->glob_md_oid); + if(file->comm || file->info) + if(H5FD_mpi_comm_info_free(&file->comm, &file->info) < 0) + HDONE_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "Communicator/Info free failed") + /* Note: Use of H5I_dec_app_ref is a hack, using H5I_dec_ref doesn't reduce + * app reference count incremented by use of public API to create the ID, + * while use of H5Idec_ref clears the error stack. 
In general we can't use + * public APIs in the "done" section or in close routines for this reason, + * until we implement a separate error stack for the VOL plugin */ + if(file->fapl_id != FAIL && H5I_dec_app_ref(file->fapl_id) < 0) + HDONE_ERROR(H5E_SYM, H5E_CANTDEC, FAIL, "failed to close plist") + if(file->fcpl_id != FAIL && H5I_dec_app_ref(file->fcpl_id) < 0) + HDONE_ERROR(H5E_SYM, H5E_CANTDEC, FAIL, "failed to close plist") + if(file->root_grp) + if(H5VL_rados_group_close(file->root_grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "can't close root group") + file = H5FL_FREE(H5VL_rados_file_t, file); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_close_helper() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_file_close + * + * Purpose: Closes a RADOS HDF5 file. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_file_close(void *_file, hid_t dxpl_id, void **req) +{ + H5VL_rados_file_t *file = (H5VL_rados_file_t *)_file; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(file); + + /* Flush the file */ + if(H5VL_rados_file_flush(file) < 0) + HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "can't flush file") + + /* Close the file */ + if(H5VL_rados_file_close_helper(file, dxpl_id, req) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "can't close file") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_file_close() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_write_max_oid + * + * Purpose: Writes the max OID (object index) to the global metadata + * object + * + * Return: Success: 0 + * Failure: 1 + * + * Programmer: Neil Fortner + * April, 2018 + * + 
*------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_write_max_oid(H5VL_rados_file_t *file) +{ + int ret; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Write max oid to global metadata object if necessary */ + if(file->max_oid_dirty) { + uint8_t wbuf[8]; + uint8_t *p = wbuf; + + UINT64ENCODE(p, file->max_oid) + + if((ret = rados_write_full(ioctx_g, file->glob_md_oid, (const char *)wbuf, (size_t)8)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't write metadata to group: %s", strerror(-ret)) + file->max_oid_dirty = FALSE; + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_write_max_oid() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_link_read + * + * Purpose: Reads the specified link from the given group. Note that + * if the returned link is a soft link, val->target.soft must + * eventually be freed. + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: Neil Fortner + * December, 2016 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_link_read(H5VL_rados_group_t *grp, const char *name, + H5VL_rados_link_val_t *val) +{ + rados_read_op_t read_op; + hbool_t read_op_init = FALSE; + rados_omap_iter_t iter; + hbool_t iter_init = FALSE; + char *key; + char *omap_val; + size_t val_len; + uint8_t *p; + int ret; + int read_ret; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Create read op */ + if(NULL == (read_op = rados_create_read_op())) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't create read operation") + read_op_init = TRUE; + + /* Add operation to get link value */ + /* Add prefix RADOSINC */ + rados_read_op_omap_get_vals_by_keys(read_op, (const char * const *)&name, 1, &iter, &read_ret); + iter_init = TRUE; + + /* Execute read operation */ + if((ret = rados_read_op_operate(read_op, ioctx_g, 
grp->obj.oid, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't perform read operation: %s", strerror(-ret)) + if(read_ret < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't read link value: %s", strerror(-read_ret)) + + /* Get link value */ + if((ret = rados_omap_get_next(iter, &key, &omap_val, &val_len)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't get link value: %s", strerror(-ret)) + + /* Check for no link found */ + if(val_len == 0) + HGOTO_ERROR(H5E_SYM, H5E_NOTFOUND, FAIL, "link not found") + + /* Decode link type */ + p = (uint8_t *)omap_val; + val->type = (H5L_type_t)*p++; + + /* Decode remainder of link value */ + switch(val->type) { + case H5L_TYPE_HARD: + /* Decode oid */ + UINT64DECODE(p, val->target.hard) + + break; + + case H5L_TYPE_SOFT: + /* Allocate soft link buffer and copy string. */ + if(NULL == (val->target.soft = (char *)H5MM_malloc(val_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate link value buffer") + HDmemcpy(val->target.soft, val + 1, val_len - 1); + + /* Add null terminator */ + val->target.soft[val_len - 1] = '\0'; + + break; + + case H5L_TYPE_ERROR: + case H5L_TYPE_EXTERNAL: + case H5L_TYPE_MAX: + default: + HGOTO_ERROR(H5E_SYM, H5E_BADVALUE, FAIL, "invalid or unsupported link type") + } /* end switch */ + +done: + if(iter_init) + rados_omap_get_end(iter); + if(read_op_init) + rados_release_read_op(read_op); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_link_read() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_link_write + * + * Purpose: Writes the specified link to the given group + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_link_write(H5VL_rados_group_t *grp, const char *name, + H5VL_rados_link_val_t *val) +{ + 
rados_write_op_t write_op; + hbool_t write_op_init = FALSE; + size_t val_len; + uint8_t *val_buf; + uint8_t val_buf_static[H5VL_RADOS_LINK_VAL_BUF_SIZE]; + uint8_t *val_buf_dyn = NULL; + uint8_t *p; + int ret; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for write access */ + if(!(grp->obj.item.file->flags & H5F_ACC_RDWR)) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "no write intent on file") + + val_buf = val_buf_static; + + /* Encode type specific value information */ + switch(val->type) { + case H5L_TYPE_HARD: + HDassert(sizeof(val_buf_static) >= sizeof(val->target.hard) + 1); + + /* Encode link type */ + p = val_buf; + *p++ = (uint8_t)val->type; + + /* Encode oid */ + UINT64ENCODE(p, val->target.hard) + + val_len = (size_t)9; + + break; + + case H5L_TYPE_SOFT: + /* Allocate larger buffer for soft link if necessary */ + val_len = HDstrlen(val->target.soft) + 1; + if(val_len > sizeof(val_buf_static)) { + if(NULL == (val_buf_dyn = H5MM_malloc(val_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate link value buffer") + val_buf = val_buf_dyn; + } /* end if */ + + /* Encode link type */ + p = val_buf; + *p++ = (uint8_t)val->type; + + /* Copy link target */ + HDmemcpy(p, val->target.soft, val_len - 1); + + break; + + case H5L_TYPE_ERROR: + case H5L_TYPE_EXTERNAL: + case H5L_TYPE_MAX: + default: + HGOTO_ERROR(H5E_SYM, H5E_BADVALUE, FAIL, "invalid or unsupported link type") + } /* end switch */ + + /* Create write op */ + if(NULL == (write_op = rados_create_write_op())) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't create write operation") + write_op_init = TRUE; + + /* Add operation to write link */ + /* Add prefix RADOSINC */ + rados_write_op_omap_set(write_op, &name, (const char * const *)&val_buf, &val_len, 1); + + /* Execute write operation */ + if((ret = rados_write_op_operate(write_op, ioctx_g, grp->obj.oid, NULL, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't perform write 
operation: %s", strerror(-ret)) + +done: + if(write_op_init) + rados_release_write_op(write_op); + H5MM_xfree(val_buf_dyn); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_link_write() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_link_follow + * + * Purpose: Follows the link in grp identified with name, and returns + * in oid the oid of the target object. name must be NULL + * terminated. + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_link_follow(H5VL_rados_group_t *grp, const char *name, hid_t dxpl_id, + void **req, uint64_t *oid) +{ + H5VL_rados_link_val_t link_val; + hbool_t link_val_alloc = FALSE; + H5VL_rados_group_t *target_grp = NULL; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(grp); + HDassert(name); + HDassert(oid); + + /* Read link to group */ + if(H5VL_rados_link_read(grp, name, &link_val) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't read link") + + switch(link_val.type) { + case H5L_TYPE_HARD: + /* Simply return the read oid */ + *oid = link_val.target.hard; + + break; + + case H5L_TYPE_SOFT: + { + char *target_name = NULL; + + link_val_alloc = TRUE; + + /* Traverse the soft link path */ + if(NULL == (target_grp = H5VL_rados_group_traverse(&grp->obj.item, link_val.target.soft, dxpl_id, req, &target_name, NULL, NULL))) + HGOTO_ERROR(H5E_SYM, H5E_BADITER, FAIL, "can't traverse path") + + /* Check for no target_name, in this case just return + * target_grp's oid */ + if(target_name[0] == '\0' + || (target_name[0] == '.' 
&& target_name[1] == '\0')) + *oid = target_grp->obj.bin_oid; + else + /* Follow the last element in the path */ + if(H5VL_rados_link_follow(target_grp, target_name, dxpl_id, req, oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't follow link") + + break; + } /* end block */ + + case H5L_TYPE_ERROR: + case H5L_TYPE_EXTERNAL: + case H5L_TYPE_MAX: + default: + HGOTO_ERROR(H5E_SYM, H5E_BADVALUE, FAIL, "invalid or unsupported link type") + } /* end switch */ + +done: + /* Clean up */ + if(link_val_alloc) { + HDassert(link_val.type == H5L_TYPE_SOFT); + H5MM_free(link_val.target.soft); + } /* end if */ + + if(target_grp) + if(H5VL_rados_group_close(target_grp, dxpl_id, req) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CLOSEERROR, FAIL, "can't close group") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_link_follow() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_link_follow_comp + * + * Purpose: Follows the link in grp identified with name, and returns + * in oid the oid of the target object. name may be a + * component of a path, only the first name_len bytes of name + * are examined. 
+ * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_link_follow_comp(H5VL_rados_group_t *grp, char *name, + size_t name_len, hid_t dxpl_id, void **req, uint64_t *oid) +{ + char saved_end = name[name_len]; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(grp); + HDassert(name); + HDassert(oid); + + /* Add null terminator to name so we can use the underlying routine */ + name[name_len] = '\0'; + + /* Follow the link now that name is NULL terminated */ + if(H5VL_rados_link_follow(grp, name, dxpl_id, req, oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, FAIL, "can't follow link to group") + +done: + /* Put name back the way it was */ + name[name_len] = saved_end; + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_link_follow_comp() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_traverse + * + * Purpose: Given a path name and base object, returns the final group + * in the path and the object name. obj_name points into the + * buffer given by path, so it does not need to be freed. + * The group must be closed with H5VL_rados_group_close(). + * + * Return: Success: group object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static H5VL_rados_group_t * +H5VL_rados_group_traverse(H5VL_rados_item_t *item, char *path, + hid_t dxpl_id, void **req, char **obj_name, void **gcpl_buf_out, + uint64_t *gcpl_len_out) +{ + H5VL_rados_group_t *grp = NULL; + char *next_obj; + uint64_t oid; + H5VL_rados_group_t *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(item); + HDassert(path); + HDassert(obj_name); + + /* Initialize obj_name */ + *obj_name = path; + + /* Open starting group */ + if((*obj_name)[0] == '/') { + grp = item->file->root_grp; + (*obj_name)++; + } /* end if */ + else { + if(item->type == H5I_GROUP) + grp = (H5VL_rados_group_t *)item; + else if(item->type == H5I_FILE) + grp = ((H5VL_rados_file_t *)item)->root_grp; + else + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "item not a file or group") + } /* end else */ + + grp->obj.item.rc++; + + /* Search for '/' */ + next_obj = strchr(*obj_name, '/'); + + /* Traverse path */ + while(next_obj) { + /* Free gcpl_buf_out */ + if(gcpl_buf_out) + *gcpl_buf_out = H5MM_xfree(*gcpl_buf_out); + + /* Follow link to next group in path */ + HDassert(next_obj > *obj_name); + if(H5VL_rados_link_follow_comp(grp, *obj_name, (size_t)(next_obj - *obj_name), dxpl_id, req, &oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't follow link to group") + + /* Close previous group */ + if(H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + grp = NULL; + + /* Open group */ + if(NULL == (grp = (H5VL_rados_group_t *)H5VL_rados_group_open_helper(item->file, oid, H5P_GROUP_ACCESS_DEFAULT, dxpl_id, req, gcpl_buf_out, gcpl_len_out))) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't open group") + + /* Advance to next path element */ + *obj_name = next_obj + 1; + next_obj = strchr(*obj_name, '/'); + } /* end while */ + + /* Set return values 
*/ + ret_value = grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CLOSEERROR, NULL, "can't close group") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_traverse() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_traverse_const + * + * Purpose: Wrapper for H5VL_rados_group_traverse for a const path. + * + * Return: Success: group object. + * Failure: NULL + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static H5VL_rados_group_t * +H5VL_rados_group_traverse_const(H5VL_rados_item_t *item, const char *path, + hid_t dxpl_id, void **req, const char **obj_name, void **gcpl_buf_out, + uint64_t *gcpl_len_out) +{ + H5VL_rados_group_t *grp = NULL; + char *tmp_path = NULL; + char *tmp_obj_name; + H5VL_rados_group_t *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(item); + HDassert(path); + HDassert(obj_name); + + /* Make a temporary copy of path so we do not write to the user's const + * buffer (since the RADOS API expects null terminated strings we must + * insert null terminators to pass path components to RADOS. 
We could + * alternatively copy each path name but this is simpler and shares more + * code with other VOL plugins) */ + if(NULL == (tmp_path = HDstrdup(path))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't duplicate path name") + + /* Forward the call to the non-const routine */ + if(NULL == (grp = H5VL_rados_group_traverse(item, tmp_path, dxpl_id, req, + &tmp_obj_name, gcpl_buf_out, gcpl_len_out))) + HGOTO_ERROR(H5E_SYM, H5E_BADITER, NULL, "can't traverse path") + + /* Set *obj_name in path to match tmp_obj_name in tmp_path */ + *obj_name = path + (tmp_obj_name - tmp_path); + + /* Set return value */ + ret_value = grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CLOSEERROR, NULL, "can't close group") + + H5MM_xfree(tmp_path); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_traverse_const() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_create_helper + * + * Purpose: Performs the actual group creation. + * + * Return: Success: group object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * January, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_group_create_helper(H5VL_rados_file_t *file, hid_t gcpl_id, + hid_t gapl_id, hid_t dxpl_id, void **req, H5VL_rados_group_t *parent_grp, + const char *name, hbool_t collective) +{ + H5VL_rados_group_t *grp = NULL; + void *gcpl_buf = NULL; + int ret; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(file->flags & H5F_ACC_RDWR); + + /* Allocate the group object that is returned to the user */ + if(NULL == (grp = H5FL_CALLOC(H5VL_rados_group_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate RADOS group struct") + grp->obj.item.type = H5I_GROUP; + grp->obj.item.file = file; + grp->obj.item.rc = 1; + grp->gcpl_id = FAIL; + grp->gapl_id = FAIL; + + /* Generate group oid */ + if(H5VL_rados_oid_create(file, file->max_oid + (uint64_t)1, H5I_GROUP, &grp->obj.bin_oid, &grp->obj.oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't generate group oid") + + /* Update max_oid */ + file->max_oid = H5VL_rados_oid_to_idx(grp->obj.bin_oid); + + /* Create group and write metadata if this process should */ + if(!collective || (file->my_rank == 0)) { + size_t gcpl_size = 0; + + /* Create group */ + /* Write max OID */ + /*if(H5VL_rados_write_max_oid(file) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't write max OID")*/ + + /* Encode GCPL */ + if(H5Pencode(gcpl_id, NULL, &gcpl_size) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "can't determine serialized length of gcpl") + if(NULL == (gcpl_buf = H5MM_malloc(gcpl_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate buffer for serialized gcpl") + if(H5Pencode(gcpl_id, gcpl_buf, &gcpl_size) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't serialize gcpl") + + /* Write internal metadata to group */ + if((ret = rados_write_full(ioctx_g, grp->obj.oid, gcpl_buf, gcpl_size)) < 0) + 
HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't write metadata to group: %s", strerror(-ret)) + + /* Mark max OID as dirty */ + file->max_oid_dirty = TRUE; + + /* Write link to group if requested */ + if(parent_grp) { + H5VL_rados_link_val_t link_val; + + HDassert(name); + + link_val.type = H5L_TYPE_HARD; + link_val.target.hard = grp->obj.bin_oid; + if(H5VL_rados_link_write(parent_grp, name, &link_val) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't create link to group") + } /* end if */ + } /* end if */ + + /* Finish setting up group struct */ + if((grp->gcpl_id = H5Pcopy(gcpl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy gcpl"); + if((grp->gapl_id = H5Pcopy(gapl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy gapl"); + + ret_value = (void *)grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_FILE, H5E_CLOSEERROR, NULL, "can't close group") + + /* Free memory */ + gcpl_buf = H5MM_xfree(gcpl_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_create_helper() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_create + * + * Purpose: Sends a request to RADOS to create a group + * + * Return: Success: group object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_group_create(void *_item, + H5VL_loc_params_t H5_ATTR_UNUSED loc_params, const char *name, + hid_t gcpl_id, hid_t gapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_item_t *item = (H5VL_rados_item_t *)_item; + H5VL_rados_group_t *grp = NULL; + H5VL_rados_group_t *target_grp = NULL; + const char *target_name = NULL; + hbool_t collective = item->file->collective; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for write access */ + if(!(item->file->flags & H5F_ACC_RDWR)) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, NULL, "no write intent on file") + + /* Check for collective access, if not already set by the file */ + if(!collective) + if(H5Pget_all_coll_metadata_ops(gapl_id, &collective) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTGET, NULL, "can't get collective access property") + + /* Traverse the path */ + if(!collective || (item->file->my_rank == 0)) + if(NULL == (target_grp = H5VL_rados_group_traverse_const(item, name, dxpl_id, req, &target_name, NULL, NULL))) + HGOTO_ERROR(H5E_SYM, H5E_BADITER, NULL, "can't traverse path") + + /* Create group and link to group */ + if(NULL == (grp = (H5VL_rados_group_t *)H5VL_rados_group_create_helper(item->file, gcpl_id, gapl_id, dxpl_id, req, target_grp, target_name, collective))) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't create group") + + /* Set return value */ + ret_value = (void *)grp; + +done: + /* Close target group */ + if(target_grp && H5VL_rados_group_close(target_grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_create() */ + + 
+/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_open_helper + * + * Purpose: Performs the actual group open, given the oid. + * + * Return: Success: group object. + * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_group_open_helper(H5VL_rados_file_t *file, uint64_t oid, + hid_t gapl_id, hid_t dxpl_id, void **req, void **gcpl_buf_out, + uint64_t *gcpl_len_out) +{ + H5VL_rados_group_t *grp = NULL; + void *gcpl_buf = NULL; + uint64_t gcpl_len; + time_t pmtime; + int ret; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Allocate the group object that is returned to the user */ + if(NULL == (grp = H5FL_CALLOC(H5VL_rados_group_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate RADOS group struct") + grp->obj.item.type = H5I_GROUP; + grp->obj.item.file = file; + grp->obj.item.rc = 1; + grp->obj.bin_oid = oid; + if(H5VL_rados_oid_create_string(file, oid, &grp->obj.oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't encode string oid") + grp->gcpl_id = FAIL; + grp->gapl_id = FAIL; + + /* Read internal metadata size from group */ + if((ret = rados_stat(ioctx_g, grp->obj.oid, &gcpl_len, &pmtime)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTDECODE, NULL, "can't read metadata size from group: %s", strerror(-ret)) + + /* Check for metadata not found */ + if(gcpl_len == (uint64_t)0) + HGOTO_ERROR(H5E_SYM, H5E_NOTFOUND, NULL, "internal metadata not found") + + /* Allocate buffer for GCPL */ + if(NULL == (gcpl_buf = H5MM_malloc(gcpl_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate buffer for serialized gcpl") + + /* Read internal metadata from group */ + if((ret = rados_read(ioctx_g, grp->obj.oid, gcpl_buf, gcpl_len, 0)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTDECODE, NULL, "can't read metadata from group: %s", strerror(-ret)) + + /* Decode 
GCPL */ + if((grp->gcpl_id = H5Pdecode(gcpl_buf)) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_CANTDECODE, NULL, "can't deserialize GCPL") + + /* Finish setting up group struct */ + if((grp->gapl_id = H5Pcopy(gapl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy gapl"); + + /* Return GCPL info if requested, relinquish ownership of gcpl_buf if so */ + if(gcpl_buf_out) { + HDassert(gcpl_len_out); + HDassert(!*gcpl_buf_out); + + *gcpl_buf_out = gcpl_buf; + gcpl_buf = NULL; + + *gcpl_len_out = gcpl_len; + } /* end if */ + + ret_value = (void *)grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + + /* Free memory */ + gcpl_buf = H5MM_xfree(gcpl_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_open_helper() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_reconstitute + * + * Purpose: Reconstitutes a group object opened by another process. + * + * Return: Success: group object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_group_reconstitute(H5VL_rados_file_t *file, uint64_t oid, + uint8_t *gcpl_buf, hid_t gapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_group_t *grp = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Allocate the group object that is returned to the user */ + if(NULL == (grp = H5FL_CALLOC(H5VL_rados_group_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate RADOS group struct") + grp->obj.item.type = H5I_GROUP; + grp->obj.item.file = file; + grp->obj.item.rc = 1; + grp->obj.bin_oid = oid; + if(H5VL_rados_oid_create_string(file, oid, &grp->obj.oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't encode string oid") + grp->gcpl_id = FAIL; + grp->gapl_id = FAIL; + + /* Decode GCPL */ + if((grp->gcpl_id = H5Pdecode(gcpl_buf)) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_CANTDECODE, NULL, "can't deserialize GCPL") + + /* Finish setting up group struct */ + if((grp->gapl_id = H5Pcopy(gapl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy gapl"); + + ret_value = (void *)grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_reconstitute() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_open + * + * Purpose: Sends a request to RADOS to open a group + * + * Return: Success: dataset object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_group_open(void *_item, H5VL_loc_params_t loc_params, + const char *name, hid_t gapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_item_t *item = (H5VL_rados_item_t *)_item; + H5VL_rados_group_t *grp = NULL; + H5VL_rados_group_t *target_grp = NULL; + const char *target_name = NULL; + uint64_t oid; + uint8_t *gcpl_buf = NULL; + uint64_t gcpl_len = 0; + uint8_t ginfo_buf_static[H5VL_RADOS_GINFO_BUF_SIZE]; + uint8_t *p; + hbool_t collective = item->file->collective; + hbool_t must_bcast = FALSE; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for collective access, if not already set by the file */ + if(!collective) + if(H5Pget_all_coll_metadata_ops(gapl_id, &collective) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTGET, NULL, "can't get collective access property") + + /* Check if we're actually opening the group or just receiving the group + * info from the leader */ + if(!collective || (item->file->my_rank == 0)) { + if(collective && (item->file->num_procs > 1)) + must_bcast = TRUE; + + /* Check for open by address */ + if(H5VL_OBJECT_BY_ADDR == loc_params.type) { + /* Generate oid from address */ + oid = (uint64_t)loc_params.loc_data.loc_by_addr.addr; + + /* Open group */ + if(NULL == (grp = (H5VL_rados_group_t *)H5VL_rados_group_open_helper(item->file, oid, gapl_id, dxpl_id, req, (collective && (item->file->num_procs > 1)) ? (void **)&gcpl_buf : NULL, &gcpl_len))) + HGOTO_ERROR(H5E_SYM, H5E_CANTOPENOBJ, NULL, "can't open group") + } /* end if */ + else { + /* Open using name parameter */ + /* Traverse the path */ + if(NULL == (target_grp = H5VL_rados_group_traverse_const(item, name, dxpl_id, req, &target_name, (collective && (item->file->num_procs > 1)) ? 
(void **)&gcpl_buf : NULL, &gcpl_len))) + HGOTO_ERROR(H5E_SYM, H5E_BADITER, NULL, "can't traverse path") + + /* Check for no target_name, in this case just return target_grp */ + if(target_name[0] == '\0' + || (target_name[0] == '.' && target_name[1] == '\0')) { + size_t gcpl_size; + + /* Take ownership of target_grp */ + grp = target_grp; + target_grp = NULL; + + /* Encode GCPL */ + if(H5Pencode(grp->gcpl_id, NULL, &gcpl_size) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "can't determine serialized length of gcpl") + if(NULL == (gcpl_buf = (uint8_t *)H5MM_malloc(gcpl_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate buffer for serialized gcpl") + gcpl_len = (uint64_t)gcpl_size; + if(H5Pencode(grp->gcpl_id, gcpl_buf, &gcpl_size) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't serialize gcpl") + } /* end if */ + else { + gcpl_buf = (uint8_t *)H5MM_xfree(gcpl_buf); + gcpl_len = 0; + + /* Follow link to group */ + if(H5VL_rados_link_follow(target_grp, target_name, dxpl_id, req, &oid) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't follow link to group") + + /* Open group */ + if(NULL == (grp = (H5VL_rados_group_t *)H5VL_rados_group_open_helper(item->file, oid, gapl_id, dxpl_id, req, (collective && (item->file->num_procs > 1)) ? 
(void **)&gcpl_buf : NULL, &gcpl_len))) + HGOTO_ERROR(H5E_SYM, H5E_CANTOPENOBJ, NULL, "can't open group") + } /* end else */ + } /* end else */ + + /* Broadcast group info if there are other processes that need it */ + if(collective && (item->file->num_procs > 1)) { + HDassert(gcpl_buf); + HDassert(sizeof(ginfo_buf_static) >= 2 * sizeof(uint64_t)); + + /* Encode oid */ + p = ginfo_buf_static; + UINT64ENCODE(p, grp->obj.bin_oid) + + /* Encode GCPL length */ + UINT64ENCODE(p, gcpl_len) + + /* Copy GCPL to ginfo_buf_static if it will fit */ + if((gcpl_len + 2 * sizeof(uint64_t)) <= sizeof(ginfo_buf_static)) + (void)HDmemcpy(p, gcpl_buf, gcpl_len); + + /* We are about to bcast so we no longer need to bcast on failure */ + must_bcast = FALSE; + + /* MPI_Bcast ginfo_buf */ + if(MPI_SUCCESS != MPI_Bcast((char *)ginfo_buf_static, sizeof(ginfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_SYM, H5E_MPI, NULL, "can't bcast group info") + + /* Need a second bcast if it did not fit in the receivers' static + * buffers */ + if(gcpl_len + 2 * sizeof(uint64_t) > sizeof(ginfo_buf_static)) + if(MPI_SUCCESS != MPI_Bcast((char *)gcpl_buf, (int)gcpl_len, MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_SYM, H5E_MPI, NULL, "can't bcast GCPL") + } /* end if */ + } /* end if */ + else { + /* Receive GCPL */ + if(MPI_SUCCESS != MPI_Bcast((char *)ginfo_buf_static, sizeof(ginfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_SYM, H5E_MPI, NULL, "can't bcast group info") + + /* Decode oid */ + p = ginfo_buf_static; + UINT64DECODE(p, oid) + + /* Decode GCPL length */ + UINT64DECODE(p, gcpl_len) + + /* Check for gcpl_len set to 0 - indicates failure */ + if(gcpl_len == 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "lead process failed to open group") + + /* Check if we need to perform another bcast */ + if(gcpl_len + 2 * sizeof(uint64_t) > sizeof(ginfo_buf_static)) { + /* Allocate a dynamic buffer if necessary */ + if(gcpl_len > sizeof(ginfo_buf_static)) { + 
if(NULL == (gcpl_buf = (uint8_t *)H5MM_malloc(gcpl_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate space for global pool handle") + p = gcpl_buf; + } /* end if */ + else + p = ginfo_buf_static; + + /* Receive GCPL */ + if(MPI_SUCCESS != MPI_Bcast((char *)p, (int)gcpl_len, MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_SYM, H5E_MPI, NULL, "can't bcast GCPL") + } /* end if */ + + /* Reconstitute group from received oid and GCPL buffer */ + if(NULL == (grp = (H5VL_rados_group_t *)H5VL_rados_group_reconstitute(item->file, oid, p, gapl_id, dxpl_id, req))) + HGOTO_ERROR(H5E_SYM, H5E_CANTINIT, NULL, "can't reconstitute group") + } /* end else */ + + /* Set return value */ + ret_value = (void *)grp; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) { + /* Bcast gcpl_buf as '0' if necessary - this will trigger failures in + * other processes so we do not need to do the second bcast. */ + if(must_bcast) { + HDmemset(ginfo_buf_static, 0, sizeof(ginfo_buf_static)); + if(MPI_SUCCESS != MPI_Bcast(ginfo_buf_static, sizeof(ginfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HDONE_ERROR(H5E_SYM, H5E_MPI, NULL, "can't bcast empty group info") + } /* end if */ + + /* Close group */ + if(grp && H5VL_rados_group_close(grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + } /* end if */ + + /* Close target group */ + if(target_grp && H5VL_rados_group_close(target_grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_SYM, H5E_CLOSEERROR, NULL, "can't close group") + + /* Free memory */ + gcpl_buf = (uint8_t *)H5MM_xfree(gcpl_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_open() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_group_close + * + * Purpose: Closes a RADOS HDF5 group. 
+ * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * February, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_group_close(void *_grp, hid_t H5_ATTR_UNUSED dxpl_id, + void H5_ATTR_UNUSED **req) +{ + H5VL_rados_group_t *grp = (H5VL_rados_group_t *)_grp; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(grp); + + if(--grp->obj.item.rc == 0) { + /* Free group data structures */ + grp->obj.oid = H5MM_xfree(grp->obj.oid); + if(grp->gcpl_id != FAIL && H5I_dec_app_ref(grp->gcpl_id) < 0) + HDONE_ERROR(H5E_SYM, H5E_CANTDEC, FAIL, "failed to close plist") + if(grp->gapl_id != FAIL && H5I_dec_app_ref(grp->gapl_id) < 0) + HDONE_ERROR(H5E_SYM, H5E_CANTDEC, FAIL, "failed to close plist") + grp = H5FL_FREE(H5VL_rados_group_t, grp); + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_group_close() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_need_bkg + * + * Purpose: Determine if a background buffer is needed for conversion. 
+ * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * March, 2018 + * + *------------------------------------------------------------------------- + */ +static htri_t +H5VL_rados_need_bkg(hid_t src_type_id, hid_t dst_type_id, size_t *dst_type_size, + hbool_t *fill_bkg) +{ + hid_t memb_type_id = -1; + hid_t src_memb_type_id = -1; + char *memb_name = NULL; + size_t memb_size; + H5T_class_t tclass; + htri_t ret_value; + + FUNC_ENTER_NOAPI_NOINIT + + /* Get destination type size */ + if((*dst_type_size = H5Tget_size(dst_type_id)) == 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source type size") + + /* Get datatype class */ + if(H5T_NO_CLASS == (tclass = H5Tget_class(dst_type_id))) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get type class") + + switch(tclass) { + case H5T_INTEGER: + case H5T_FLOAT: + case H5T_TIME: + case H5T_STRING: + case H5T_BITFIELD: + case H5T_OPAQUE: + case H5T_ENUM: + /* No background buffer necessary */ + ret_value = FALSE; + + break; + + case H5T_COMPOUND: + { + int nmemb; + size_t size_used = 0; + int src_i; + int i; + + /* We must always provide a background buffer for compound + * conversions. Only need to check further to see if it must be + * filled. 
*/ + ret_value = TRUE; + + /* Get number of compound members */ + if((nmemb = H5Tget_nmembers(dst_type_id)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get number of destination compound members") + + /* Iterate over compound members, checking for a member in + * dst_type_id with no match in src_type_id */ + for(i = 0; i < nmemb; i++) { + /* Get member type */ + if((memb_type_id = H5Tget_member_type(dst_type_id, (unsigned)i)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get compound member type") + + /* Get member name */ + if(NULL == (memb_name = H5Tget_member_name(dst_type_id, (unsigned)i))) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get compound member name") + + /* Check for matching name in source type */ + H5E_BEGIN_TRY { + src_i = H5Tget_member_index(src_type_id, memb_name); + } H5E_END_TRY + + /* Free memb_name */ + if(H5free_memory(memb_name) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, "can't free member name") + memb_name = NULL; + + /* If no match was found, this type is not being filled in, + * so we must fill the background buffer */ + if(src_i < 0) { + if(H5Tclose(memb_type_id) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CLOSEERROR, FAIL, "can't close member type") + memb_type_id = -1; + *fill_bkg = TRUE; + HGOTO_DONE(TRUE) + } /* end if */ + + /* Open matching source type */ + if((src_memb_type_id = H5Tget_member_type(src_type_id, (unsigned)src_i)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get compound member type") + + /* Recursively check member type, this will fill in the + * member size */ + if(H5VL_rados_need_bkg(src_memb_type_id, memb_type_id, &memb_size, fill_bkg) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't check if background buffer needed") + + /* Close source member type */ + if(H5Tclose(src_memb_type_id) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CLOSEERROR, FAIL, "can't close member type") + src_memb_type_id = -1; + + /* Close member type */ + if(H5Tclose(memb_type_id) < 
0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CLOSEERROR, FAIL, "can't close member type") + memb_type_id = -1; + + /* If the source member type needs the background filled, so + * does the parent */ + if(*fill_bkg) + HGOTO_DONE(TRUE) + + /* Keep track of the size used in compound */ + size_used += memb_size; + } /* end for */ + + /* Check if all the space in the type is used. If not, we must + * fill the background buffer. */ + /* TODO: This is only necessary on read, we don't care about + * compound gaps in the "file" DSMINC */ + HDassert(size_used <= *dst_type_size); + if(size_used != *dst_type_size) + *fill_bkg = TRUE; + + break; + } /* end block */ + + case H5T_ARRAY: + /* Get parent type */ + if((memb_type_id = H5Tget_super(dst_type_id)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get array parent type") + + /* Get source parent type */ + if((src_memb_type_id = H5Tget_super(src_type_id)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get array parent type") + + /* Recursively check parent type */ + if((ret_value = H5VL_rados_need_bkg(src_memb_type_id, memb_type_id, &memb_size, fill_bkg)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't check if background buffer needed") + + /* Close source parent type */ + if(H5Tclose(src_memb_type_id) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CLOSEERROR, FAIL, "can't close array parent type") + src_memb_type_id = -1; + + /* Close parent type */ + if(H5Tclose(memb_type_id) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CLOSEERROR, FAIL, "can't close array parent type") + memb_type_id = -1; + + break; + + case H5T_REFERENCE: + case H5T_VLEN: + /* Not yet supported */ + HGOTO_ERROR(H5E_DATATYPE, H5E_UNSUPPORTED, FAIL, "reference and vlen types not supported") + + break; + + case H5T_NO_CLASS: + case H5T_NCLASSES: + default: + HGOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "invalid type class") + } /* end switch */ + +done: + /* Cleanup on failure */ + if(ret_value < 0) { + if(memb_type_id >= 0) + 
if(H5I_dec_app_ref(memb_type_id) < 0) + HDONE_ERROR(H5E_DATATYPE, H5E_CANTDEC, FAIL, "failed to close member type") + if(src_memb_type_id >= 0) + if(H5I_dec_app_ref(src_memb_type_id) < 0) + HDONE_ERROR(H5E_DATATYPE, H5E_CANTDEC, FAIL, "failed to close source member type") + memb_name = (char *)H5MM_xfree(memb_name); + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_need_bkg() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_tconv_init + * + * Purpose: DSMINC + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_tconv_init(hid_t src_type_id, size_t *src_type_size, + hid_t dst_type_id, size_t *dst_type_size, hbool_t *_types_equal, + H5VL_rados_tconv_reuse_t *reuse, hbool_t *_need_bkg, hbool_t *fill_bkg) +{ + htri_t need_bkg = FALSE; + htri_t types_equal; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(src_type_size); + HDassert(dst_type_size); + HDassert(_types_equal); + HDassert(_need_bkg); + HDassert(fill_bkg); + HDassert(!*fill_bkg); + + /* Get source type size */ + if((*src_type_size = H5Tget_size(src_type_id)) == 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source type size") + + /* Check if the types are equal */ + if((types_equal = H5Tequal(src_type_id, dst_type_id)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTCOMPARE, FAIL, "can't check if types are equal") + if(types_equal) + /* Types are equal, no need for conversion, just set dst_type_size */ + *dst_type_size = *src_type_size; + else { + /* Check if we need a background buffer */ + if((need_bkg = H5VL_rados_need_bkg(src_type_id, dst_type_id, dst_type_size, fill_bkg)) < 0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't check if background buffer needed") + + /* Check for reusable destination buffer */ + if(reuse) { + HDassert(*reuse == 
H5VL_RADOS_TCONV_REUSE_NONE); + + /* Use dest buffer for type conversion if it large enough, otherwise + * use it for the background buffer if one is needed. */ + if(dst_type_size >= src_type_size) + *reuse = H5VL_RADOS_TCONV_REUSE_TCONV; + else if(need_bkg) + *reuse = H5VL_RADOS_TCONV_REUSE_BKG; + } /* end if */ + } /* end else */ + + /* Set return values */ + *_types_equal = types_equal; + *_need_bkg = need_bkg; + +done: + /* Cleanup on failure */ + if(ret_value < 0) { + *reuse = H5VL_RADOS_TCONV_REUSE_NONE; + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_tconv_init() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_create + * + * Purpose: Sends a request to RADOS to create a dataset + * + * Return: Success: dataset object. + * Failure: NULL + * + * Programmer: Neil Fortner + * March, 2018 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_dataset_create(void *_item, + H5VL_loc_params_t H5_ATTR_UNUSED loc_params, const char *name, + hid_t dcpl_id, hid_t dapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_item_t *item = (H5VL_rados_item_t *)_item; + H5VL_rados_dset_t *dset = NULL; + H5P_genplist_t *plist = NULL; /* Property list pointer */ + hid_t type_id, space_id; + H5VL_rados_group_t *target_grp = NULL; + uint8_t *md_buf = NULL; + hbool_t collective = item->file->collective; + int ret; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for write access */ + if(!(item->file->flags & H5F_ACC_RDWR)) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, NULL, "no write intent on file") + + /* Check for collective access, if not already set by the file */ + if(!collective) + if(H5Pget_all_coll_metadata_ops(dapl_id, &collective) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, NULL, "can't get collective access property") + + /* Get the dcpl plist structure */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(dcpl_id))) 
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, NULL, "can't find object for ID") + + /* get creation properties */ + if(H5P_get(plist, H5VL_PROP_DSET_TYPE_ID, &type_id) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get property value for datatype id") + if(H5P_get(plist, H5VL_PROP_DSET_SPACE_ID, &space_id) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get property value for space id") + + /* Allocate the dataset object that is returned to the user */ + if(NULL == (dset = H5FL_CALLOC(H5VL_rados_dset_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate RADOS dataset struct") + dset->obj.item.type = H5I_DATASET; + dset->obj.item.file = item->file; + dset->obj.item.rc = 1; + dset->type_id = FAIL; + dset->space_id = FAIL; + dset->dcpl_id = FAIL; + dset->dapl_id = FAIL; + + /* Generate dataset oid */ + if(H5VL_rados_oid_create(item->file, item->file->max_oid + (uint64_t)1, H5I_DATASET, &dset->obj.bin_oid, &dset->obj.oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't generate dataset oid") + + /* Update max_oid */ + item->file->max_oid = H5VL_rados_oid_to_idx(dset->obj.bin_oid); + + /* Create dataset and write metadata if this process should */ + if(!collective || (item->file->my_rank == 0)) { + const char *target_name = NULL; + H5VL_rados_link_val_t link_val; + uint8_t *p; + size_t type_size = 0; + size_t space_size = 0; + size_t dcpl_size = 0; + size_t md_size = 0; + + /* Traverse the path */ + if(NULL == (target_grp = H5VL_rados_group_traverse_const(item, name, dxpl_id, req, &target_name, NULL, NULL))) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, NULL, "can't traverse path") + + /* Create dataset */ + /* Write max OID */ + /*if(H5VL_daosm_write_max_oid(item->file) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't write max OID")*/ + + /* Determine buffer sizes */ + if(H5Tencode(type_id, NULL, &type_size) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "can't determine serialized length of datatype") + if(H5Sencode(space_id, 
NULL, &space_size) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "can't determine serialized length of dataaspace") + if(H5Pencode(dcpl_id, NULL, &dcpl_size) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "can't determine serialized length of dcpl") + md_size = (3 * sizeof(uint64_t)) + type_size + space_size + dcpl_size; + + /* Allocate metadata buffer */ + if(NULL == (md_buf = H5MM_malloc(md_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate buffer for constant metadata") + + /* Encode info lengths */ + p = md_buf; + UINT64ENCODE(p, (uint64_t)type_size) + UINT64ENCODE(p, (uint64_t)space_size) + UINT64ENCODE(p, (uint64_t)dcpl_size) + + /* Encode datatype */ + if(H5Tencode(type_id, md_buf + (3 * sizeof(uint64_t)), &type_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, NULL, "can't serialize datatype") + + /* Encode dataspace */ + if(H5Sencode(space_id, md_buf + (3 * sizeof(uint64_t)) + type_size, &space_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, NULL, "can't serialize dataaspace") + + /* Encode DCPL */ + if(H5Pencode(dcpl_id, md_buf + (3 * sizeof(uint64_t)) + type_size + space_size, &dcpl_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, NULL, "can't serialize dcpl") + + /* Write internal metadata to dataset */ + if((ret = rados_write_full(ioctx_g, dset->obj.oid, (const char *)md_buf, md_size)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't write metadata to dataset: %s", strerror(-ret)) + + /* Mark max OID as dirty */ + item->file->max_oid_dirty = TRUE; + + /* Create link to dataset */ + link_val.type = H5L_TYPE_HARD; + link_val.target.hard = dset->obj.bin_oid; + if(H5VL_rados_link_write(target_grp, target_name, &link_val) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't create link to dataset") + } /* end if */ + + /* Finish setting up dataset struct */ + if((dset->type_id = H5Tcopy(type_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy datatype") + if((dset->space_id = 
H5Scopy(space_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy dataspace") + if(H5Sselect_all(dset->space_id) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTDELETE, NULL, "can't change selection") + if((dset->dcpl_id = H5Pcopy(dcpl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy dcpl") + if((dset->dapl_id = H5Pcopy(dapl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy dapl") + + /* Set return value */ + ret_value = (void *)dset; + +done: + /* Close target group */ + if(target_grp && H5VL_rados_group_close(target_grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, NULL, "can't close group") + + /* Cleanup on failure */ + /* Destroy RADOS object if created before failure DSMINC */ + if(NULL == ret_value) + /* Close dataset */ + if(dset && H5VL_rados_dataset_close(dset, dxpl_id, req) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, NULL, "can't close dataset") + + /* Free memory */ + md_buf = H5MM_xfree(md_buf); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_create() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_open + * + * Purpose: Sends a request to RADOS to open a dataset + * + * Return: Success: dataset object. 
+ * Failure: NULL + * + * Programmer: Neil Fortner + * March, 2016 + * + *------------------------------------------------------------------------- + */ +static void * +H5VL_rados_dataset_open(void *_item, + H5VL_loc_params_t H5_ATTR_UNUSED loc_params, const char *name, + hid_t dapl_id, hid_t dxpl_id, void **req) +{ + H5VL_rados_item_t *item = (H5VL_rados_item_t *)_item; + H5VL_rados_dset_t *dset = NULL; + H5VL_rados_group_t *target_grp = NULL; + const char *target_name = NULL; + uint64_t type_len = 0; + uint64_t space_len = 0; + uint64_t dcpl_len = 0; + time_t pmtime; + uint8_t dinfo_buf_static[H5VL_RADOS_DINFO_BUF_SIZE]; + uint8_t *dinfo_buf_dyn = NULL; + uint8_t *dinfo_buf = dinfo_buf_static; + uint8_t *p; + hbool_t collective = item->file->collective; + hbool_t must_bcast = FALSE; + int ret; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for collective access, if not already set by the file */ + if(!collective) + if(H5Pget_all_coll_metadata_ops(dapl_id, &collective) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, NULL, "can't get collective access property") + + /* Allocate the dataset object that is returned to the user */ + if(NULL == (dset = H5FL_CALLOC(H5VL_rados_dset_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate RADOS dataset struct") + dset->obj.item.type = H5I_DATASET; + dset->obj.item.file = item->file; + dset->obj.item.rc = 1; + dset->type_id = FAIL; + dset->space_id = FAIL; + dset->dcpl_id = FAIL; + dset->dapl_id = FAIL; + + /* Check if we're actually opening the group or just receiving the dataset + * info from the leader */ + if(!collective || (item->file->my_rank == 0)) { + uint64_t md_len = 0; + + if(collective && (item->file->num_procs > 1)) + must_bcast = TRUE; + + /* Check for open by address */ + if(H5VL_OBJECT_BY_ADDR == loc_params.type) { + /* Generate oid from address */ + dset->obj.bin_oid = (uint64_t)loc_params.loc_data.loc_by_addr.addr; + if(H5VL_rados_oid_create_string(item->file, 
dset->obj.bin_oid, &dset->obj.oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't encode string oid") + } /* end if */ + else { + /* Open using name parameter */ + /* Traverse the path */ + if(NULL == (target_grp = H5VL_rados_group_traverse_const(item, name, dxpl_id, req, &target_name, NULL, NULL))) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, NULL, "can't traverse path") + + /* Follow link to dataset */ + if(H5VL_rados_link_follow(target_grp, target_name, dxpl_id, req, &dset->obj.bin_oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't follow link to dataset") + + /* Create string oid */ + if(H5VL_rados_oid_create_string(item->file, dset->obj.bin_oid, &dset->obj.oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "can't encode string oid") + } /* end else */ + + /* Read internal metadata size from dataset */ + if((ret = rados_stat(ioctx_g, dset->obj.oid, &md_len, &pmtime)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, NULL, "can't read metadata size from group: %s", strerror(-ret)) + + /* Check for metadata not found */ + if(md_len == (uint64_t)0) + HGOTO_ERROR(H5E_DATASET, H5E_NOTFOUND, NULL, "internal metadata not found") + + /* Allocate dynamic buffer if necessary */ + if(md_len + sizeof(uint64_t) > sizeof(dinfo_buf_static)) { + if(NULL == (dinfo_buf_dyn = H5MM_malloc(md_len + sizeof(uint64_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate buffer for constant dataset metadata") + dinfo_buf = dinfo_buf_dyn; + } /* end if */ + + /* Read internal metadata from dataset */ + if((ret = rados_read(ioctx_g, dset->obj.oid, (char *)(dinfo_buf + sizeof(uint64_t)), md_len, 0)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, NULL, "can't read metadata from dataset: %s", strerror(-ret)) + + /* Decode info lengths */ + p = (uint8_t *)dinfo_buf + sizeof(uint64_t); + UINT64DECODE(p, type_len) + UINT64DECODE(p, space_len) + UINT64DECODE(p, dcpl_len) + if(type_len + space_len + dcpl_len + (3 * sizeof(uint64_t)) != md_len) + 
HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, NULL, "dataset internal metadata size mismatch") + + /* Broadcast dataset info if there are other processes that need it */ + if(collective && (item->file->num_procs > 1)) { + HDassert(dinfo_buf); + HDassert(sizeof(dinfo_buf_static) >= 4 * sizeof(uint64_t)); + + /* Encode oid */ + p = dinfo_buf; + UINT64ENCODE(p, dset->obj.bin_oid) + + /* MPI_Bcast dinfo_buf */ + HDassert((md_len + sizeof(uint64_t) >= sizeof(dinfo_buf_static)) || (dinfo_buf == dinfo_buf_static)); + if(MPI_SUCCESS != MPI_Bcast((char *)dinfo_buf, sizeof(dinfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_DATASET, H5E_MPI, NULL, "can't bcast dataset info") + + /* Need a second bcast if it did not fit in the receivers' static + * buffer */ + if(dinfo_buf != dinfo_buf_static) { + HDassert(md_len + sizeof(uint64_t) > sizeof(dinfo_buf_static)); + if(MPI_SUCCESS != MPI_Bcast((char *)dinfo_buf + (4 * sizeof(uint64_t)), (int)(md_len - (3 * sizeof(uint64_t))), MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_DATASET, H5E_MPI, NULL, "can't bcast dataset info (second bcast)") + } /* end if */ + + /* Reset p */ + p = dinfo_buf + (4 * sizeof(uint64_t)); + } /* end if */ + } /* end if */ + else { + uint64_t tot_len = 0; + + /* Receive dataset info */ + if(MPI_SUCCESS != MPI_Bcast((char *)dinfo_buf, sizeof(dinfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_DATASET, H5E_MPI, NULL, "can't bcast dataset info") + + /* Decode oid */ + p = dinfo_buf_static; + UINT64DECODE(p, dset->obj.bin_oid) + + /* Decode serialized info lengths */ + UINT64DECODE(p, type_len) + UINT64DECODE(p, space_len) + UINT64DECODE(p, dcpl_len) + tot_len = type_len + space_len + dcpl_len; + + /* Check for type_len set to 0 - indicates failure */ + if(type_len == 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "lead process failed to open dataset") + + /* Check if we need to perform another bcast */ + if(tot_len + (4 * sizeof(uint64_t)) > sizeof(dinfo_buf_static)) { + /* 
Allocate a dynamic buffer if necessary */ + if(tot_len > sizeof(dinfo_buf_static)) { + if(NULL == (dinfo_buf_dyn = (uint8_t *)H5MM_malloc(tot_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "can't allocate space for dataset info") + dinfo_buf = dinfo_buf_dyn; + } /* end if */ + + /* Receive dataset info */ + if(MPI_SUCCESS != MPI_Bcast((char *)dinfo_buf, (int)tot_len, MPI_BYTE, 0, item->file->comm)) + HGOTO_ERROR(H5E_DATASET, H5E_MPI, NULL, "can't bcast dataset info (second bcast)") + + p = dinfo_buf; + } /* end if */ + } /* end else */ + + /* Decode datatype, dataspace, and DCPL */ + if((dset->type_id = H5Tdecode(p)) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_CANTDECODE, NULL, "can't deserialize datatype") + p += type_len; + if((dset->space_id = H5Sdecode(p)) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_CANTDECODE, NULL, "can't deserialize datatype") + if(H5Sselect_all(dset->space_id) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTDELETE, NULL, "can't change selection") + p += space_len; + if((dset->dcpl_id = H5Pdecode(p)) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_CANTDECODE, NULL, "can't deserialize dataset creation property list") + + /* Finish setting up dataset struct */ + if((dset->dapl_id = H5Pcopy(dapl_id)) < 0) + HGOTO_ERROR(H5E_SYM, H5E_CANTCOPY, NULL, "failed to copy dapl"); + + /* Set return value */ + ret_value = (void *)dset; + +done: + /* Cleanup on failure */ + if(NULL == ret_value) { + /* Bcast dinfo_buf as '0' if necessary - this will trigger failures in + * in other processes so we do not need to do the second bcast. 
*/ + if(must_bcast) { + HDmemset(dinfo_buf_static, 0, sizeof(dinfo_buf_static)); + if(MPI_SUCCESS != MPI_Bcast(dinfo_buf_static, sizeof(dinfo_buf_static), MPI_BYTE, 0, item->file->comm)) + HDONE_ERROR(H5E_DATASET, H5E_MPI, NULL, "can't bcast empty dataset info") + } /* end if */ + + /* Close dataset */ + if(dset && H5VL_rados_dataset_close(dset, dxpl_id, req) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, NULL, "can't close dataset") + } /* end if */ + + /* Close target group */ + if(target_grp && H5VL_rados_group_close(target_grp, dxpl_id, req) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, NULL, "can't close group") + + /* Free memory */ + dinfo_buf_dyn = (uint8_t *)H5MM_xfree(dinfo_buf_dyn); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_open() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_get_selected_chunk_info + * + * Purpose: Calculates the starting coordinates for the chunks selected + * in the file space given by file_space_id and sets up + * individual memory and file spaces for each chunk. The chunk + * coordinates and dataspaces are returned through the + * chunk_info struct pointer. 
+ * + * XXX: Note that performance could be increased by + * calculating all of the chunks in the entire dataset + * and then caching them in the dataset object for + * re-use in subsequent reads/writes + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * May, 2018 + * Based on H5VL_daosm_get_selected_chunk_info by Jordan + * Henderson, May, 2017 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_get_selected_chunk_info(hid_t dcpl, + hid_t file_space_id, hid_t mem_space_id, + H5VL_rados_select_chunk_info_t **chunk_info, size_t *chunk_info_len) +{ + H5VL_rados_select_chunk_info_t *_chunk_info = NULL; + hssize_t num_sel_points; + hssize_t chunk_file_space_adjust[H5O_LAYOUT_NDIMS]; + hsize_t chunk_dims[H5S_MAX_RANK]; + hsize_t file_sel_start[H5S_MAX_RANK], file_sel_end[H5S_MAX_RANK]; + hsize_t mem_sel_start[H5S_MAX_RANK], mem_sel_end[H5S_MAX_RANK]; + hsize_t start_coords[H5O_LAYOUT_NDIMS], end_coords[H5O_LAYOUT_NDIMS]; + hsize_t selection_start_coords[H5O_LAYOUT_NDIMS]; + hsize_t num_sel_points_cast; + htri_t space_same_shape = FALSE; + size_t info_buf_alloced; + size_t i, j; + H5S_t *fspace = NULL, *mspace = NULL; + int fspace_ndims, mspace_ndims; + int increment_dim; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(chunk_info); + HDassert(chunk_info_len); + + if ((num_sel_points = H5Sget_select_npoints(file_space_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "can't get number of points select in dataspace") + H5_CHECKED_ASSIGN(num_sel_points_cast, hsize_t, num_sel_points, hssize_t); + + /* Get the chunking information */ + if (H5Pget_chunk(dcpl, H5S_MAX_RANK, chunk_dims) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get chunking information") + + if ((fspace_ndims = H5Sget_simple_extent_ndims(file_space_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get file space dimensionality") + if ((mspace_ndims = 
H5Sget_simple_extent_ndims(mem_space_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get memory space dimensionality") + HDassert(mspace_ndims == fspace_ndims); + + /* Get the bounding box for the current selection in the file space */ + if (H5Sget_select_bounds(file_space_id, file_sel_start, file_sel_end) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get bounding box for file selection") + + if (H5Sget_select_bounds(mem_space_id, mem_sel_start, mem_sel_end) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get bounding box for memory selection") + + /* Calculate the adjustment for memory selection from the file selection */ + for (i = 0; i < (size_t) fspace_ndims; i++) { + H5_CHECK_OVERFLOW(file_sel_start[i], hsize_t, hssize_t); + H5_CHECK_OVERFLOW(mem_sel_start[i], hsize_t, hssize_t); + chunk_file_space_adjust[i] = (hssize_t) file_sel_start[i] - (hssize_t) mem_sel_start[i]; + } /* end for */ + + if (NULL == (_chunk_info = (H5VL_rados_select_chunk_info_t *) H5MM_malloc(H5VL_RADOS_DEFAULT_NUM_SEL_CHUNKS * sizeof(*_chunk_info)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for selected chunk info buffer") + info_buf_alloced = H5VL_RADOS_DEFAULT_NUM_SEL_CHUNKS * sizeof(*_chunk_info); + + /* Calculate the coordinates for the initial chunk */ + for (i = 0; i < (size_t) fspace_ndims; i++) { + start_coords[i] = selection_start_coords[i] = (file_sel_start[i] / chunk_dims[i]) * chunk_dims[i]; + end_coords[i] = (start_coords[i] + chunk_dims[i]) - 1; + } /* end for */ + + if (NULL == (fspace = (H5S_t *) H5I_object_verify(file_space_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + if (NULL == (mspace = (H5S_t *) H5I_object_verify(mem_space_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + if (FAIL == (space_same_shape = H5S_select_shape_same(fspace, mspace))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") 
+ + /* Iterate through each "chunk" in the dataset */ + for (i = 0; num_sel_points_cast;) { + /* Check for intersection of file selection and "chunk". If there is + * an intersection, set up a valid memory and file space for the chunk. */ + if (TRUE == H5S_hyper_intersect_block(fspace, start_coords, end_coords)) { + hssize_t chunk_mem_space_adjust[H5O_LAYOUT_NDIMS]; + hssize_t chunk_sel_npoints; + hid_t tmp_chunk_fspace_id; + H5S_t *tmp_chunk_fspace = NULL; + + /* Re-allocate selected chunk info buffer if necessary */ + while (i > (info_buf_alloced / sizeof(*_chunk_info)) - 1) { + if (NULL == (_chunk_info = (H5VL_rados_select_chunk_info_t *) H5MM_realloc(_chunk_info, 2 * info_buf_alloced))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't reallocate space for selected chunk info buffer") + info_buf_alloced *= 2; + } /* end while */ + + /* + * Set up the file Dataspace for this chunk. + */ + + /* Create temporary chunk for selection operations */ + if ((tmp_chunk_fspace_id = H5Scopy(file_space_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOPY, FAIL, "unable to copy file space") + + if (NULL == (tmp_chunk_fspace = (H5S_t *) H5I_object_verify(tmp_chunk_fspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Make certain selections are stored in span tree form (not "optimized hyperslab" or "all") */ + if (H5S_hyper_convert(tmp_chunk_fspace) < 0) { + (void) H5S_close(tmp_chunk_fspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to convert selection to span trees") + } /* end if */ + + /* "AND" temporary chunk and current chunk */ + if (H5S_select_hyperslab(tmp_chunk_fspace, H5S_SELECT_AND, start_coords, NULL, chunk_dims, NULL) < 0) { + (void) H5S_close(tmp_chunk_fspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSELECT, FAIL, "can't create chunk selection") + } /* end if */ + + /* Resize chunk's dataspace dimensions to size of chunk */ + if (H5S_set_extent_real(tmp_chunk_fspace, chunk_dims) < 0) { + (void) 
H5S_close(tmp_chunk_fspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSELECT, FAIL, "can't adjust chunk dimensions") + } /* end if */ + + /* Move selection back to have correct offset in chunk */ + if (H5S_SELECT_ADJUST_U(tmp_chunk_fspace, start_coords) < 0) { + (void) H5S_close(tmp_chunk_fspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSELECT, FAIL, "can't adjust chunk selection") + } /* end if */ + + /* Copy the chunk's coordinates to the selected chunk info buffer */ + HDmemcpy(_chunk_info[i].chunk_coords, start_coords, (size_t) fspace_ndims * sizeof(hsize_t)); + + _chunk_info[i].fspace_id = tmp_chunk_fspace_id; + + /* + * Now set up the memory Dataspace for this chunk. + */ + if (space_same_shape) { + hid_t tmp_chunk_mspace_id; + H5S_t *tmp_chunk_mspace = NULL; + + if ((tmp_chunk_mspace_id = H5Scopy(mem_space_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOPY, FAIL, "unable to copy memory space") + + if (NULL == (tmp_chunk_mspace = (H5S_t *) H5I_object_verify(tmp_chunk_mspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Release the current selection */ + if (H5S_SELECT_RELEASE(tmp_chunk_mspace) < 0) { + (void) H5S_close(tmp_chunk_mspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection") + } /* end if */ + + /* Copy the chunk's file space selection to its memory space selection */ + if (H5S_select_copy(tmp_chunk_mspace, tmp_chunk_fspace, FALSE) < 0) { + (void) H5S_close(tmp_chunk_mspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOPY, FAIL, "unable to copy selection") + } /* end if */ + + /* Compute the adjustment for the chunk */ + for (j = 0; j < (size_t) fspace_ndims; j++) { + H5_CHECK_OVERFLOW(_chunk_info[i].chunk_coords[j], hsize_t, hssize_t); + chunk_mem_space_adjust[j] = chunk_file_space_adjust[j] - (hssize_t) _chunk_info[i].chunk_coords[j]; + } /* end for */ + + /* Adjust the selection */ + if (H5S_hyper_adjust_s(tmp_chunk_mspace, chunk_mem_space_adjust) < 0) { + (void) 
H5S_close(tmp_chunk_mspace); + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSELECT, FAIL, "can't adjust chunk memory space selection") + } /* end if */ + + _chunk_info[i].mspace_id = tmp_chunk_mspace_id; + } /* end if */ + else { + HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, FAIL, "file and memory selections must currently have the same shape") + } /* end else */ + + i++; + + /* Determine if there are more chunks to process */ + if ((chunk_sel_npoints = H5S_GET_SELECT_NPOINTS(tmp_chunk_fspace)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get number of points selected in chunk file space") + + num_sel_points_cast -= (hsize_t) chunk_sel_npoints; + + if (num_sel_points_cast == 0) + HGOTO_DONE(SUCCEED) + } /* end if */ + + /* Set current increment dimension */ + increment_dim = fspace_ndims - 1; + + /* Increment chunk location in fastest changing dimension */ + H5_CHECK_OVERFLOW(chunk_dims[increment_dim], hsize_t, hssize_t); + start_coords[increment_dim] += chunk_dims[increment_dim]; + end_coords[increment_dim] += chunk_dims[increment_dim]; + + /* Bring chunk location back into bounds, if necessary */ + if (start_coords[increment_dim] > file_sel_end[increment_dim]) { + do { + /* Reset current dimension's location to 0 */ + start_coords[increment_dim] = selection_start_coords[increment_dim]; + end_coords[increment_dim] = (start_coords[increment_dim] + chunk_dims[increment_dim]) - 1; + + /* Decrement current dimension */ + increment_dim--; + + /* Increment chunk location in current dimension */ + start_coords[increment_dim] += chunk_dims[increment_dim]; + end_coords[increment_dim] = (start_coords[increment_dim] + chunk_dims[increment_dim]) - 1; + } while (start_coords[increment_dim] > file_sel_end[increment_dim]); + } /* end if */ + } /* end for */ + +done: + if (ret_value >= 0) { + *chunk_info = _chunk_info; + *chunk_info_len = i; + } /* end if */ + else { + if (_chunk_info) + H5MM_free(_chunk_info); + } /* end else */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end 
H5VL_rados_get_selected_chunk_info() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_build_io_op_merge + * + * Purpose: RADOSINC + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_build_io_op_merge(H5S_t *mem_space, H5S_t *file_space, + size_t type_size, size_t tot_nelem, void *rbuf, const void *wbuf, + rados_read_op_t read_op, rados_write_op_t write_op) +{ + H5S_sel_iter_t mem_sel_iter; /* Selection iteration info */ + hbool_t mem_sel_iter_init = FALSE; /* Selection iteration info has been initialized */ + H5S_sel_iter_t file_sel_iter; /* Selection iteration info */ + hbool_t file_sel_iter_init = FALSE; /* Selection iteration info has been initialized */ + size_t mem_nseq = 0; + size_t file_nseq = 0; + size_t nelem; + hsize_t mem_off[H5VL_RADOS_SEQ_LIST_LEN]; + size_t mem_len[H5VL_RADOS_SEQ_LIST_LEN]; + hsize_t file_off[H5VL_RADOS_SEQ_LIST_LEN]; + size_t file_len[H5VL_RADOS_SEQ_LIST_LEN]; + size_t io_len; + size_t tot_len = tot_nelem * type_size; + size_t mem_i = 0; + size_t file_i = 0; + size_t mem_ei = 0; + size_t file_ei = 0; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(!rbuf != !wbuf); + HDassert(tot_nelem > 0); + + /* Initialize selection iterators */ + if(H5S_select_iter_init(&mem_sel_iter, mem_space, type_size) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + mem_sel_iter_init = TRUE; /* Selection iteration info has been initialized */ + if(H5S_select_iter_init(&file_sel_iter, file_space, type_size) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + file_sel_iter_init = TRUE; /* Selection iteration info has been initialized */ + + /* Generate sequences from the file space until finished */ + do { + /* Get the sequences of 
bytes if necessary */ + HDassert(mem_i <= mem_nseq); + if(mem_i == mem_nseq) { + if(H5S_SELECT_GET_SEQ_LIST(mem_space, 0, &mem_sel_iter, (size_t)H5VL_RADOS_SEQ_LIST_LEN, (size_t)-1, &mem_nseq, &nelem, mem_off, mem_len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + mem_i = 0; + } /* end if */ + HDassert(file_i <= file_nseq); + if(file_i == file_nseq) { + if(H5S_SELECT_GET_SEQ_LIST(file_space, 0, &file_sel_iter, (size_t)H5VL_RADOS_SEQ_LIST_LEN, (size_t)-1, &file_nseq, &nelem, file_off, file_len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + file_i = 0; + } /* end if */ + + /* Calculate number of elements to put in next merged offset/length + * pair */ + io_len = mem_len[mem_i] <= file_len[file_i] ? mem_len[mem_i] : file_len[file_i]; + + /* Add to I/O op */ + if(rbuf) + rados_read_op_read(read_op, (uint64_t)(file_off[file_i] + file_ei), + io_len, (char *)rbuf + mem_off[mem_i] + mem_ei, NULL, NULL); + else + rados_write_op_write(write_op, + (const char *)wbuf + mem_off[mem_i] + mem_ei, + io_len, (uint64_t)(file_off[file_i] + file_ei)); + + /* Update indices */ + if(io_len == mem_len[mem_i]) { + mem_i++; + mem_ei = 0; + } /* end if */ + else { + HDassert(mem_len[mem_i] > io_len); + mem_len[mem_i] -= io_len; + mem_ei += io_len; + } /* end else */ + if(io_len == file_len[file_i]) { + file_i++; + file_ei = 0; + } /* end if */ + else { + HDassert(file_len[file_i] > io_len); + file_len[file_i] -= io_len; + file_ei += io_len; + } /* end else */ + tot_len -= io_len; + } while(tot_len > 0); + +done: + /* Release selection iterators */ + if(mem_sel_iter_init && H5S_SELECT_ITER_RELEASE(&mem_sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + if(file_sel_iter_init && H5S_SELECT_ITER_RELEASE(&file_sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + FUNC_LEAVE_NOAPI(ret_value) 
+} /* end H5VL_rados_build_io_op_merge() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_build_io_op_match + * + * Purpose: RADOSINC + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_build_io_op_match(H5S_t *file_space, size_t type_size, + size_t tot_nelem, void *rbuf, const void *wbuf, rados_read_op_t read_op, + rados_write_op_t write_op) +{ + H5S_sel_iter_t sel_iter; /* Selection iteration info */ + hbool_t sel_iter_init = FALSE; /* Selection iteration info has been initialized */ + size_t nseq; + size_t nelem; + hsize_t off[H5VL_RADOS_SEQ_LIST_LEN]; + size_t len[H5VL_RADOS_SEQ_LIST_LEN]; + size_t szi; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(!rbuf != !wbuf); + HDassert(tot_nelem > 0); + + /* Initialize selection iterator */ + if(H5S_select_iter_init(&sel_iter, file_space, type_size) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + sel_iter_init = TRUE; /* Selection iteration info has been initialized */ + + /* Generate sequences from the file space until finished */ + do { + /* Get the sequences of bytes */ + if(H5S_SELECT_GET_SEQ_LIST(file_space, 0, &sel_iter, (size_t)H5VL_RADOS_SEQ_LIST_LEN, (size_t)-1, &nseq, &nelem, off, len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + tot_nelem -= nelem; + + /* Create io ops from offsets and lengths */ + if(rbuf) + for(szi = 0; szi < nseq; szi++) + rados_read_op_read(read_op, (uint64_t)off[szi], len[szi], + (char *)rbuf + off[szi], NULL, NULL); + else + for(szi = 0; szi < nseq; szi++) + rados_write_op_write(write_op, (const char *)wbuf + off[szi], + len[szi], (uint64_t)off[szi]); + } while(tot_nelem > 0); + +done: + /* Release selection iterator */ + if(sel_iter_init && 
H5S_SELECT_ITER_RELEASE(&sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_build_io_op_match() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_build_io_op_contig + * + * Purpose: RADOSINC + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_build_io_op_contig(H5S_t *file_space, size_t type_size, + size_t tot_nelem, void *rbuf, const void *wbuf, rados_read_op_t read_op, + rados_write_op_t write_op) +{ + H5S_sel_iter_t sel_iter; /* Selection iteration info */ + hbool_t sel_iter_init = FALSE; /* Selection iteration info has been initialized */ + size_t nseq; + size_t nelem; + hsize_t off[H5VL_RADOS_SEQ_LIST_LEN]; + size_t len[H5VL_RADOS_SEQ_LIST_LEN]; + size_t mem_off = 0; + size_t szi; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(rbuf || wbuf); + HDassert(tot_nelem > 0); + + /* Initialize selection iterator */ + if(H5S_select_iter_init(&sel_iter, file_space, type_size) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + sel_iter_init = TRUE; /* Selection iteration info has been initialized */ + + /* Generate sequences from the file space until finished */ + do { + /* Get the sequences of bytes */ + if(H5S_SELECT_GET_SEQ_LIST(file_space, 0, &sel_iter, (size_t)H5VL_RADOS_SEQ_LIST_LEN, (size_t)-1, &nseq, &nelem, off, len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + tot_nelem -= nelem; + + /* Create io ops from offsets and lengths */ + for(szi = 0; szi < nseq; szi++) { + if(rbuf) + rados_read_op_read(read_op, (uint64_t)off[szi], len[szi], + (char *)rbuf + mem_off, NULL, NULL); + if(wbuf) + rados_write_op_write(write_op, (const char 
*)wbuf + mem_off, + len[szi], (uint64_t)off[szi]); + mem_off += len[szi]; + } /* end for */ + } while(tot_nelem > 0); + +done: + /* Release selection iterator */ + if(sel_iter_init && H5S_SELECT_ITER_RELEASE(&sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_build_io_op_contig() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_scatter_cb + * + * Purpose: Callback function for H5Dscatter. Simply passes the + * entire buffer described by udata to H5Dscatter. + * + * Return: SUCCEED (never fails) + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_scatter_cb(const void **src_buf, size_t *src_buf_bytes_used, + void *_udata) +{ + H5VL_rados_scatter_cb_ud_t *udata = (H5VL_rados_scatter_cb_ud_t *)_udata; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + /* Set src_buf and src_buf_bytes_used to use the entire buffer */ + *src_buf = udata->buf; + *src_buf_bytes_used = udata->len; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5VL_rados_scatter_cb() */ + +#if 0 + +/*------------------------------------------------------------------------- + * Function: H5VL_daosm_dataset_mem_vl_rd_cb + * + * Purpose: H5Diterate callback for iterating over the memory space + * before reading vl data. Allocates vl read buffers, + * up scatter gather lists (sgls), and reshapes iods if + * necessary to skip empty elements. + * + * Return: Success: 0 + * Failure: -1, dataset not written. 
+ * + * Programmer: Neil Fortner + * May, 2017 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_daosm_dataset_mem_vl_rd_cb(void *_elem, hid_t H5_ATTR_UNUSED type_id, + unsigned H5_ATTR_UNUSED ndim, const hsize_t H5_ATTR_UNUSED *point, + void *_udata) +{ + H5VL_daosm_vl_mem_ud_t *udata = (H5VL_daosm_vl_mem_ud_t *)_udata; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Set up constant sgl info */ + udata->sgls[udata->idx].sg_nr.num = 1; + udata->sgls[udata->idx].sg_nr.num_out = 0; + udata->sgls[udata->idx].sg_iovs = &udata->sg_iovs[udata->idx]; + + /* Check for empty element */ + if(udata->iods[udata->idx].iod_size == 0) { + /* Increment offset, slide down following elements */ + udata->offset++; + + /* Zero out read buffer */ + if(udata->is_vl_str) + *(char **)_elem = NULL; + else + HDmemset(_elem, 0, sizeof(hvl_t)); + } /* end if */ + else { + HDassert(udata->idx >= udata->offset); + + /* Check for vlen string */ + if(udata->is_vl_str) { + char *elem = NULL; + + /* Allocate buffer for this vl element */ + if(NULL == (elem = (char *)HDmalloc((size_t)udata->iods[udata->idx].iod_size + 1))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate vl data buffer") + *(char **)_elem = elem; + + /* Add null terminator */ + elem[udata->iods[udata->idx].iod_size] = '\0'; + + /* Set buffer location in sgl */ + daos_iov_set(&udata->sg_iovs[udata->idx - udata->offset], elem, udata->iods[udata->idx].iod_size); + } /* end if */ + else { + /* Standard vlen, find hvl_t struct for this element */ + hvl_t *elem = (hvl_t *)_elem; + + HDassert(udata->base_type_size > 0); + + /* Allocate buffer for this vl element and set size */ + elem->len = (size_t)udata->iods[udata->idx].iod_size / udata->base_type_size; + if(NULL == (elem->p = HDmalloc((size_t)udata->iods[udata->idx].iod_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate vl data buffer") + + /* Set buffer location in sgl */ + 
daos_iov_set(&udata->sg_iovs[udata->idx - udata->offset], elem->p, udata->iods[udata->idx].iod_size); + } /* end if */ + + /* Slide down iod if necessary */ + if(udata->offset) + udata->iods[udata->idx - udata->offset] = udata->iods[udata->idx]; + } /* end else */ + + /* Advance idx */ + udata->idx++; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_daosm_dataset_mem_vl_rd_cb() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_daosm_dataset_file_vl_cb + * + * Purpose: H5Diterate callback for iterating over the file space + * before vl data I/O. Sets up akeys and iods (except for + * iod record sizes). + * + * Return: Success: 0 + * Failure: -1, dataset not written. + * + * Programmer: Neil Fortner + * May, 2017 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_daosm_dataset_file_vl_cb(void H5_ATTR_UNUSED *_elem, + hid_t H5_ATTR_UNUSED type_id, unsigned ndim, const hsize_t *point, + void *_udata) +{ + H5VL_daosm_vl_file_ud_t *udata = (H5VL_daosm_vl_file_ud_t *)_udata; + size_t akey_len = ndim * sizeof(uint64_t); + uint64_t coordu64; + uint8_t *p; + unsigned i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Create akey for this element */ + if(NULL == (udata->akeys[udata->idx] = (uint8_t *)H5MM_malloc(akey_len))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for akey") + p = udata->akeys[udata->idx]; + for(i = 0; i < ndim; i++) { + coordu64 = (uint64_t)point[i]; + UINT64ENCODE(p, coordu64) + } /* end for */ + + /* Set up iod, size was set in memory callback or initialized in main read + * function. Use "single" records of varying size. 
*/ + daos_iov_set(&udata->iods[udata->idx].iod_name, (void *)udata->akeys[udata->idx], (daos_size_t)akey_len); + daos_csum_set(&udata->iods[udata->idx].iod_kcsum, NULL, 0); + udata->iods[udata->idx].iod_nr = 1u; + udata->iods[udata->idx].iod_type = DAOS_IOD_SINGLE; + + /* Advance idx */ + udata->idx++; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_daosm_dataset_file_vl_cb() */ +#endif + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_read + * + * Purpose: Reads raw data from a dataset into a buffer. + *` + * Return: Success: 0 + * Failure: -1, dataset not read. + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_dataset_read(void *_dset, hid_t mem_type_id, hid_t mem_space_id, + hid_t file_space_id, hid_t dxpl_id, void *buf, void H5_ATTR_UNUSED **req) +{ + H5VL_rados_select_chunk_info_t *chunk_info = NULL; /* Array of info for each chunk selected in the file */ + H5VL_rados_dset_t *dset = (H5VL_rados_dset_t *)_dset; + H5S_sel_iter_t sel_iter; /* Selection iteration info */ + hbool_t sel_iter_init = FALSE; /* Selection iteration info has been initialized */ + int ndims; + hsize_t dim[H5S_MAX_RANK]; + hid_t real_file_space_id; + hid_t real_mem_space_id; + hssize_t num_elem; + hssize_t num_elem_chunk; + size_t chunk_info_len; + char *chunk_oid = NULL; + rados_read_op_t read_op; + hbool_t read_op_init = FALSE; + size_t file_type_size = 0; + size_t mem_type_size; + hbool_t types_equal = TRUE; + hbool_t need_bkg = FALSE; + hbool_t fill_bkg = FALSE; + //hid_t base_type_id = FAIL; + //size_t base_type_size = 0; + void *tmp_tconv_buf = NULL; + void *tmp_bkg_buf = NULL; + void *tconv_buf; + void *bkg_buf; + //H5T_class_t type_class; + //hbool_t is_vl = FALSE; + //htri_t is_vl_str = FALSE; + hbool_t close_spaces = FALSE; + H5VL_rados_tconv_reuse_t reuse = H5VL_RADOS_TCONV_REUSE_NONE; + int ret; + 
uint64_t i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Get dataspace extent */ + if((ndims = H5Sget_simple_extent_ndims(dset->space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of dimensions") + if(ndims != H5Sget_simple_extent_dims(dset->space_id, dim, NULL)) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dimensions") + + /* Get "real" file space */ + if(file_space_id == H5S_ALL) + real_file_space_id = dset->space_id; + else + real_file_space_id = file_space_id; + + /* Get number of elements in selection */ + if((num_elem = H5Sget_select_npoints(real_file_space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") + + /* Get "real" file space */ + if(mem_space_id == H5S_ALL) + real_mem_space_id = real_file_space_id; + else { + hssize_t num_elem_file; + + real_mem_space_id = mem_space_id; + + /* Verify number of elements in memory selection matches file selection + */ + if((num_elem_file = H5Sget_select_npoints(real_mem_space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") + if(num_elem_file != num_elem) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "src and dest data spaces have different sizes") + } /* end else */ + + /* Check for no selection */ + if(num_elem == 0) + HGOTO_DONE(SUCCEED) + +#if 0 + /* Check for vlen */ + if(H5T_NO_CLASS == (type_class = H5Tget_class(mem_type_id))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype class") + if(type_class == H5T_VLEN) { + is_vl = TRUE; + + /* Calculate base type size */ + if((base_type_id = H5Tget_super(mem_type_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype base type") + if(0 == (base_type_size = H5Tget_size(base_type_id))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype base type size") + } /* end if */ + else if(type_class == H5T_STRING) { + /* check for vlen string */ + if((is_vl_str = 
H5Tis_variable_str(mem_type_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check for variable length string") + if(is_vl_str) + is_vl = TRUE; + } /* end if */ + else +#endif + { + /* Initialize type conversion */ + if(H5VL_rados_tconv_init(dset->type_id, &file_type_size, mem_type_id, &mem_type_size, &types_equal, &reuse, &need_bkg, &fill_bkg) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize type conversion") + } /* end else */ + + /* Check if the dataset actually has a chunked storage layout. If it does not, simply + * set up the dataset as a single "chunk". + */ + switch(H5Pget_layout(dset->dcpl_id)) { + case H5D_COMPACT: + case H5D_CONTIGUOUS: + if (NULL == (chunk_info = (H5VL_rados_select_chunk_info_t *)H5MM_malloc(sizeof(H5VL_rados_select_chunk_info_t)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate single chunk info buffer") + chunk_info_len = 1; + + /* Set up "single-chunk dataset", with the "chunk" starting at coordinate 0 */ + chunk_info->fspace_id = real_file_space_id; + chunk_info->mspace_id = real_mem_space_id; + HDmemset(chunk_info->chunk_coords, 0, sizeof(chunk_info->chunk_coords)); + + break; + + case H5D_CHUNKED: +// if(is_vl) +// HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "vlen types are currently unsupported with chunking") + + /* Get the coordinates of the currently selected chunks in the file, setting up memory and file dataspaces for them */ + if(H5VL_rados_get_selected_chunk_info(dset->dcpl_id, real_file_space_id, real_mem_space_id, &chunk_info, &chunk_info_len) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get selected chunk info") + + close_spaces = TRUE; + + break; + default: + HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "invalid, unknown or unsupported dataset storage layout type") + } /* end switch */ + + /* Get number of elements in a chunk */ + if((num_elem_chunk = H5Sget_simple_extent_npoints(chunk_info[0].fspace_id)) < 0) + HGOTO_ERROR(H5E_DATASET, 
H5E_CANTGET, FAIL, "can't get number of points in chunk") + + /* Iterate through each of the "chunks" in the dataset */ + for(i = 0; i < chunk_info_len; i++) { + /* Create read op */ + read_op = rados_create_read_op(); + read_op_init = TRUE; + + /* Create chunk key */ + if(H5VL_rados_oid_create_chunk(dset->obj.item.file, dset->obj.bin_oid, ndims, + chunk_info[i].chunk_coords, &chunk_oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't create dataset chunk oid") + + /* Get number of elements in selection */ + if((num_elem = H5Sget_select_npoints(chunk_info[i].mspace_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") +#if 0 + /* Check for variable length */ + if(is_vl) { + H5VL_daosm_vl_mem_ud_t mem_ud; + H5VL_daosm_vl_file_ud_t file_ud; + + /* Get number of elements in selection */ + if((num_elem = H5Sget_select_npoints(real_mem_space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") + + /* Allocate array of akey pointers */ + if(NULL == (akeys = (uint8_t **)H5MM_calloc((size_t)num_elem * sizeof(uint8_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for akey array") + + /* Allocate array of iods */ + if(NULL == (iods = (daos_iod_t *)H5MM_calloc((size_t)num_elem * sizeof(daos_iod_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for I/O descriptor array") + + /* Fill in size fields of iod as DAOS_REC_ANY so we can read the vl + * sizes */ + for(i = 0; i < (uint64_t)num_elem; i++) + iods[i].iod_size = DAOS_REC_ANY; + + /* Iterate over file selection. Note the bogus buffer and type_id, + * these don't matter since the "elem" parameter of the callback is not + * used. 
*/ + file_ud.akeys = akeys; + file_ud.iods = iods; + file_ud.idx = 0; + if(H5Diterate((void *)buf, mem_type_id, real_file_space_id, H5VL_daosm_dataset_file_vl_cb, &file_ud) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, FAIL, "file selection iteration failed") + HDassert(file_ud.idx == (uint64_t)num_elem); + + /* Read vl sizes from dataset */ + /* Note cast to unsigned reduces width to 32 bits. Should eventually + * check for overflow and iterate over 2^32 size blocks */ + if(0 != (ret = daos_obj_fetch(dset->obj.obj_oh, dset->obj.item.file->epoch, &dkey, (unsigned)num_elem, iods, NULL, NULL /*maps*/, NULL /*event*/))) + HGOTO_ERROR(H5E_ATTR, H5E_READERROR, FAIL, "can't read vl data sizes from dataset: %d", ret) + + /* Allocate array of sg_iovs */ + if(NULL == (sg_iovs = (daos_iov_t *)H5MM_malloc((size_t)num_elem * sizeof(daos_iov_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for scatter gather list") + + /* Allocate array of sgls */ + if(NULL == (sgls = (daos_sg_list_t *)H5MM_malloc((size_t)num_elem * sizeof(daos_sg_list_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for scatter gather list array") + + /* Iterate over memory selection */ + mem_ud.iods = iods; + mem_ud.sgls = sgls; + mem_ud.sg_iovs = sg_iovs; + mem_ud.is_vl_str = is_vl_str; + mem_ud.base_type_size = base_type_size; + mem_ud.offset = 0; + mem_ud.idx = 0; + if(H5Diterate((void *)buf, mem_type_id, real_mem_space_id, H5VL_daosm_dataset_mem_vl_rd_cb, &mem_ud) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, FAIL, "memory selection iteration failed") + HDassert(mem_ud.idx == (uint64_t)num_elem); + + /* Read data from dataset */ + /* Note cast to unsigned reduces width to 32 bits. 
Should eventually + * check for overflow and iterate over 2^32 size blocks */ + if(0 != (ret = daos_obj_fetch(dset->obj.obj_oh, dset->obj.item.file->epoch, &dkey, (unsigned)((uint64_t)num_elem - mem_ud.offset), iods, sgls, NULL /*maps*/, NULL /*event*/))) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't read data from dataset: %d", ret) + } /* end if */ + else +#endif + { + H5S_t *chunk_fspace = NULL; + H5S_t *chunk_mspace = NULL; + htri_t match_select = FALSE; + + /* Get file dataspace object */ + if(NULL == (chunk_fspace = (H5S_t *) H5I_object_verify(chunk_info[i].fspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Check if the types are equal */ + if(types_equal) { + /* No type conversion necessary */ + /* Check if we should match the file and memory sequence lists + * (serialized selections). We can do this if the memory space + * is H5S_ALL and the chunk extent equals the file extent. If + * the number of chunks selected is more than one we do not need + * to check the extents because they cannot be the same. We + * could also allow the case where the memory space is not + * H5S_ALL but is equivalent. 
*/ + if(mem_space_id == H5S_ALL && chunk_info_len == 1) + if((match_select = H5Sextent_equal(real_file_space_id, chunk_info[i].fspace_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOMPARE, FAIL, "can't check if file and chunk dataspaces are equal") + + /* Check for matching selections */ + if(match_select) { + /* Build read op from file space */ + if(H5VL_rados_build_io_op_match(chunk_fspace, file_type_size, (size_t)num_elem, buf, NULL, read_op, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS read op") + } /* end if */ + else { + /* Get memory dataspace object */ + if(NULL == (chunk_mspace = (H5S_t *) H5I_object_verify(chunk_info[i].mspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Build read op from file space and mem space */ + if(H5VL_rados_build_io_op_merge(chunk_mspace, chunk_fspace, file_type_size, (size_t)num_elem, buf, NULL, read_op, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS read op") + } /* end else */ + + /* Read data from dataset */ + if((ret = rados_read_op_operate(read_op, ioctx_g, chunk_oid, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't read data from dataset: %s", strerror(-ret)) + } /* end if */ + else { + size_t nseq_tmp; + size_t nelem_tmp; + hsize_t sel_off; + size_t sel_len; + hbool_t contig; + + /* Type conversion necessary */ + /* Get memory dataspace object */ + if(NULL == (chunk_mspace = (H5S_t *) H5I_object_verify(chunk_info[i].mspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Check for contiguous memory buffer */ + + /* Initialize selection iterator */ + if(H5S_select_iter_init(&sel_iter, chunk_mspace, (size_t)1) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + sel_iter_init = TRUE; /* Selection iteration info has been initialized */ + + /* Get the sequence list - only check 
the first sequence because we only + * care if it is contiguous and if so where the contiguous selection + * begins */ + if(H5S_SELECT_GET_SEQ_LIST(chunk_mspace, 0, &sel_iter, (size_t)1, (size_t)-1, &nseq_tmp, &nelem_tmp, &sel_off, &sel_len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + contig = (sel_len == (size_t)num_elem); + sel_off *= (hsize_t)mem_type_size; + + /* Release selection iterator */ + if(H5S_SELECT_ITER_RELEASE(&sel_iter) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + sel_iter_init = FALSE; + + /* Find or allocate usable type conversion buffer */ + if(contig && (reuse == H5VL_RADOS_TCONV_REUSE_TCONV)) + tconv_buf = (char *)buf + (size_t)sel_off; + else { + if(!tmp_tconv_buf) + if(NULL == (tmp_tconv_buf = H5MM_malloc( + (size_t)num_elem_chunk * (file_type_size + > mem_type_size ? file_type_size + : mem_type_size)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate type conversion buffer") + tconv_buf = tmp_tconv_buf; + } /* end else */ + + /* Find or allocate usable background buffer */ + if(need_bkg) { + if(contig && (reuse == H5VL_RADOS_TCONV_REUSE_BKG)) + bkg_buf = (char *)buf + (size_t)sel_off; + else { + if(!tmp_bkg_buf) + if(NULL == (tmp_bkg_buf = H5MM_malloc( + (size_t)num_elem_chunk * mem_type_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate background buffer") + bkg_buf = tmp_bkg_buf; + } /* end else */ + } /* end if */ + else + bkg_buf = NULL; + + /* Build read op from file space */ + if(H5VL_rados_build_io_op_contig(chunk_fspace, file_type_size, (size_t)num_elem, tconv_buf, NULL, read_op, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS write op") + + /* Read data from dataset */ + if((ret = rados_read_op_operate(read_op, ioctx_g, chunk_oid, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't read data from dataset: %s", strerror(-ret)) + 
+ /* Gather data to background buffer if necessary */ + if(fill_bkg && (bkg_buf == tmp_bkg_buf)) + if(H5Dgather(chunk_info[i].mspace_id, buf, mem_type_id, (size_t)num_elem * mem_type_size, bkg_buf, NULL, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't gather data to background buffer") + + /* Perform type conversion */ + if(H5Tconvert(dset->type_id, mem_type_id, (size_t)num_elem, tconv_buf, bkg_buf, dxpl_id) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL, "can't perform type conversion") + + /* Scatter data to memory buffer if necessary */ + if(tconv_buf == tmp_tconv_buf) { + H5VL_rados_scatter_cb_ud_t scatter_cb_ud; + + scatter_cb_ud.buf = tconv_buf; + scatter_cb_ud.len = (size_t)num_elem * mem_type_size; + if(H5Dscatter(H5VL_rados_scatter_cb, &scatter_cb_ud, mem_type_id, chunk_info[i].mspace_id, buf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't scatter data to read buffer") + } /* end if */ + } /* end else */ + } /* end else */ + + rados_release_read_op(read_op); + read_op_init = FALSE; + } /* end for */ + +done: + /* Free memory */ + if(read_op_init) + rados_release_read_op(read_op); + H5MM_xfree(chunk_oid); + H5MM_xfree(tmp_tconv_buf); + H5MM_xfree(tmp_bkg_buf); + + /*if(base_type_id != FAIL) + if(H5I_dec_app_ref(base_type_id) < 0) + HDONE_ERROR(H5E_ATTR, H5E_CLOSEERROR, FAIL, "can't close base type id")*/ + + if(chunk_info) { + if(close_spaces) { + for(i = 0; i < chunk_info_len; i++) { + if(H5Sclose(chunk_info[i].mspace_id) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTCLOSEOBJ, FAIL, "can't close memory space"); + if(H5Sclose(chunk_info[i].fspace_id) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTCLOSEOBJ, FAIL, "can't close file space"); + } /* end for */ + } /* end if */ + + H5MM_free(chunk_info); + } /* end if */ + + /* Release selection iterator */ + if(sel_iter_init && H5S_SELECT_ITER_RELEASE(&sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + 
FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_read() */ + +#if 0 + +/*------------------------------------------------------------------------- + * Function: H5VL_daosm_dataset_mem_vl_wr_cb + * + * Purpose: H5Diterate callback for iterating over the memory space + * before writing vl data. Sets up scatter gather lists + * (sgls) and sets the record sizes in iods. + * + * Return: Success: 0 + * Failure: -1, dataset not written. + * + * Programmer: Neil Fortner + * May, 2017 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_daosm_dataset_mem_vl_wr_cb(void *_elem, hid_t H5_ATTR_UNUSED type_id, + unsigned H5_ATTR_UNUSED ndim, const hsize_t H5_ATTR_UNUSED *point, + void *_udata) +{ + H5VL_daosm_vl_mem_ud_t *udata = (H5VL_daosm_vl_mem_ud_t *)_udata; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + /* Set up constant sgl info */ + udata->sgls[udata->idx].sg_nr.num = 1; + udata->sgls[udata->idx].sg_nr.num_out = 0; + udata->sgls[udata->idx].sg_iovs = &udata->sg_iovs[udata->idx]; + + /* Check for vlen string */ + if(udata->is_vl_str) { + /* Find string for this element */ + char *elem = *(char **)_elem; + + /* Set string length in iod and buffer location in sgl. If we are + * writing an empty string ("\0"), increase the size by one to + * differentiate it from NULL strings. Note that this will cause the + * read buffer to be one byte longer than it needs to be in this case. + * This should not cause any ill effects. 
*/ + if(elem) { + udata->iods[udata->idx].iod_size = (daos_size_t)HDstrlen(elem); + if(udata->iods[udata->idx].iod_size == 0) + udata->iods[udata->idx].iod_size = 1; + daos_iov_set(&udata->sg_iovs[udata->idx], (void *)elem, udata->iods[udata->idx].iod_size); + } /* end if */ + else { + udata->iods[udata->idx].iod_size = 0; + daos_iov_set(&udata->sg_iovs[udata->idx], NULL, 0); + } /* end else */ + } /* end if */ + else { + /* Standard vlen, find hvl_t struct for this element */ + hvl_t *elem = (hvl_t *)_elem; + + HDassert(udata->base_type_size > 0); + + /* Set buffer length in iod and buffer location in sgl */ + if(elem->len > 0) { + udata->iods[udata->idx].iod_size = (daos_size_t)(elem->len * udata->base_type_size); + daos_iov_set(&udata->sg_iovs[udata->idx], (void *)elem->p, udata->iods[udata->idx].iod_size); + } /* end if */ + else { + udata->iods[udata->idx].iod_size = 0; + daos_iov_set(&udata->sg_iovs[udata->idx], NULL, 0); + } /* end else */ + } /* end else */ + + /* Advance idx */ + udata->idx++; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5VL_daosm_dataset_mem_vl_wr_cb() */ +#endif + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_write + * + * Purpose: Writes raw data from a buffer into a dataset. + * + * Return: Success: 0 + * Failure: -1, dataset not written. 
+ * + * Programmer: Neil Fortner + * March, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_dataset_write(void *_dset, hid_t mem_type_id, hid_t mem_space_id, + hid_t file_space_id, hid_t H5_ATTR_UNUSED dxpl_id, + const void *buf, void H5_ATTR_UNUSED **req) +{ + H5VL_rados_select_chunk_info_t *chunk_info = NULL; /* Array of info for each chunk selected in the file */ + H5VL_rados_dset_t *dset = (H5VL_rados_dset_t *)_dset; + int ndims; + hsize_t dim[H5S_MAX_RANK]; + hid_t real_file_space_id; + hid_t real_mem_space_id; + hssize_t num_elem; + hssize_t num_elem_chunk; + size_t chunk_info_len; + char *chunk_oid = NULL; + rados_write_op_t write_op; + hbool_t write_op_init = FALSE; + rados_read_op_t read_op; + hbool_t read_op_init = FALSE; + size_t file_type_size; + size_t mem_type_size; + hbool_t types_equal = TRUE; + hbool_t need_bkg = FALSE; + hbool_t fill_bkg = FALSE; + //hid_t base_type_id = FAIL; + //size_t base_type_size = 0; + void *tconv_buf = NULL; + void *bkg_buf = NULL; + //H5T_class_t type_class; + //hbool_t is_vl = FALSE; + //uhtri_t is_vl_str = FALSE; + hbool_t close_spaces = FALSE; + H5VL_rados_tconv_reuse_t reuse = H5VL_RADOS_TCONV_REUSE_NONE; + int ret; + uint64_t i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + /* Check for write access */ + if(!(dset->obj.item.file->flags & H5F_ACC_RDWR)) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "no write intent on file") + + /* Get dataspace extent */ + if((ndims = H5Sget_simple_extent_ndims(dset->space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of dimensions") + if(ndims != H5Sget_simple_extent_dims(dset->space_id, dim, NULL)) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dimensions") + + /* Get "real" file space */ + if(file_space_id == H5S_ALL) + real_file_space_id = dset->space_id; + else + real_file_space_id = file_space_id; + + /* Get number of elements in selection */ + if((num_elem 
= H5Sget_select_npoints(real_file_space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") + + /* Get "real" memory space */ + if(mem_space_id == H5S_ALL) + real_mem_space_id = real_file_space_id; + else { + hssize_t num_elem_file; + + real_mem_space_id = mem_space_id; + + /* Verify number of elements in memory selection matches file selection + */ + if((num_elem_file = H5Sget_select_npoints(real_mem_space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") + if(num_elem_file != num_elem) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "src and dest data spaces have different sizes") + } /* end else */ + + /* Check for no selection */ + if(num_elem == 0) + HGOTO_DONE(SUCCEED) +#if 0 + /* Check for vlen */ + if(H5T_NO_CLASS == (type_class = H5Tget_class(mem_type_id))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype class") + if(type_class == H5T_VLEN) { + is_vl = TRUE; + + /* Calculate base type size */ + if((base_type_id = H5Tget_super(mem_type_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype base type") + if(0 == (base_type_size = H5Tget_size(base_type_id))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype base type size") + } /* end if */ + else if(type_class == H5T_STRING) { + /* check for vlen string */ + if((is_vl_str = H5Tis_variable_str(mem_type_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check for variable length string") + if(is_vl_str) + is_vl = TRUE; + } /* end if */ + else +#endif + { + /* Initialize type conversion */ + if(H5VL_rados_tconv_init(dset->type_id, &file_type_size, mem_type_id, &mem_type_size, &types_equal, &reuse, &need_bkg, &fill_bkg) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize type conversion") + } /* end else */ + + /* Check if the dataset actually has a chunked storage layout. If it does not, simply + * set up the dataset as a single "chunk". 
+ */ + switch(H5Pget_layout(dset->dcpl_id)) { + case H5D_COMPACT: + case H5D_CONTIGUOUS: + if (NULL == (chunk_info = (H5VL_rados_select_chunk_info_t *)H5MM_malloc(sizeof(H5VL_rados_select_chunk_info_t)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate single chunk info buffer") + chunk_info_len = 1; + + /* Set up "single-chunk dataset", with the "chunk" starting at coordinate 0 */ + chunk_info->fspace_id = real_file_space_id; + chunk_info->mspace_id = real_mem_space_id; + HDmemset(chunk_info->chunk_coords, 0, sizeof(chunk_info->chunk_coords)); + + break; + + case H5D_CHUNKED: +// if(is_vl) +// HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "vlen types are currently unsupported with chunking") + + /* Get the coordinates of the currently selected chunks in the file, setting up memory and file dataspaces for them */ + if(H5VL_rados_get_selected_chunk_info(dset->dcpl_id, real_file_space_id, real_mem_space_id, &chunk_info, &chunk_info_len) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get selected chunk info") + + close_spaces = TRUE; + + break; + default: + HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "invalid, unknown or unsupported dataset storage layout type") + } /* end switch */ + + /* Get number of elements in a chunk */ + if((num_elem_chunk = H5Sget_simple_extent_npoints(chunk_info[0].fspace_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in chunk") + + /* Allocate tconv_buf if necessary */ + if(!types_equal) + if(NULL == (tconv_buf = H5MM_malloc( (size_t)num_elem_chunk + * (file_type_size > mem_type_size ? 
file_type_size + : mem_type_size)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate type conversion buffer") + + /* Allocate bkg_buf if necessary */ + if(need_bkg) + if(NULL == (bkg_buf = H5MM_malloc((size_t)num_elem_chunk + * mem_type_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate background buffer") + + /* Iterate through each of the "chunks" in the dataset */ + for(i = 0; i < chunk_info_len; i++) { + /* Create write op */ + write_op = rados_create_write_op(); + write_op_init = TRUE; + + /* Create chunk key */ + if(H5VL_rados_oid_create_chunk(dset->obj.item.file, dset->obj.bin_oid, ndims, + chunk_info[i].chunk_coords, &chunk_oid) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't create dataset chunk oid") + + /* Get number of elements in selection */ + if((num_elem = H5Sget_select_npoints(chunk_info[i].mspace_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get number of points in selection") +#if 0 + /* Check for variable length */ + if(is_vl) { + H5VL_daosm_vl_mem_ud_t mem_ud; + H5VL_daosm_vl_file_ud_t file_ud; + + /* Allocate array of akey pointers */ + if(NULL == (akeys = (uint8_t **)H5MM_calloc((size_t)num_elem * sizeof(uint8_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for akey array") + + /* Allocate array of iods */ + if(NULL == (iods = (daos_iod_t *)H5MM_calloc((size_t)num_elem * sizeof(daos_iod_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for I/O descriptor array") + + /* Allocate array of sg_iovs */ + if(NULL == (sg_iovs = (daos_iov_t *)H5MM_malloc((size_t)num_elem * sizeof(daos_iov_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for scatter gather list") + + /* Allocate array of sgls */ + if(NULL == (sgls = (daos_sg_list_t *)H5MM_malloc((size_t)num_elem * sizeof(daos_sg_list_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate buffer for scatter gather list array") + + /* 
Iterate over memory selection */ + mem_ud.iods = iods; + mem_ud.sgls = sgls; + mem_ud.sg_iovs = sg_iovs; + mem_ud.is_vl_str = is_vl_str; + mem_ud.base_type_size = base_type_size; + mem_ud.idx = 0; + if(H5Diterate((void *)buf, mem_type_id, real_mem_space_id, H5VL_daosm_dataset_mem_vl_wr_cb, &mem_ud) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, FAIL, "memory selection iteration failed") + HDassert(mem_ud.idx == (uint64_t)num_elem); + + /* Iterate over file selection. Note the bogus buffer and type_id, + * these don't matter since the "elem" parameter of the callback is not + * used. */ + file_ud.akeys = akeys; + file_ud.iods = iods; + file_ud.idx = 0; + if(H5Diterate((void *)buf, mem_type_id, real_file_space_id, H5VL_daosm_dataset_file_vl_cb, &file_ud) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADITER, FAIL, "file selection iteration failed") + HDassert(file_ud.idx == (uint64_t)num_elem); + + /* Write data to dataset */ + /* Note cast to unsigned reduces width to 32 bits. Should eventually + * check for overflow and iterate over 2^32 size blocks */ + if(0 != (ret = daos_obj_update(dset->obj.obj_oh, dset->obj.item.file->epoch, &dkey, (unsigned)num_elem, iods, sgls, NULL /*event*/))) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't write data to dataset: %d", ret) + } /* end if */ + else +#endif + { + H5S_t *chunk_fspace = NULL; + H5S_t *chunk_mspace = NULL; + htri_t match_select = FALSE; + + /* Get file dataspace object */ + if(NULL == (chunk_fspace = (H5S_t *) H5I_object_verify(chunk_info[i].fspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Check if the types are equal */ + if(types_equal) { + /* No type conversion necessary */ + /* Check if we should match the file and memory sequence lists + * (serialized selections). We can do this if the memory space + * is H5S_ALL and the chunk extent equals the file extent. 
If + * the number of chunks selected is more than one we do not need + * to check the extents because they cannot be the same. We + * could also allow the case where the memory space is not + * H5S_ALL but is equivalent. */ + if(mem_space_id == H5S_ALL && chunk_info_len == 1) + if((match_select = H5Sextent_equal(real_file_space_id, chunk_info[i].fspace_id)) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCOMPARE, FAIL, "can't check if file and chunk dataspaces are equal") + + /* Check for matching selections */ + if(match_select) { + /* Build write op from file space */ + if(H5VL_rados_build_io_op_match(chunk_fspace, file_type_size, (size_t)num_elem, NULL, buf, NULL, write_op) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS write op") + } /* end if */ + else { + /* Get memory dataspace object */ + if(NULL == (chunk_mspace = (H5S_t *)H5I_object_verify(chunk_info[i].mspace_id, H5I_DATASPACE))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "not a dataspace") + + /* Build write op from file space and mem space */ + if(H5VL_rados_build_io_op_merge(chunk_mspace, chunk_fspace, file_type_size, (size_t)num_elem, NULL, buf, NULL, write_op) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS write op") + } /* end else */ + } /* end if */ + else { + /* Type conversion necessary */ + /* Check if we need to fill background buffer */ + if(fill_bkg) { + HDassert(bkg_buf); + + /* Create read op */ + read_op = rados_create_read_op(); + read_op_init = TRUE; + + /* Build io ops (to read to bkg_buf and write from tconv_buf) + * from file space */ + if(H5VL_rados_build_io_op_contig(chunk_fspace, file_type_size, (size_t)num_elem, bkg_buf, tconv_buf, read_op, write_op) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS write op") + + /* Read data from dataset to background buffer */ + if((ret = rados_read_op_operate(read_op, ioctx_g, chunk_oid, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, 
FAIL, "can't read data from dataset: %s", strerror(-ret)) + + rados_release_read_op(read_op); + read_op_init = FALSE; + } /* end if */ + else + /* Build write op from file space */ + if(H5VL_rados_build_io_op_contig(chunk_fspace, file_type_size, (size_t)num_elem, NULL, tconv_buf, NULL, write_op) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't generate RADOS write op") + + /* Gather data to conversion buffer */ + if(H5Dgather(chunk_info[i].mspace_id, buf, mem_type_id, (size_t)num_elem * mem_type_size, tconv_buf, NULL, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't gather data to conversion buffer") + + /* Perform type conversion */ + if(H5Tconvert(mem_type_id, dset->type_id, (size_t)num_elem, tconv_buf, bkg_buf, dxpl_id) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL, "can't perform type conversion") + } /* end else */ + + /* Write data to dataset */ + if((ret = rados_write_op_operate(write_op, ioctx_g, chunk_oid, NULL, LIBRADOS_OPERATION_NOFLAG)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't write data to dataset: %s", strerror(-ret)) + } /* end else */ + + rados_release_write_op(write_op); + write_op_init = FALSE; + } /* end for */ + +done: + /* Free memory */ + if(read_op_init) + rados_release_read_op(read_op); + if(write_op_init) + rados_release_write_op(write_op); + H5MM_xfree(chunk_oid); + tconv_buf = H5MM_xfree(tconv_buf); + bkg_buf = H5MM_xfree(bkg_buf); + + /*if(base_type_id != FAIL) + if(H5I_dec_app_ref(base_type_id) < 0) + HDONE_ERROR(H5E_ATTR, H5E_CLOSEERROR, FAIL, "can't close base type id")*/ + + if(chunk_info) { + if(close_spaces) { + for(i = 0; i < chunk_info_len; i++) { + if(H5Sclose(chunk_info[i].mspace_id) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTCLOSEOBJ, FAIL, "can't close memory space"); + if(H5Sclose(chunk_info[i].fspace_id) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTCLOSEOBJ, FAIL, "can't close file space"); + } /* end for */ + } /* end if */ + + H5MM_free(chunk_info); + } /* end if */ + + 
FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_write() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_get + * + * Purpose: Gets certain information about a dataset + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5VL_rados_dataset_get(void *_dset, H5VL_dataset_get_t get_type, + hid_t H5_ATTR_UNUSED dxpl_id, void H5_ATTR_UNUSED **req, va_list arguments) +{ + H5VL_rados_dset_t *dset = (H5VL_rados_dset_t *)_dset; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + switch (get_type) { + case H5VL_DATASET_GET_DCPL: + { + hid_t *plist_id = va_arg(arguments, hid_t *); + + /* Retrieve the dataset's creation property list */ + if((*plist_id = H5Pcopy(dset->dcpl_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dset creation property list") + + break; + } /* end block */ + case H5VL_DATASET_GET_DAPL: + { + hid_t *plist_id = va_arg(arguments, hid_t *); + + /* Retrieve the dataset's access property list */ + if((*plist_id = H5Pcopy(dset->dapl_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dset access property list") + + break; + } /* end block */ + case H5VL_DATASET_GET_SPACE: + { + hid_t *ret_id = va_arg(arguments, hid_t *); + + /* Retrieve the dataset's dataspace */ + if((*ret_id = H5Scopy(dset->space_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dataspace ID of dataset"); + break; + } /* end block */ + case H5VL_DATASET_GET_SPACE_STATUS: + { + H5D_space_status_t *allocation = va_arg(arguments, H5D_space_status_t *); + + /* Retrieve the dataset's space status */ + *allocation = H5D_SPACE_STATUS_NOT_ALLOCATED; + break; + } /* end block */ + case H5VL_DATASET_GET_TYPE: + { + hid_t *ret_id = va_arg(arguments, hid_t *); + + /* Retrieve the dataset's datatype */ + if((*ret_id = 
H5Tcopy(dset->type_id)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get datatype ID of dataset") + break; + } /* end block */ + case H5VL_DATASET_GET_STORAGE_SIZE: + case H5VL_DATASET_GET_OFFSET: + default: + HGOTO_ERROR(H5E_VOL, H5E_UNSUPPORTED, FAIL, "can't get this type of information from dataset") + } /* end switch */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_get() */ + + +/*------------------------------------------------------------------------- + * Function: H5VL_rados_dataset_close + * + * Purpose: Closes a rados HDF5 dataset. + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Neil Fortner + * April, 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5VL_rados_dataset_close(void *_dset, hid_t H5_ATTR_UNUSED dxpl_id, + void H5_ATTR_UNUSED **req) +{ + H5VL_rados_dset_t *dset = (H5VL_rados_dset_t *)_dset; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(dset); + + if(--dset->obj.item.rc == 0) { + /* Free dataset data structures */ + dset->obj.oid = H5MM_xfree(dset->obj.oid); + if(dset->type_id != FAIL && H5I_dec_app_ref(dset->type_id) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTDEC, FAIL, "failed to close datatype") + if(dset->space_id != FAIL && H5I_dec_app_ref(dset->space_id) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTDEC, FAIL, "failed to close dataspace") + if(dset->dcpl_id != FAIL && H5I_dec_app_ref(dset->dcpl_id) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTDEC, FAIL, "failed to close plist") + if(dset->dapl_id != FAIL && H5I_dec_app_ref(dset->dapl_id) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTDEC, FAIL, "failed to close plist") + dset = H5FL_FREE(H5VL_rados_dset_t, dset); + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5VL_rados_dataset_close() */ + diff --git a/src/H5VLrados.h b/src/H5VLrados.h new file mode 100644 index 0000000..24f19c1 --- /dev/null +++ b/src/H5VLrados.h @@ -0,0 +1,123 @@ +/* * * * * * * * * * * * * * * * 
* * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Neil Fortner <nfortne2@hdfgroup.org> + * December, 2017 + * + * Purpose: The private header file for the RADOS VOL plugin. 
+ */ +#ifndef H5VLrados_H +#define H5VLrados_H + +/* Include package's public header */ +#include "H5VLrados_public.h" + +#define HDF5_VOL_RADOS_VERSION_1 1 /* Version number of RADOS VOL plugin */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* FAPL property to tell the VOL plugin to open a saved snapshot when opening a + * file */ +#define H5VL_RADOS_SNAP_OPEN_ID "rados_snap_open" + +/* Common object and attribute information */ +typedef struct H5VL_rados_item_t { + H5I_type_t type; + struct H5VL_rados_file_t *file; + int rc; +} H5VL_rados_item_t; + +/* Common object information */ +typedef struct H5VL_rados_obj_t { + H5VL_rados_item_t item; /* Must be first */ + uint64_t bin_oid; + char *oid; +} H5VL_rados_obj_t; + +/* The file struct */ +typedef struct H5VL_rados_file_t { + H5VL_rados_item_t item; /* Must be first */ + char *file_name; + size_t file_name_len; + unsigned flags; + char *glob_md_oid; + struct H5VL_rados_group_t *root_grp; + uint64_t max_oid; + hbool_t max_oid_dirty; + hid_t fcpl_id; + hid_t fapl_id; + MPI_Comm comm; + MPI_Info info; + int my_rank; + int num_procs; + hbool_t collective; +} H5VL_rados_file_t; + +/* The group struct */ +typedef struct H5VL_rados_group_t { + H5VL_rados_obj_t obj; /* Must be first */ + hid_t gcpl_id; + hid_t gapl_id; +} H5VL_rados_group_t; + +/* The dataset struct */ +typedef struct H5VL_rados_dset_t { + H5VL_rados_obj_t obj; /* Must be first */ + hid_t type_id; + hid_t space_id; + hid_t dcpl_id; + hid_t dapl_id; +} H5VL_rados_dset_t; + +/* The datatype struct */ +/* Note we could speed things up a bit by caching the serialized datatype. We + * may also not need to keep the type_id around. 
-NAF */ +typedef struct H5VL_rados_dtype_t { + H5VL_rados_obj_t obj; /* Must be first */ + hid_t type_id; + hid_t tcpl_id; + hid_t tapl_id; +} H5VL_rados_dtype_t; + +/* The attribute struct */ +typedef struct H5VL_rados_attr_t { + H5VL_rados_item_t item; /* Must be first */ + H5VL_rados_obj_t *parent; + char *name; + hid_t type_id; + hid_t space_id; +} H5VL_rados_attr_t; + +/* The link value struct */ +typedef struct H5VL_rados_link_val_t { + H5L_type_t type; + union { + uint64_t hard; + char *soft; + } target; +} H5VL_rados_link_val_t; + +extern hid_t H5VL_RADOS_g; + +H5_DLL herr_t H5VL_rados_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* H5VLrados_H */ diff --git a/src/H5VLrados_public.h b/src/H5VLrados_public.h new file mode 100644 index 0000000..65ee9f4 --- /dev/null +++ b/src/H5VLrados_public.h @@ -0,0 +1,46 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Neil Fortner <nfortne2@hdfgroup.org> + * December, 2016 + * + * Purpose: The public header file for the RADOS VOL plugin. 
+ */ +#ifndef H5VLrados_public_H +#define H5VLrados_public_H + +#define H5_HAVE_EFF 1 /* DSMINC */ + +/* External headers needed by this file */ +#include <rados/librados.h> + +/* Public headers needed by this file */ +#include "H5public.h" +#include "H5Ipublic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +H5_DLL herr_t H5VLrados_init(rados_t rados_cluster, const char *rados_pool); +H5_DLL herr_t H5VLrados_term(void); +H5_DLL herr_t H5Pset_fapl_rados(hid_t fapl_id, MPI_Comm comm, MPI_Info info); + +#ifdef __cplusplus +} +#endif + +#endif /* H5VLrados_public_H */ |