summaryrefslogtreecommitdiffstats
path: root/tools/src/misc/h5repart.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/src/misc/h5repart.c')
-rw-r--r--tools/src/misc/h5repart.c510
1 files changed, 510 insertions, 0 deletions
diff --git a/tools/src/misc/h5repart.c b/tools/src/misc/h5repart.c
new file mode 100644
index 0000000..ac10944
--- /dev/null
+++ b/tools/src/misc/h5repart.c
@@ -0,0 +1,510 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Robb Matzke
+ * Wednesday, May 13, 1998
+ *
+ * Purpose: Repartitions a file family. This program can be used to
+ * split a single file into a family of files, join a family of
+ * files into a single file, or copy one family to another while
+ * changing the size of the family members. It can also be used
+ * to copy a single file to a single file with holes.
+ */
+
+/* See H5private.h for how to include system headers */
+#include "hdf5.h"
+#include "H5private.h"
+
+#define NAMELEN 4096 /* size, in chars, of each buffer that holds a formatted member file name */
+#define GB *1024 * 1024 * 1024 /* postfix multiplier: written after a number, as in `1 GB' */
+
+/* Make these 2 private properties (defined in H5Fprivate.h) available to h5repart.
+ * The first one updates the member file size in the superblock. The second one
+ * changes the file driver from family to a single-file driver.
+ */
+#define H5F_ACS_FAMILY_NEWSIZE_NAME "family_newsize"
+#define H5F_ACS_FAMILY_TO_SINGLE_NAME "family_to_single"
+
+/*-------------------------------------------------------------------------
+ * Function:    usage
+ *
+ * Purpose:     Writes the command-line synopsis, the description of every
+ *              switch and positional argument, and the notes about size
+ *              suffixes and family names to stderr.
+ *
+ * Parameters:  progname    Name printed in the synopsis line.
+ *
+ * Return:      Does not return: the process exits with EXIT_FAILURE
+ *              once the message has been written.
+ *
+ * Programmer:  Robb Matzke
+ *              Wednesday, May 13, 1998
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+usage(const char *progname)
+{
+    /* Synopsis line: the only part that needs the program name */
+    HDfprintf(stderr, "usage: %s [-v] [-V] [-[b|m] N[g|m|k]] [-family_to_sec2|-family_to_single] SRC DST\n",
+              progname);
+
+    /* Switches and positional arguments, one literal per output line */
+    HDfprintf(stderr, " -v Produce verbose output\n"
+                      " -V Print a version number and exit\n"
+                      " -b N The I/O block size, defaults to 1kB\n"
+                      " -m N The destination member size or 1GB\n"
+                      " -family_to_sec2 Deprecated version of -family_to_single (below)\n"
+                      " -family_to_single Change file driver from family to the default single-file VFD "
+                      "(windows or sec2)\n"
+                      " SRC The name of the source file\n"
+                      " DST The name of the destination files\n");
+
+    /* Trailing notes; `%%' yields a literal percent sign in the output */
+    HDfprintf(stderr, "Sizes may be suffixed with 'g' for GB, 'm' for MB or "
+                      "'k' for kB.\n"
+                      "File family names include an integer printf "
+                      "format such as '%%d'\n");
+
+    HDexit(EXIT_FAILURE);
+}
+
+/*-------------------------------------------------------------------------
+ * Function:    get_size
+ *
+ * Purpose:     Reads a size option of the form `-XNS' where `X' is any
+ *              letter, `N' is a positive number, and `S' is an optional
+ *              suffix letter in the set [GgMmk] (note that only the
+ *              lower-case `k' is accepted). The option may also be split
+ *              among two arguments as: `-X NS'.
+ *
+ *              In the attached form N must start with a digit and is
+ *              converted with base 10. In the split form the conversion
+ *              is done with base 0, so the prefix of N selects the base.
+ *
+ * Parameters:  progname    Program name, passed on to usage().
+ *              argno       In: index in ARGV of the switch.
+ *                          Out: index of the next argument to process.
+ *              argc        Number of entries in ARGV.
+ *              argv        The argument vector.
+ *
+ * Return:      Success:    The value N multiplied according to the
+ *                          suffix S; always greater than zero.
+ *
+ *              Failure:    Calls usage() which exits with a non-zero
+ *                          status. This happens when the number is
+ *                          missing, is zero or negative, or is followed
+ *                          by anything other than a single valid suffix.
+ *
+ * Programmer:  Robb Matzke
+ *              Wednesday, May 13, 1998
+ *-------------------------------------------------------------------------
+ */
+static off_t
+get_size(const char *progname, int *argno, int argc, char *argv[])
+{
+    off_t retval = -1;
+    char *suffix = NULL;
+
+    /* <ctype.h> functions are only defined for unsigned char values (and
+     * EOF), so go through unsigned char rather than passing a plain char.
+     */
+    if (isdigit((unsigned char)argv[*argno][2])) {
+        /* Attached form: -XNS */
+        retval = HDstrtol(argv[*argno] + 2, &suffix, 10);
+        (*argno)++;
+    }
+    else if (argv[*argno][2] || *argno + 1 >= argc) {
+        /* Junk glued to the switch, or no following argument to read */
+        usage(progname);
+    }
+    else {
+        /* Split form: -X NS */
+        retval = HDstrtol(argv[*argno + 1], &suffix, 0);
+        if (suffix == argv[*argno + 1])
+            usage(progname); /* no digits were consumed */
+        *argno += 2;
+    }
+
+    /* The contract is a positive size. A zero block or member size makes
+     * the copy loop in main() stall, and a negative one turns into a huge
+     * unsigned value when the caller converts it to size_t.
+     */
+    if (retval <= 0)
+        usage(progname);
+
+    if (suffix && suffix[0] && !suffix[1]) {
+        /* Exactly one trailing character: it must be a known multiplier.
+         * NOTE: the multiplication itself is not checked for overflow.
+         */
+        switch (*suffix) {
+            case 'G':
+            case 'g':
+                retval *= 1024 * 1024 * 1024;
+                break;
+            case 'M':
+            case 'm':
+                retval *= 1024 * 1024;
+                break;
+            case 'k':
+                retval *= 1024;
+                break;
+            default:
+                usage(progname);
+        }
+    }
+    else if (suffix && suffix[0]) {
+        /* More than one trailing character */
+        usage(progname);
+    }
+    return retval;
+}
+
+/*-------------------------------------------------------------------------
+ * Function:    main
+ *
+ * Purpose:     Repartitions an HDF5 file. The bytes of SRC (a single file
+ *              or a file family) are copied to DST (a single file or a
+ *              file family with members of the requested size). Blocks
+ *              that contain only zero bytes are not written; the
+ *              destination is seeked over them instead so they become
+ *              holes. Afterwards DST is opened through the HDF5 library
+ *              with a private property so that the superblock records the
+ *              new member size or the switch to a single-file driver.
+ *
+ *              A name is treated as a family name when formatting it with
+ *              a member number yields a string different from the name
+ *              itself.
+ *
+ * Return:      Success:    EXIT_SUCCESS (`-V' exits with EXIT_SUCCESS
+ *                          right after printing the version).
+ *
+ *              Failure:    Does not return: a diagnostic is printed and
+ *                          the process exits with EXIT_FAILURE.
+ *
+ * Programmer:  Robb Matzke
+ *              Wednesday, May 13, 1998
+ *
+ *-------------------------------------------------------------------------
+ */
+H5_GCC_CLANG_DIAG_OFF("format-nonliteral")
+int
+main(int argc, char *argv[])
+{
+    const char *prog_name;         /*program name                  */
+    size_t      blk_size = 1024;   /*size of each I/O block        */
+    char       *buf      = NULL;   /*I/O block buffer              */
+    size_t      n, i;              /*counters                      */
+    ssize_t     nio;               /*I/O return value              */
+    int         argno = 1;         /*program argument number       */
+    int         src, dst = -1;     /*source & destination files    */
+    int         need_seek = FALSE; /*destination needs to seek?    */
+    int         need_write;        /*data needs to be written?     */
+    h5_stat_t   sb;                /*temporary file stat buffer    */
+
+    int verbose = FALSE; /*display file names?           */
+
+    const char *src_gen_name;    /*general source name           */
+    char       *src_name = NULL; /*source member name            */
+
+    int src_is_family;  /*is source name a family name? */
+    int src_membno = 0; /*source member number          */
+
+    const char *dst_gen_name;    /*general destination name      */
+    char       *dst_name = NULL; /*destination member name       */
+    int         dst_is_family;   /*is dst name a family name?    */
+    int         dst_membno = 0;  /*destination member number     */
+
+    off_t   left_overs = 0;           /*amount of zeros left over     */
+    off_t   src_offset = 0;           /*offset in source member       */
+    off_t   dst_offset = 0;           /*offset in destination member  */
+    off_t   src_size;                 /*source logical member size    */
+    off_t   src_act_size;             /*source actual member size     */
+    off_t   dst_size = 1 GB;          /*destination logical memb size */
+    hid_t   fapl;                     /*file access property list     */
+    hid_t   file;                     /*destination reopened via HDF5 */
+    hsize_t hdsize;                   /*destination logical memb size */
+    hbool_t family_to_single = FALSE; /*change family to single file driver? */
+
+    /*
+     * Get the program name from argv[0]. Use only the last component.
+     */
+    if ((prog_name = HDstrrchr(argv[0], '/')))
+        prog_name++;
+    else
+        prog_name = argv[0];
+
+    /*
+     * Parse switches.
+     */
+    while (argno < argc && '-' == argv[argno][0]) {
+        if (!HDstrcmp(argv[argno], "-v")) {
+            verbose = TRUE;
+            argno++;
+        }
+        else if (!HDstrcmp(argv[argno], "-V")) {
+            HDprintf("This is %s version %u.%u release %u\n", prog_name, H5_VERS_MAJOR, H5_VERS_MINOR,
+                     H5_VERS_RELEASE);
+            HDexit(EXIT_SUCCESS);
+        }
+        else if (!HDstrcmp(argv[argno], "-family_to_sec2")) {
+            family_to_single = TRUE;
+            argno++;
+        }
+        else if (!HDstrcmp(argv[argno], "-family_to_single")) {
+            family_to_single = TRUE;
+            argno++;
+        }
+        else if ('b' == argv[argno][1]) {
+            blk_size = (size_t)get_size(prog_name, &argno, argc, argv);
+        }
+        else if ('m' == argv[argno][1]) {
+            dst_size = get_size(prog_name, &argno, argc, argv);
+        }
+        else {
+            usage(prog_name);
+        } /* end if */
+    }     /* end while */
+
+    /* A zero block size or a non-positive member size would make every
+     * pass of the copy loop transfer zero bytes, so it would never end.
+     */
+    if (0 == blk_size || dst_size <= 0)
+        usage(prog_name);
+
+    /* allocate names */
+    if (NULL == (src_name = (char *)HDcalloc((size_t)NAMELEN, sizeof(char))))
+        HDexit(EXIT_FAILURE);
+    if (NULL == (dst_name = (char *)HDcalloc((size_t)NAMELEN, sizeof(char))))
+        HDexit(EXIT_FAILURE);
+
+    /*
+     * Get the name for the source file and open the first member. The size
+     * of the first member determines the logical size of all the members.
+     *
+     * NOTE: the name given on the command line is used as the format
+     * string on purpose (that is how the member number gets substituted),
+     * which is why the format-nonliteral diagnostic is switched off around
+     * this function. The name is therefore trusted input.
+     */
+    if (argno >= argc)
+        usage(prog_name);
+    src_gen_name = argv[argno++];
+    HDsnprintf(src_name, NAMELEN, src_gen_name, src_membno);
+    src_is_family = HDstrcmp(src_name, src_gen_name);
+
+    if ((src = HDopen(src_name, O_RDONLY)) < 0) {
+        HDperror(src_name);
+        HDexit(EXIT_FAILURE);
+    }
+
+    if (HDfstat(src, &sb) < 0) {
+        HDperror("fstat");
+        HDexit(EXIT_FAILURE);
+    }
+    src_size = src_act_size = sb.st_size;
+    if (verbose)
+        HDfprintf(stderr, "< %s\n", src_name);
+
+    /*
+     * Get the name for the destination file and open the first member.
+     */
+    if (argno >= argc)
+        usage(prog_name);
+    dst_gen_name = argv[argno++];
+    HDsnprintf(dst_name, NAMELEN, dst_gen_name, dst_membno);
+    dst_is_family = HDstrcmp(dst_name, dst_gen_name);
+
+    if ((dst = HDopen(dst_name, O_RDWR | O_CREAT | O_TRUNC, H5_POSIX_CREATE_MODE_RW)) < 0) {
+        HDperror(dst_name);
+        HDexit(EXIT_FAILURE);
+    }
+    if (verbose)
+        HDfprintf(stderr, "> %s\n", dst_name);
+
+    /* No more arguments */
+    if (argno < argc)
+        usage(prog_name);
+
+    /* Now the real work, split the file */
+    if (NULL == (buf = (char *)HDmalloc(blk_size))) {
+        HDfprintf(stderr, "%s: unable to allocate I/O buffer\n", prog_name);
+        HDexit(EXIT_FAILURE);
+    }
+    while (src_offset < src_size) {
+
+        /* Read a block. The amount to read is the minimum of:
+         *    1. The I/O block size
+         *    2. What's left to write in the destination member
+         *    3. Left over zeros or what's left in the source member.
+         */
+        n = blk_size;
+        if (dst_is_family)
+            n = (size_t)MIN((off_t)n, dst_size - dst_offset);
+        if (left_overs) {
+            /* Still inside the implied zeros of a short source member */
+            n          = (size_t)MIN((off_t)n, left_overs);
+            left_overs = left_overs - (off_t)n;
+            need_write = FALSE;
+        }
+        else if (src_offset < src_act_size) {
+            n = (size_t)MIN((off_t)n, src_act_size - src_offset);
+            if ((nio = HDread(src, buf, n)) < 0) {
+                HDperror("read");
+                HDexit(EXIT_FAILURE);
+            }
+            else if ((size_t)nio != n) {
+                HDfprintf(stderr, "%s: short read\n", src_name);
+                HDexit(EXIT_FAILURE);
+            }
+            /* Only blocks with at least one non-zero byte get written */
+            for (i = 0; i < n; i++) {
+                if (buf[i])
+                    break;
+            }
+            need_write = (i < n);
+        }
+        else {
+            /* The member is shorter than the logical member size: the
+             * missing tail counts as zeros to be skipped on later passes.
+             */
+            n          = 0;
+            left_overs = src_size - src_act_size;
+            need_write = FALSE;
+        }
+
+        /*
+         * If the block contains non-zero data then write it to the
+         * destination, otherwise just remember that we'll have to do a seek
+         * later in the destination when we finally get non-zero data.
+         */
+        if (need_write) {
+            if (need_seek && HDlseek(dst, dst_offset, SEEK_SET) < 0) {
+                HDperror("HDlseek");
+                HDexit(EXIT_FAILURE);
+            }
+            if ((nio = HDwrite(dst, buf, n)) < 0) {
+                HDperror("write");
+                HDexit(EXIT_FAILURE);
+            }
+            else if ((size_t)nio != n) {
+                HDfprintf(stderr, "%s: short write\n", dst_name);
+                HDexit(EXIT_FAILURE);
+            }
+            need_seek = FALSE;
+        }
+        else {
+            need_seek = TRUE;
+        }
+
+        /*
+         * Update the source offset and open the next source family member if
+         * necessary. The source stream ends at the first member which
+         * cannot be opened because it doesn't exist. At the end of the
+         * source stream, update the destination offset and break out of the
+         * loop. The destination offset must be updated so we can fix
+         * trailing holes.
+         */
+        src_offset = src_offset + (off_t)n;
+        if (src_offset == src_act_size) {
+            HDclose(src);
+            if (!src_is_family) {
+                dst_offset = dst_offset + (off_t)n;
+                break;
+            }
+            HDsnprintf(src_name, NAMELEN, src_gen_name, ++src_membno);
+            if ((src = HDopen(src_name, O_RDONLY)) < 0 && ENOENT == errno) {
+                dst_offset = dst_offset + (off_t)n;
+                break;
+            }
+            else if (src < 0) {
+                HDperror(src_name);
+                HDexit(EXIT_FAILURE);
+            }
+            if (HDfstat(src, &sb) < 0) {
+                HDperror("fstat");
+                HDexit(EXIT_FAILURE);
+            }
+            src_act_size = sb.st_size;
+            if (src_act_size > src_size) {
+                HDfprintf(stderr, "%s: member truncated to %lu bytes\n", src_name, (unsigned long)src_size);
+            }
+            src_offset = 0;
+            if (verbose)
+                HDfprintf(stderr, "< %s\n", src_name);
+        }
+
+        /*
+         * Update the destination offset, opening a new member if one will be
+         * needed. The first member is extended to the logical member size
+         * but other members might be smaller if they end with a hole.
+         */
+        dst_offset = dst_offset + (off_t)n;
+        if (dst_is_family && dst_offset == dst_size) {
+            if (0 == dst_membno) {
+                /* Rewrite the last byte in place so the file reaches its
+                 * full size. If the file is still shorter than that, the
+                 * read returns 0 and leaves buf untouched, so preset the
+                 * byte to zero instead of writing stale block data.
+                 */
+                if (HDlseek(dst, dst_size - 1, SEEK_SET) < 0) {
+                    HDperror("HDlseek");
+                    HDexit(EXIT_FAILURE);
+                }
+                buf[0] = 0;
+                if (HDread(dst, buf, 1) < 0) {
+                    HDperror("read");
+                    HDexit(EXIT_FAILURE);
+                }
+                if (HDlseek(dst, dst_size - 1, SEEK_SET) < 0) {
+                    HDperror("HDlseek");
+                    HDexit(EXIT_FAILURE);
+                }
+                if (HDwrite(dst, buf, 1) < 0) {
+                    HDperror("write");
+                    HDexit(EXIT_FAILURE);
+                }
+            }
+            HDclose(dst);
+            HDsnprintf(dst_name, NAMELEN, dst_gen_name, ++dst_membno);
+            if ((dst = HDopen(dst_name, O_RDWR | O_CREAT | O_TRUNC, H5_POSIX_CREATE_MODE_RW)) < 0) {
+                HDperror(dst_name);
+                HDexit(EXIT_FAILURE);
+            }
+            dst_offset = 0;
+            need_seek  = FALSE;
+            if (verbose)
+                HDfprintf(stderr, "> %s\n", dst_name);
+        }
+    }
+
+    /*
+     * Make sure the last family member is the right size and then close it.
+     * The last member can't end with a hole or hdf5 will think that the
+     * family has been truncated.
+     *
+     * Nothing has to be extended when the member is still at offset zero
+     * (an empty source member followed by a missing one, right after a new
+     * destination member was opened); seeking to offset -1 would fail.
+     */
+    if (need_seek && dst_offset > 0) {
+        if (HDlseek(dst, dst_offset - 1, SEEK_SET) < 0) {
+            HDperror("HDlseek");
+            HDexit(EXIT_FAILURE);
+        }
+        buf[0] = 0; /* value to write if the read below hits end of file */
+        if (HDread(dst, buf, 1) < 0) {
+            HDperror("read");
+            HDexit(EXIT_FAILURE);
+        }
+        if (HDlseek(dst, dst_offset - 1, SEEK_SET) < 0) {
+            HDperror("HDlseek");
+            HDexit(EXIT_FAILURE);
+        }
+        if (HDwrite(dst, buf, 1) < 0) {
+            HDperror("write");
+            HDexit(EXIT_FAILURE);
+        }
+    }
+    HDclose(dst);
+
+    /* Modify family driver information saved in superblock through private property.
+     * These private properties are for this tool only. */
+    if ((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) {
+        HDperror("H5Pcreate");
+        HDexit(EXIT_FAILURE);
+    }
+
+    if (family_to_single) {
+        /* The user wants to change file driver from family to a single-file VFD.
+         * Open the file with the sec2, windows, etc. driver. This property signals
+         * the library to ignore the family driver information saved in the superblock.
+         */
+        if (H5Pset(fapl, H5F_ACS_FAMILY_TO_SINGLE_NAME, &family_to_single) < 0) {
+            HDperror("H5Pset");
+            HDexit(EXIT_FAILURE);
+        }
+    }
+    else {
+        /* Modify family size saved in superblock through private property. It signals
+         * library to save the new member size(specified in command line) in superblock.
+         * This private property is for this tool only. */
+        if (H5Pset_fapl_family(fapl, H5F_FAMILY_DEFAULT, H5P_DEFAULT) < 0) {
+            HDperror("H5Pset_fapl_family");
+            HDexit(EXIT_FAILURE);
+        }
+
+        /* Set the property of the new member size as hsize_t */
+        hdsize = (hsize_t)dst_size;
+        if (H5Pset(fapl, H5F_ACS_FAMILY_NEWSIZE_NAME, &hdsize) < 0) {
+            HDperror("H5Pset");
+            HDexit(EXIT_FAILURE);
+        }
+    }
+
+    /* If the new file is a family file, try to open file for "read and write" to
+     * flush metadata. Flushing metadata will update the superblock to the new
+     * member size. If the original file is a family file and the new file is a single
+     * file, the property FAMILY_TO_SINGLE will signal the library to switch to default
+     * single-file driver when the new file is opened. If the original file is a single
+     * file and the new file can only be a single file, reopen the new file should fail.
+     * There's nothing to do in this case.
+     */
+    H5E_BEGIN_TRY
+    {
+        file = H5Fopen(dst_gen_name, H5F_ACC_RDWR, fapl);
+    }
+    H5E_END_TRY;
+
+    if (file >= 0) {
+        if (H5Fclose(file) < 0) {
+            HDperror("H5Fclose");
+            HDexit(EXIT_FAILURE);
+        } /* end if */
+    }     /* end if */
+
+    if (H5Pclose(fapl) < 0) {
+        HDperror("H5Pclose");
+        HDexit(EXIT_FAILURE);
+    } /* end if */
+
+    /* Free resources and return */
+    HDfree(src_name);
+    HDfree(dst_name);
+    HDfree(buf);
+    return EXIT_SUCCESS;
+} /* end main */
+H5_GCC_CLANG_DIAG_ON("format-nonliteral")