/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Copyright by The HDF Group. * * Copyright by the Board of Trustees of the University of Illinois. * * All rights reserved. * * * * This file is part of HDF5. The full HDF5 copyright notice, including * * terms governing use, modification, and redistribution, is contained in * * the files COPYING and Copyright.html. COPYING can be found at the root * * of the source code distribution tree; Copyright.html can be found at the * * root level of an installed copy of the electronic HDF5 document set and * * is linked from the top-level documents page. It can also be found at * * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * * access to either file, you may request a copy from help@hdfgroup.org. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* * Programmer: Robb Matzke <matzke@llnl.gov> * Wednesday, May 13, 1998 * * Purpose: Repartitions a file family. This program can be used to * split a single file into a family of files, join a family of * files into a single file, or copy one family to another while * changing the size of the family members. It can also be used * to copy a single file to a single file with holes. 
*/ /* See H5private.h for how to include system headers */ #include "hdf5.h" #include "H5private.h" #ifdef H5_STDC_HEADERS # include <ctype.h> # include <errno.h> # include <fcntl.h> # include <stdio.h> # include <stdlib.h> # include <string.h> #endif #ifdef H5_HAVE_UNISTD_H # include <sys/types.h> # include <unistd.h> #endif #ifdef H5_HAVE_SYS_STAT_H # include <sys/stat.h> #endif #ifdef _WIN32 # include <io.h> # include <fcntl.h> #endif #ifndef FALSE # define FALSE 0 #endif #ifndef TRUE # define TRUE 1 #endif # define NAMELEN 4096 #define GB *1024*1024*1024 #ifndef MIN # define MIN(X,Y) ((X)<(Y)?(X):(Y)) #endif #ifndef MIN3 # define MIN3(X,Y,Z) MIN(MIN(X,Y),Z) #endif /*Make these 2 private properties(defined in H5Fprivate.h) available to h5repart. *The first one updates the member file size in the superblock. The second one *change file driver from family to sec2. */ #define H5F_ACS_FAMILY_NEWSIZE_NAME "family_newsize" #define H5F_ACS_FAMILY_TO_SEC2_NAME "family_to_sec2" /*------------------------------------------------------------------------- * Function: usage * * Purpose: Prints a usage message. 
*
 * Return:      Does not return. Once the message has been written to
 *              stderr the process is terminated with exit status 1.
 *
 * Programmer:  Robb Matzke
 *              Wednesday, May 13, 1998
 *
 *-------------------------------------------------------------------------
 */
static void
usage(const char *progname)
{
    /* Synopsis: the only part of the message that depends on the name */
    fprintf(stderr,
            "usage: %s [-v] [-V] [-[b|m] N[g|m|k]] [-family_to_sec2] SRC DST\n",
            progname);

    /* One line per switch and positional argument */
    fprintf(stderr,
            " -v Produce verbose output\n"
            " -V Print a version number and exit\n"
            " -b N The I/O block size, defaults to 1kB\n"
            " -m N The destination member size or 1GB\n"
            " -family_to_sec2 Change file driver from family to sec2\n"
            " SRC The name of the source file\n"
            " DST The name of the destination files\n");

    /* Notes on size suffixes and on family-name templates */
    fprintf(stderr,
            "Sizes may be suffixed with `g' for GB, `m' for MB or "
            "`k' for kB.\n"
            "File family names include an integer printf "
            "format such as `%%d'\n");

    exit(1);
}

/*-------------------------------------------------------------------------
 * Function:    get_size
 *
 * Purpose:     Parses a size switch of the form `-XNS', where `X' is the
 *              switch letter, `N' is a number and `S' is an optional
 *              one-letter suffix from the set [GgMmk]. The number and its
 *              suffix may also be given as a separate argument: `-X NS'.
 *              On entry ARGNO holds the index in ARGV of the switch being
 *              parsed and ARGC is the number of entries in ARGV.
 *
 * Return:      Success:    The value N multiplied according to the
 *                          suffix S. ARGNO is advanced to the index of
 *                          the next argument to process.
 *
 *              Failure:    Calls usage() which exits with a non-zero
 *                          status.
* * Programmer: Robb Matzke * Wednesday, May 13, 1998 * * Modifications: * *------------------------------------------------------------------------- */ static off_t get_size (const char *progname, int *argno, int argc, char *argv[]) { off_t retval=-1; char *suffix; if (isdigit ((int)(argv[*argno][2]))) { retval = strtol (argv[*argno]+2, &suffix, 10); (*argno)++; } else if (argv[*argno][2] || *argno+1>=argc) { usage (progname); } else { retval = strtol (argv[*argno+1], &suffix, 0); if (suffix==argv[*argno+1]) usage (progname); *argno += 2; } if (suffix && suffix[0] && !suffix[1]) { switch (*suffix) { case 'G': case 'g': retval *= 1024 * 1024 * 1024; break; case 'M': case 'm': retval *= 1024 * 1024; break; case 'k': retval *= 1024; break; default: usage (progname); } } else if (suffix && suffix[0]) { usage (progname); } return retval; } /*------------------------------------------------------------------------- * Function: main * * Purpose: Split an hdf5 file * * Return: Success: * * Failure: * * Programmer: Robb Matzke * Wednesday, May 13, 1998 * * Modifications: * *------------------------------------------------------------------------- */ int main (int argc, char *argv[]) { const char *prog_name; /*program name */ size_t blk_size=1024; /*size of each I/O block */ char *buf=NULL; /*I/O block buffer */ size_t n, i; /*counters */ ssize_t nio; /*I/O return value */ int argno=1; /*program argument number */ int src, dst=-1; /*source & destination files */ int need_seek=FALSE; /*destination needs to seek? */ int need_write; /*data needs to be written? */ /*struct stat sb; temporary file stat buffer */ /*struct _stati64 sb;*/ h5_stat_t sb; int verbose=FALSE; /*display file names? */ const char *src_gen_name; /*general source name */ char src_name[NAMELEN]; /*source member name */ int src_is_family; /*is source name a family name? 
*/ int src_membno=0; /*source member number */ const char *dst_gen_name; /*general destination name */ char dst_name[NAMELEN]; /*destination member name */ int dst_is_family; /*is dst name a family name? */ int dst_membno=0; /*destination member number */ #if defined(_WIN32) && ! defined (__MWERKS__) __int64 left_overs=0; /*amount of zeros left over */ __int64 src_offset=0; /*offset in source member */ __int64 dst_offset=0; /*offset in destination member */ __int64 src_size; /*source logical member size */ __int64 src_act_size; /*source actual member size */ __int64 dst_size=1 GB; /*destination logical memb size */ #else off_t left_overs=0; /*amount of zeros left over */ off_t src_offset=0; /*offset in source member */ off_t dst_offset=0; /*offset in destination member */ off_t src_size; /*source logical member size */ off_t src_act_size; /*source actual member size */ off_t dst_size=1 GB; /*destination logical memb size */ #endif hid_t fapl; /*file access property list */ hid_t file; hsize_t hdsize; /*destination logical memb size */ hbool_t family_to_sec2=FALSE; /*change family to sec2 driver? */ /* * Get the program name from argv[0]. Use only the last component. */ if ((prog_name=strrchr (argv[0], '/'))) prog_name++; else prog_name = argv[0]; /* * Parse switches. */ while (argno<argc && '-'==argv[argno][0]) { if (!strcmp (argv[argno], "-v")) { verbose = TRUE; argno++; } else if (!strcmp(argv[argno], "-V")) { printf("This is %s version %u.%u release %u\n", prog_name, H5_VERS_MAJOR, H5_VERS_MINOR, H5_VERS_RELEASE); exit(0); } else if (!strcmp (argv[argno], "-family_to_sec2")) { family_to_sec2 = TRUE; argno++; } else if ('b'==argv[argno][1]) { blk_size = get_size (prog_name, &argno, argc, argv); } else if ('m'==argv[argno][1]) { dst_size = get_size (prog_name, &argno, argc, argv); } else { usage (prog_name); } } /* * Get the name for the source file and open the first member. The size * of the first member determines the logical size of all the members. 
*/ if (argno>=argc) usage (prog_name); src_gen_name = argv[argno++]; sprintf (src_name, src_gen_name, src_membno); src_is_family = strcmp (src_name, src_gen_name); if ((src=HDopen(src_name, O_RDONLY,0))<0) { perror (src_name); exit (1); } if (HDfstat(src, &sb)<0) { perror ("fstat"); exit (1); } src_size = src_act_size = sb.st_size; if (verbose) fprintf (stderr, "< %s\n", src_name); /* * Get the name for the destination file and open the first member. */ if (argno>=argc) usage (prog_name); dst_gen_name = argv[argno++]; sprintf (dst_name, dst_gen_name, dst_membno); dst_is_family = strcmp (dst_name, dst_gen_name); if ((dst=HDopen (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { perror (dst_name); exit (1); } if (verbose) fprintf (stderr, "> %s\n", dst_name); /* No more arguments */ if (argno<argc) usage (prog_name); /* Now the real work, split the file */ buf = malloc (blk_size); while (src_offset<src_size) { /* Read a block. The amount to read is the minimum of: * 1. The I/O block size * 2. What's left to write in the destination member * 3. Left over zeros or what's left in the source member. */ n = blk_size; if (dst_is_family) n = (size_t)MIN((off_t)n, dst_size-dst_offset); if (left_overs) { n = (size_t)MIN (n, left_overs); left_overs -= n; need_write = FALSE; } else if (src_offset<src_act_size) { n = (size_t)MIN ((off_t)n, src_act_size-src_offset); if ((nio=read (src, buf, n))<0) { perror ("read"); exit (1); } else if ((size_t)nio!=n) { fprintf (stderr, "%s: short read\n", src_name); exit (1); } for (i=0; i<n; i++) { if (buf[i]) break; } need_write = (i<n); } else { n = 0; left_overs = src_size - src_act_size; need_write = FALSE; } /* * If the block contains non-zero data then write it to the * destination, otherwise just remember that we'll have to do a seek * later in the destination when we finally get non-zero data. 
*/ if (need_write) { if (need_seek && HDlseek (dst, dst_offset, SEEK_SET)<0) { perror ("HDlseek"); exit (1); } if ((nio=write (dst, buf, n))<0) { perror ("write"); exit (1); } else if ((size_t)nio!=n) { fprintf (stderr, "%s: short write\n", dst_name); exit (1); } need_seek = FALSE; } else { need_seek = TRUE; } /* * Update the source offset and open the next source family member if * necessary. The source stream ends at the first member which * cannot be opened because it doesn't exist. At the end of the * source stream, update the destination offset and break out of the * loop. The destination offset must be updated so we can fix * trailing holes. */ src_offset += n; if (src_offset==src_act_size) { close (src); if (!src_is_family) { dst_offset += n; break; } sprintf (src_name, src_gen_name, ++src_membno); if ((src=HDopen (src_name, O_RDONLY,0))<0 && ENOENT==errno) { dst_offset += n; break; } else if (src<0) { perror (src_name); exit (1); } if (HDfstat (src, &sb)<0) { perror ("fstat"); exit (1); } src_act_size = sb.st_size; if (src_act_size>src_size) { fprintf (stderr, "%s: member truncated to %lu bytes\n", src_name, (unsigned long)src_size); } src_offset = 0; if (verbose) fprintf (stderr, "< %s\n", src_name); } /* * Update the destination offset, opening a new member if one will be * needed. The first member is extended to the logical member size * but other members might be smaller if they end with a hole. 
*/ dst_offset += n; if (dst_is_family && dst_offset==dst_size) { if (0==dst_membno) { if (HDlseek (dst, dst_size-1, SEEK_SET)<0) { perror ("HDHDlseek"); exit (1); } if (read (dst, buf, 1)<0) { perror ("read"); exit (1); } if (HDlseek (dst, dst_size-1, SEEK_SET)<0) { perror ("HDlseek"); exit (1); } if (write (dst, buf, 1)<0) { perror ("write"); exit (1); } } close (dst); sprintf (dst_name, dst_gen_name, ++dst_membno); if ((dst=HDopen (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { perror (dst_name); exit (1); } dst_offset = 0; need_seek = FALSE; if (verbose) fprintf (stderr, "> %s\n", dst_name); } } /* * Make sure the last family member is the right size and then close it. * The last member can't end with a hole or hdf5 will think that the * family has been truncated. */ if (need_seek) { if (HDlseek (dst, dst_offset-1, SEEK_SET)<0) { perror ("HDlseek"); exit (1); } if (read (dst, buf, 1)<0) { perror ("read"); exit (1); } if (HDlseek (dst, dst_offset-1, SEEK_SET)<0) { perror ("HDlseek"); exit (1); } if (write (dst, buf, 1)<0) { perror ("write"); exit (1); } } close (dst); /* Modify family driver information saved in superblock through private property. * These private properties are for this tool only. */ if ((fapl=H5Pcreate(H5P_FILE_ACCESS))<0) { perror ("H5Pcreate"); exit (1); } if(family_to_sec2) { /* The user wants to change file driver from family to sec2. Open the file * with sec2 driver. This property signals the library to ignore the family * driver information saved in the superblock. */ if(H5Pset(fapl, H5F_ACS_FAMILY_TO_SEC2_NAME, &family_to_sec2) < 0) { perror ("H5Pset"); exit (1); } } else { /* Modify family size saved in superblock through private property. It signals * library to save the new member size(specified in command line) in superblock. * This private property is for this tool only. 
*/ if(H5Pset_fapl_family(fapl, H5F_FAMILY_DEFAULT, H5P_DEFAULT) < 0) { perror ("H5Pset_fapl_family"); exit (1); } /* Set the property of the new member size as hsize_t */ hdsize = dst_size; if(H5Pset(fapl, H5F_ACS_FAMILY_NEWSIZE_NAME, &hdsize) < 0) { perror ("H5Pset"); exit (1); } } /* If the new file is a family file, try to open file for "read and write" to * flush metadata. Flushing metadata will update the superblock to the new * member size. If the original file is a family file and the new file is a sec2 * file, the property FAMILY_TO_SEC2 will signal the library to switch to sec2 * driver when the new file is opened. If the original file is a sec2 file and the * new file can only be a sec2 file, reopen the new file should fail. There's * nothing to do in this case. */ H5E_BEGIN_TRY { file=H5Fopen(dst_gen_name, H5F_ACC_RDWR, fapl); } H5E_END_TRY; if(file>=0) { if(H5Fclose(file)<0) { perror ("H5Fclose"); exit (1); } } if(H5Pclose(fapl)<0) { perror ("H5Pclose"); exit (1); } /* Free resources and return */ free (buf); return 0; }