diff options
Diffstat (limited to 'src/h5repart.c')
-rw-r--r-- | src/h5repart.c | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/src/h5repart.c b/src/h5repart.c new file mode 100644 index 0000000..8faa9ea --- /dev/null +++ b/src/h5repart.c @@ -0,0 +1,388 @@ +/* + * Copyright (C) 1998 NCSA + * All rights reserved. + * + * Programmer: Robb Matzke <matzke@llnl.gov> + * Wednesday, May 13, 1998 + * + * Purpose: Repartitions a file family. This program can be used to + * split a single file into a family of files, join a family of + * files into a single file, or copy one family to another while + * changing the size of the family members. It can also be used + * to copy a single file to a single file with holes. + */ +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <hdf5.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#define FALSE 0 +#define TRUE 1 +#define NAMELEN 4096 +#define GB *1024*1024*1024 + +#define MIN(X,Y) ((X)<(Y)?(X):(Y)) +#define MIN3(X,Y,Z) MIN(MIN(X,Y),Z) + + +/*------------------------------------------------------------------------- + * Function: usage + * + * Purpose: Prints a usage message. + * + * Return: void + * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +usage (const char *progname) +{ + fprintf (stderr, "usage: %s [-[b|m] N[g|m|k]] SRC DST\n", progname); + fprintf (stderr, " -b N The I/O block size, defaults to 1kB\n"); + fprintf (stderr, " -m N The destination member size or 1GB\n"); + fprintf (stderr, " SRC The name of the source file\n"); + fprintf (stderr, " DST The name of the destination files\n"); + fprintf (stderr, "Sizes may be suffixed with `g' for GB, `m' for MB or " + "`k' for kB.\n"); + fprintf (stderr, "File family names include an integer printf " + "format such as `%%d'\n"); + exit (1); +} + + +/*------------------------------------------------------------------------- + * Function: get_size + * + * Purpose: Reads a size option of the form `-XNS' where `X' is any + * letter, `N' is a multi-character positive decimal number, and + * `S' is an optional suffix letter in the set [GgMmk]. The + * option may also be split among two arguments as: `-X NS'. + * The input value of ARGNO is the argument number for the + * switch in the ARGV vector and ARGC is the number of entries + * in that vector. + * + * Return: Success: The value N multiplied according to the + * suffix S. On return ARGNO will be the number + * of the next argument to process. + * + * Failure: Calls usage() which exits with a non-zero + * status. + * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static off_t +get_size (const char *progname, int *argno, int argc, char *argv[]) +{ + off_t retval; + char *suffix; + + if (isdigit (argv[*argno][2])) { + retval = strtol (argv[*argno]+2, &suffix, 10); + (*argno)++; + } else if (argv[*argno][2] || *argno+1>=argc) { + usage (progname); + } else { + retval = strtol (argv[*argno+1], &suffix, 0); + if (suffix==argv[*argno+1]) usage (progname); + *argno += 2; + } + if (suffix && suffix[0] && !suffix[1]) { + switch (*suffix) { + case 'G': + case 'g': + retval *= 1024 * 1024 * 1024; + break; + case 'M': + case 'm': + retval *= 1024 * 1024; + break; + case 'k': + retval *= 1024; + break; + default: + usage (progname); + } + } else if (suffix && suffix[0]) { + usage (progname); + } + return retval; +} + + +/*------------------------------------------------------------------------- + * Function: main + * + * Purpose: Split an hdf5 file + * + * Return: Success: + * + * Failure: + * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +main (int argc, char *argv[]) +{ + const char *prog_name; /*program name */ + size_t blk_size=1024; /*size of each I/O block */ + char *buf=NULL; /*I/O block buffer */ + size_t n, i; /*counters */ + ssize_t nio; /*I/O return value */ + int argno=1; /*program argument number */ + int src, dst=-1; /*source & destination files */ + int need_seek=FALSE; /*destination needs to seek? */ + int need_write; /*data needs to be written? */ + struct stat sb; /*temporary file stat buffer */ + int verbose=FALSE; /*display file names? */ + size_t left_overs=0; /*amount of zeros left over */ + + const char *src_gen_name; /*general source name */ + char src_name[NAMELEN]; /*source member name */ + off_t src_offset=0; /*offset in source member */ + int src_is_family; /*is source name a family name? */ + int src_membno=0; /*source member number */ + off_t src_size; /*source logical member size */ + off_t src_act_size; /*source actual member size */ + + const char *dst_gen_name; /*general destination name */ + char dst_name[NAMELEN]; /*destination member name */ + off_t dst_offset=0; /*offset in destination member */ + int dst_is_family; /*is dst name a family name? */ + int dst_membno=0; /*destination member number */ + off_t dst_size=1 GB; /*destination logical memb size */ + + /* + * Get the program name from argv[0]. Use only the last component. + */ + if ((prog_name=strrchr (argv[0], '/'))) prog_name++; + else prog_name = argv[0]; + + /* + * Parse switches. + */ + while (argno<argc && '-'==argv[argno][0]) { + if (!strcmp (argv[argno], "-v")) { + verbose = TRUE; + argno++; + } else if ('b'==argv[argno][1]) { + blk_size = get_size (prog_name, &argno, argc, argv); + } else if ('m'==argv[argno][1]) { + dst_size = get_size (prog_name, &argno, argc, argv); + } else { + usage (prog_name); + } + } + + /* + * Get the name for the source file and open the first member. The size + * of the first member determines the logical size of all the members. + */ + if (argno>=argc) usage (prog_name); + src_gen_name = argv[argno++]; + sprintf (src_name, src_gen_name, src_membno); + src_is_family = strcmp (src_name, src_gen_name); + if ((src=open (src_name, O_RDONLY))<0) { + perror (src_name); + exit (1); + } + if (fstat (src, &sb)<0) { + perror ("fstat"); + exit (1); + } + src_size = src_act_size = sb.st_size; + if (verbose) fprintf (stderr, "< %s\n", src_name); + + /* + * Get the name for the destination file and open the first member. + */ + if (argno>=argc) usage (prog_name); + dst_gen_name = argv[argno++]; + sprintf (dst_name, dst_gen_name, dst_membno); + dst_is_family = strcmp (dst_name, dst_gen_name); + if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { + perror (dst_name); + exit (1); + } + if (verbose) fprintf (stderr, "> %s\n", dst_name); + + /* No more arguments */ + if (argno<argc) usage (prog_name); + + /* Now the real work, split the file */ + buf = malloc (blk_size); + while (src_offset<src_size) { + + /* Read a block. The amount to read is the minimum of: + * 1. The I/O block size + * 2. What's left to write in the destination member + * 3. Left over zeros or what's left in the source member. + */ + n = blk_size; + if (dst_is_family) n = (size_t)MIN((off_t)n, dst_size-dst_offset); + if (left_overs) { + n = MIN (n, left_overs); + left_overs -= n; + need_write = FALSE; + } else if (src_offset<src_act_size) { + n = (size_t)MIN ((off_t)n, src_act_size-src_offset); + if ((nio=read (src, buf, n))<0) { + perror ("read"); + exit (1); + } else if ((size_t)nio!=n) { + fprintf (stderr, "%s: short read\n", src_name); + exit (1); + } + for (i=0; i<n; i++) { + if (buf[i]) break; + } + need_write = (i<n); + } else { + n = 0; + left_overs = src_size - src_act_size; + need_write = FALSE; + } + + /* + * If the block contains non-zero data then write it to the + * destination, otherwise just remember that we'll have to do a seek + * later in the destination when we finally get non-zero data. + */ + if (need_write) { + if (need_seek && lseek (dst, dst_offset, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if ((nio=write (dst, buf, n))<0) { + perror ("write"); + exit (1); + } else if ((size_t)nio!=n) { + fprintf (stderr, "%s: short write\n", dst_name); + exit (1); + } + need_seek = FALSE; + } else { + need_seek = TRUE; + } + + /* + * Update the source offset and open the next source family member if + * necessary. The source stream ends at the first member which + * cannot be opened because it doesn't exist. At the end of the + * source stream, update the destination offset and break out of the + * loop. The destination offset must be updated so we can fix + * trailing holes. + */ + src_offset += n; + if (src_offset==src_act_size) { + close (src); + if (!src_is_family) { + dst_offset += n; + break; + } + sprintf (src_name, src_gen_name, ++src_membno); + if ((src=open (src_name, O_RDONLY))<0 && ENOENT==errno) { + dst_offset += n; + break; + } else if (src<0) { + perror (src_name); + exit (1); + } + if (fstat (src, &sb)<0) { + perror ("fstat"); + exit (1); + } + src_act_size = sb.st_size; + if (src_act_size>src_size) { + fprintf (stderr, "%s: member truncated to %lu bytes\n", + src_name, (unsigned long)src_size); + } + src_offset = 0; + if (verbose) fprintf (stderr, "< %s\n", src_name); + } + + /* + * Update the destination offset, opening a new member if one will be + * needed. The first member is extended to the logical member size + * but other members might be smaller if they end with a hole. + */ + dst_offset += n; + if (dst_is_family && dst_offset==dst_size) { + if (0==dst_membno) { + if (lseek (dst, dst_size-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (read (dst, buf, 1)<0) { + perror ("read"); + exit (1); + } + if (lseek (dst, dst_size-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (write (dst, buf, 1)<0) { + perror ("write"); + exit (1); + } + } + close (dst); + sprintf (dst_name, dst_gen_name, ++dst_membno); + if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { + perror (dst_name); + exit (1); + } + dst_offset = 0; + need_seek = FALSE; + if (verbose) fprintf (stderr, "> %s\n", dst_name); + } + } + + /* + * Make sure the last family member is the right size and then close it. + * The last member can't end with a hole or hdf5 will think that the + * family has been truncated. + */ + if (need_seek) { + if (lseek (dst, dst_offset-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (read (dst, buf, 1)<0) { + perror ("read"); + exit (1); + } + if (lseek (dst, dst_offset-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (write (dst, buf, 1)<0) { + perror ("write"); + exit (1); + } + } + close (dst); + + /* Free resources and return */ + free (buf); + return 0; +} |