diff options
-rw-r--r-- | tools/misc/Dependencies | 0 | ||||
-rw-r--r-- | tools/misc/Makefile.in | 71 | ||||
-rw-r--r-- | tools/misc/h5debug.c | 185 | ||||
-rw-r--r-- | tools/misc/h5import.c | 141 | ||||
-rw-r--r-- | tools/misc/h5repart.c | 418 | ||||
-rw-r--r-- | tools/misc/pdb2hdf.c | 503 |
6 files changed, 1318 insertions, 0 deletions
## HDF5 Library Makefile(.in)
##
## Copyright (C) 2001 National Center for Supercomputing Applications.
## All rights reserved.
##
## Build template for the miscellaneous tools in tools/misc.  The @...@
## tokens are placeholders that are substituted when the Makefile is
## generated from this template.
##
top_srcdir=@top_srcdir@
top_builddir=../..
srcdir=@srcdir@
SUBDIRS=
@COMMENCE@

## Add the include directories (this directory, the library sources and
## tools/lib) to the C preprocessor flags.  The libraries themselves are
## named by LIBTOOLS/LIBHDF5 below and passed explicitly on each link line.
##
CPPFLAGS=-I. -I$(srcdir) -I$(top_builddir)/src -I$(top_srcdir)/src \
         -I$(top_srcdir)/tools/lib @CPPFLAGS@

## Test programs and scripts (none in this directory).
##
TEST_PROGS=
TEST_SCRIPTS=

## These are our main targets: library and tools.
##
LIBTOOLS=../lib/libh5tools.la
LIBHDF5=$(top_builddir)/src/libhdf5.la

## @PDB2HDF@ is a substituted placeholder; its value is not visible here.
PUB_PROGS=h5debug h5import h5repart @PDB2HDF@
PROGS=$(PUB_PROGS) $(TEST_PROGS)

## Source and object files for the library; do not install
##
LIB_SRC=
LIB_OBJ=$(LIB_SRC:.c=.lo)
PUB_LIB=

## Source and object files for programs...
##
PROG_SRC=h5debug.c h5import.c h5repart.c pdb2hdf.c
PROG_OBJ=$(PROG_SRC:.c=.lo)

PRIVATE_HDR=

## Source and object files for the tests
##
TEST_SRC=
TEST_OBJ=$(TEST_SRC:.c=.lo)

## Programs have to be built before they can be tested!
##
check test _test: $(PROGS)

## How to build the programs...They all depend on the hdf5 library and
## on the tools library, which is built in ../lib (see LIBTOOLS above).
##
$(PROGS): $(LIBTOOLS) $(LIBHDF5)

h5debug: h5debug.lo
	@$(LT_LINK_EXE) $(CFLAGS) -o $@ h5debug.lo $(LIBTOOLS) $(LIBHDF5) $(LDFLAGS) $(LIBS)

h5import: h5import.lo
	@$(LT_LINK_EXE) $(CFLAGS) -o $@ h5import.lo $(LIBTOOLS) $(LIBHDF5) $(LDFLAGS) $(LIBS)

h5repart: h5repart.lo
	@$(LT_LINK_EXE) $(CFLAGS) -o $@ h5repart.lo $(LIBTOOLS) $(LIBHDF5) $(LDFLAGS) $(LIBS)

pdb2hdf: pdb2hdf.lo
	@$(LT_LINK_EXE) $(CFLAGS) -o $@ pdb2hdf.lo $(LIBTOOLS) $(LIBHDF5) $(LDFLAGS) $(LIBS)

@CONCLUDE@
+ * + * Modifications: + * + *------------------------------------------------------------------------- + */ +#define H5F_PACKAGE /*suppress error about including H5Fpkg */ + +#include <H5private.h> +#include <H5Iprivate.h> +#include <H5Bprivate.h> +#include <H5Pprivate.h> +#include <H5Fpkg.h> +#include <H5Gprivate.h> +#include <H5HGprivate.h> +#include <H5HLprivate.h> +#include <H5Oprivate.h> + +/* File drivers */ +#include <H5FDfamily.h> + +#define INDENT 3 +#define VCOL 50 + + +/*------------------------------------------------------------------------- + * Function: main + * + * Usage: debug FILENAME [OFFSET] + * + * Return: Success: exit (0) + * + * Failure: exit (non-zero) + * + * Programmer: Robb Matzke + * matzke@llnl.gov + * Jul 18 1997 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +main(int argc, char *argv[]) +{ + hid_t fid, plist=H5P_DEFAULT; + H5F_t *f; + haddr_t addr=0, extra=0; + uint8_t sig[16]; + intn i, ndims; + herr_t status = SUCCEED; + + if (argc == 1) { + fprintf(stderr, + "Usage: %s filename [signature addr [extra]]\n", argv[0]); + HDexit(1); + } + + /* + * Open the file and get the file descriptor. + */ + if (strchr (argv[1], '%')) { + plist = H5Pcreate (H5P_FILE_ACCESS); + H5Pset_fapl_family (plist, (hsize_t)0, H5P_DEFAULT); + } + if ((fid = H5Fopen(argv[1], H5F_ACC_RDONLY, plist)) < 0) { + fprintf(stderr, "cannot open file\n"); + HDexit(1); + } + if (NULL == (f = H5I_object(fid))) { + fprintf(stderr, "cannot obtain H5F_t pointer\n"); + HDexit(2); + } + + /* + * Parse command arguments. + */ + if (argc > 2) { + printf("New address: %s\n", argv[2]); + addr = HDstrtoll(argv[2], NULL, 0); + } + if (argc > 3) { + extra = HDstrtoll(argv[3], NULL, 0); + } + /* + * Read the signature at the specified file position. 
+ */ + HDfprintf(stdout, "Reading signature at address %a (rel)\n", addr); + if (H5F_block_read(f, H5FD_MEM_SUPER, addr, (hsize_t)sizeof(sig), H5P_DEFAULT, sig)<0) { + fprintf(stderr, "cannot read signature\n"); + HDexit(3); + } + if (!HDmemcmp(sig, H5F_SIGNATURE, H5F_SIGNATURE_LEN)) { + /* + * Debug the boot block. + */ + status = H5F_debug(f, addr, stdout, 0, VCOL); + + } else if (!HDmemcmp(sig, H5HL_MAGIC, H5HL_SIZEOF_MAGIC)) { + /* + * Debug a local heap. + */ + status = H5HL_debug(f, addr, stdout, 0, VCOL); + + } else if (!HDmemcmp (sig, H5HG_MAGIC, H5HG_SIZEOF_MAGIC)) { + /* + * Debug a global heap collection. + */ + status = H5HG_debug (f, addr, stdout, 0, VCOL); + + } else if (!HDmemcmp(sig, H5G_NODE_MAGIC, H5G_NODE_SIZEOF_MAGIC)) { + /* + * Debug a symbol table node. + */ + status = H5G_node_debug(f, addr, stdout, 0, VCOL, extra); + + } else if (!HDmemcmp(sig, H5B_MAGIC, H5B_SIZEOF_MAGIC)) { + /* + * Debug a B-tree. B-trees are debugged through the B-tree + * subclass. The subclass identifier is the byte immediately + * after the B-tree signature. + */ + H5B_subid_t subtype = (H5B_subid_t)sig[H5B_SIZEOF_MAGIC]; + + switch (subtype) { + case H5B_SNODE_ID: + status = H5G_node_debug(f, addr, stdout, 0, VCOL, extra); + break; + + case H5B_ISTORE_ID: + ndims = (int)extra; + status = H5F_istore_debug (f, addr, stdout, 0, VCOL, ndims); + break; + + default: + fprintf(stderr, "Unknown B-tree subtype %u\n", + (unsigned)(subtype)); + HDexit(4); + } + + } else if (sig[0] == H5O_VERSION) { + /* + * This could be an object header. Since they don't have a signature + * it's a somewhat "ify" detection. + */ + status = H5O_debug(f, addr, stdout, 0, VCOL); + + } else { + /* + * Got some other unrecognized signature. 
+ */ + printf("%-*s ", VCOL, "Signature:"); + for (i = 0; i < 8; i++) { + if (sig[i] > ' ' && sig[i] <= '~' && '\\' != sig[i]) { + HDputchar(sig[i]); + } else if ('\\' == sig[i]) { + HDputchar('\\'); + HDputchar('\\'); + } else { + printf("\\%03o", sig[i]); + } + } + HDputchar('\n'); + + fprintf(stderr, "unknown signature\n"); + HDexit(4); + } + + if (status < 0) { + fprintf(stderr, "An error occurred\n"); + HDexit(5); + } + H5Fclose(fid); + return 0; +} diff --git a/tools/misc/h5import.c b/tools/misc/h5import.c new file mode 100644 index 0000000..e896feb --- /dev/null +++ b/tools/misc/h5import.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 1998 NCSA + * All rights reserved. + * + * Programmer: Robb Matzke <matzke@llnl.gov> + * Thursday, June 11, 1998 + * + * Purpose: Create an hdf5 file with a 1d dataset of uint8. + */ + +/* See H5private.h for how to include system headers */ +#include <hdf5.h> +#ifdef H5_STDC_HEADERS +# include <fcntl.h> +# include <string.h> +# include <stdlib.h> +# include <stdio.h> +#endif + +#ifdef H5_HAVE_UNISTD_H +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef H5_HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif + +#ifdef WIN32 +#include <io.h> +#endif + + +/*------------------------------------------------------------------------- + * Function: usage + * + * Purpose: Print a usage message and exit with non-zero status + * + * Return: never returns + * + * Programmer: Robb Matzke + * Thursday, June 11, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +usage (const char *argv0) +{ + fprintf (stderr, "Usage: %s -f HDF5-FILE FILES...\n", argv0); + exit (1); +} + + +/*------------------------------------------------------------------------- + * Function: main + * + * Purpose: + * + * Return: Success: 0 + * + * Failure: 1 + * + * Programmer: Robb Matzke + * Thursday, June 11, 1998 + * + * Modifications: + * + 
*------------------------------------------------------------------------- + */ +int +main (int argc, char *argv[]) +{ + hid_t file, space=-1, dset=-1; + const char *output_name, *dset_name; + int argno, fd=-1; + hsize_t size[1]; + struct stat sb; + + /* Parse arguments */ + if (argc<4) usage (argv[0]); + if (strcmp (argv[1], "-f")) usage (argv[0]); + output_name = argv[2]; + + /* create the file */ + H5E_BEGIN_TRY { + if ((file = H5Fcreate (output_name, H5F_ACC_EXCL, + H5P_DEFAULT, H5P_DEFAULT))<0 && + (file = H5Fopen (output_name, H5F_ACC_RDWR, H5P_DEFAULT)<0)) { + fprintf (stderr, "%s: unable to create or open hdf5 file\n", + output_name); + exit (1); + } + } H5E_END_TRY; + + /* process files from command-line */ + for (argno=3; argno<argc; argno++) { + + /* Open the file */ + if ((dset_name=strrchr (argv[argno], '/'))) dset_name++; + else dset_name = argv[argno]; + fprintf (stderr, "%s\n", dset_name); + if ((fd=open (argv[argno], O_RDONLY))<0) { + perror (argv[argno]); + goto next; + } + if (fstat (fd, &sb)<0) { + perror (argv[argno]); + goto next; + } + + /* Data space */ + size[0] = sb.st_size; + if ((space = H5Screate_simple (1, size, size))<0) goto next; + + /* Dataset */ + if ((dset=H5Dcreate (file, dset_name, H5T_NATIVE_SCHAR, + space, H5P_DEFAULT))<0) goto next; + + + + next: + if (fd>=0) close (fd); + fd = -1; + H5E_BEGIN_TRY { + if (space>=0) { + H5Sclose (space); + space = -1; + } + if (dset>=0) { + H5Dclose (dset); + dset = -1; + } + } H5E_END_TRY; + } + + /* Close the file */ + H5Fclose (file); + return 0; +} diff --git a/tools/misc/h5repart.c b/tools/misc/h5repart.c new file mode 100644 index 0000000..de7b3df --- /dev/null +++ b/tools/misc/h5repart.c @@ -0,0 +1,418 @@ +/* + * Copyright (C) 1998 NCSA + * All rights reserved. + * + * Programmer: Robb Matzke <matzke@llnl.gov> + * Wednesday, May 13, 1998 + * + * Purpose: Repartitions a file family. 
This program can be used to + * split a single file into a family of files, join a family of + * files into a single file, or copy one family to another while + * changing the size of the family members. It can also be used + * to copy a single file to a single file with holes. + */ + +/* See H5private.h for how to include system headers */ +#include <hdf5.h> +#ifdef H5_STDC_HEADERS +# include <ctype.h> +# include <errno.h> +# include <fcntl.h> +# include <stdio.h> +# include <stdlib.h> +# include <string.h> +#endif + +#ifdef H5_HAVE_UNISTD_H +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef H5_HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif + +#ifdef WIN32 +#include <io.h> +#endif + +#ifndef FALSE +#define FALSE 0 +#endif +#ifndef TRUE +#define TRUE 1 +#endif +#define NAMELEN 4096 +#define GB *1024*1024*1024 + +#ifndef MIN +#define MIN(X,Y) ((X)<(Y)?(X):(Y)) +#endif +#ifndef MIN3 +#define MIN3(X,Y,Z) MIN(MIN(X,Y),Z) +#endif + + +/*------------------------------------------------------------------------- + * Function: usage + * + * Purpose: Prints a usage message. 
+ * + * Return: void + * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +usage (const char *progname) +{ + fprintf(stderr, "usage: %s [-v] [-V] [-[b|m] N[g|m|k]] SRC DST\n", + progname); + fprintf(stderr, " -v Produce verbose output\n"); + fprintf(stderr, " -V Print a version number and exit\n"); + fprintf(stderr, " -b N The I/O block size, defaults to 1kB\n"); + fprintf(stderr, " -m N The destination member size or 1GB\n"); + fprintf(stderr, " SRC The name of the source file\n"); + fprintf(stderr, " DST The name of the destination files\n"); + fprintf(stderr, "Sizes may be suffixed with `g' for GB, `m' for MB or " + "`k' for kB.\n"); + fprintf(stderr, "File family names include an integer printf " + "format such as `%%d'\n"); + exit (1); +} + + +/*------------------------------------------------------------------------- + * Function: get_size + * + * Purpose: Reads a size option of the form `-XNS' where `X' is any + * letter, `N' is a multi-character positive decimal number, and + * `S' is an optional suffix letter in the set [GgMmk]. The + * option may also be split among two arguments as: `-X NS'. + * The input value of ARGNO is the argument number for the + * switch in the ARGV vector and ARGC is the number of entries + * in that vector. + * + * Return: Success: The value N multiplied according to the + * suffix S. On return ARGNO will be the number + * of the next argument to process. + * + * Failure: Calls usage() which exits with a non-zero + * status. 
+ * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static off_t +get_size (const char *progname, int *argno, int argc, char *argv[]) +{ + off_t retval=-1; + char *suffix; + + if (isdigit ((int)(argv[*argno][2]))) { + retval = strtol (argv[*argno]+2, &suffix, 10); + (*argno)++; + } else if (argv[*argno][2] || *argno+1>=argc) { + usage (progname); + } else { + retval = strtol (argv[*argno+1], &suffix, 0); + if (suffix==argv[*argno+1]) usage (progname); + *argno += 2; + } + if (suffix && suffix[0] && !suffix[1]) { + switch (*suffix) { + case 'G': + case 'g': + retval *= 1024 * 1024 * 1024; + break; + case 'M': + case 'm': + retval *= 1024 * 1024; + break; + case 'k': + retval *= 1024; + break; + default: + usage (progname); + } + } else if (suffix && suffix[0]) { + usage (progname); + } + return retval; +} + + +/*------------------------------------------------------------------------- + * Function: main + * + * Purpose: Split an hdf5 file + * + * Return: Success: + * + * Failure: + * + * Programmer: Robb Matzke + * Wednesday, May 13, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +main (int argc, char *argv[]) +{ + const char *prog_name; /*program name */ + size_t blk_size=1024; /*size of each I/O block */ + char *buf=NULL; /*I/O block buffer */ + size_t n, i; /*counters */ + ssize_t nio; /*I/O return value */ + int argno=1; /*program argument number */ + int src, dst=-1; /*source & destination files */ + int need_seek=FALSE; /*destination needs to seek? */ + int need_write; /*data needs to be written? */ + struct stat sb; /*temporary file stat buffer */ + int verbose=FALSE; /*display file names? 
*/ + size_t left_overs=0; /*amount of zeros left over */ + + const char *src_gen_name; /*general source name */ + char src_name[NAMELEN]; /*source member name */ + off_t src_offset=0; /*offset in source member */ + int src_is_family; /*is source name a family name? */ + int src_membno=0; /*source member number */ + off_t src_size; /*source logical member size */ + off_t src_act_size; /*source actual member size */ + + const char *dst_gen_name; /*general destination name */ + char dst_name[NAMELEN]; /*destination member name */ + off_t dst_offset=0; /*offset in destination member */ + int dst_is_family; /*is dst name a family name? */ + int dst_membno=0; /*destination member number */ + off_t dst_size=1 GB; /*destination logical memb size */ + + /* + * Get the program name from argv[0]. Use only the last component. + */ + if ((prog_name=strrchr (argv[0], '/'))) prog_name++; + else prog_name = argv[0]; + + /* + * Parse switches. + */ + while (argno<argc && '-'==argv[argno][0]) { + if (!strcmp (argv[argno], "-v")) { + verbose = TRUE; + argno++; + } else if (!strcmp(argv[argno], "-V")) { + printf("This is %s version %u.%u release %u\n", + prog_name, H5_VERS_MAJOR, H5_VERS_MINOR, H5_VERS_RELEASE); + exit(0); + } else if ('b'==argv[argno][1]) { + blk_size = get_size (prog_name, &argno, argc, argv); + } else if ('m'==argv[argno][1]) { + dst_size = get_size (prog_name, &argno, argc, argv); + } else { + usage (prog_name); + } + } + + /* + * Get the name for the source file and open the first member. The size + * of the first member determines the logical size of all the members. 
+ */ + if (argno>=argc) usage (prog_name); + src_gen_name = argv[argno++]; + sprintf (src_name, src_gen_name, src_membno); + src_is_family = strcmp (src_name, src_gen_name); + if ((src=open (src_name, O_RDONLY))<0) { + perror (src_name); + exit (1); + } + if (fstat (src, &sb)<0) { + perror ("fstat"); + exit (1); + } + src_size = src_act_size = sb.st_size; + if (verbose) fprintf (stderr, "< %s\n", src_name); + + /* + * Get the name for the destination file and open the first member. + */ + if (argno>=argc) usage (prog_name); + dst_gen_name = argv[argno++]; + sprintf (dst_name, dst_gen_name, dst_membno); + dst_is_family = strcmp (dst_name, dst_gen_name); + if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { + perror (dst_name); + exit (1); + } + if (verbose) fprintf (stderr, "> %s\n", dst_name); + + /* No more arguments */ + if (argno<argc) usage (prog_name); + + /* Now the real work, split the file */ + buf = malloc (blk_size); + while (src_offset<src_size) { + + /* Read a block. The amount to read is the minimum of: + * 1. The I/O block size + * 2. What's left to write in the destination member + * 3. Left over zeros or what's left in the source member. + */ + n = blk_size; + if (dst_is_family) n = (size_t)MIN((off_t)n, dst_size-dst_offset); + if (left_overs) { + n = MIN (n, left_overs); + left_overs -= n; + need_write = FALSE; + } else if (src_offset<src_act_size) { + n = (size_t)MIN ((off_t)n, src_act_size-src_offset); + if ((nio=read (src, buf, n))<0) { + perror ("read"); + exit (1); + } else if ((size_t)nio!=n) { + fprintf (stderr, "%s: short read\n", src_name); + exit (1); + } + for (i=0; i<n; i++) { + if (buf[i]) break; + } + need_write = (i<n); + } else { + n = 0; + left_overs = src_size - src_act_size; + need_write = FALSE; + } + + /* + * If the block contains non-zero data then write it to the + * destination, otherwise just remember that we'll have to do a seek + * later in the destination when we finally get non-zero data. 
+ */ + if (need_write) { + if (need_seek && lseek (dst, dst_offset, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if ((nio=write (dst, buf, n))<0) { + perror ("write"); + exit (1); + } else if ((size_t)nio!=n) { + fprintf (stderr, "%s: short write\n", dst_name); + exit (1); + } + need_seek = FALSE; + } else { + need_seek = TRUE; + } + + /* + * Update the source offset and open the next source family member if + * necessary. The source stream ends at the first member which + * cannot be opened because it doesn't exist. At the end of the + * source stream, update the destination offset and break out of the + * loop. The destination offset must be updated so we can fix + * trailing holes. + */ + src_offset += n; + if (src_offset==src_act_size) { + close (src); + if (!src_is_family) { + dst_offset += n; + break; + } + sprintf (src_name, src_gen_name, ++src_membno); + if ((src=open (src_name, O_RDONLY))<0 && ENOENT==errno) { + dst_offset += n; + break; + } else if (src<0) { + perror (src_name); + exit (1); + } + if (fstat (src, &sb)<0) { + perror ("fstat"); + exit (1); + } + src_act_size = sb.st_size; + if (src_act_size>src_size) { + fprintf (stderr, "%s: member truncated to %lu bytes\n", + src_name, (unsigned long)src_size); + } + src_offset = 0; + if (verbose) fprintf (stderr, "< %s\n", src_name); + } + + /* + * Update the destination offset, opening a new member if one will be + * needed. The first member is extended to the logical member size + * but other members might be smaller if they end with a hole. 
+ */ + dst_offset += n; + if (dst_is_family && dst_offset==dst_size) { + if (0==dst_membno) { + if (lseek (dst, dst_size-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (read (dst, buf, 1)<0) { + perror ("read"); + exit (1); + } + if (lseek (dst, dst_size-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (write (dst, buf, 1)<0) { + perror ("write"); + exit (1); + } + } + close (dst); + sprintf (dst_name, dst_gen_name, ++dst_membno); + if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) { + perror (dst_name); + exit (1); + } + dst_offset = 0; + need_seek = FALSE; + if (verbose) fprintf (stderr, "> %s\n", dst_name); + } + } + + /* + * Make sure the last family member is the right size and then close it. + * The last member can't end with a hole or hdf5 will think that the + * family has been truncated. + */ + if (need_seek) { + if (lseek (dst, dst_offset-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (read (dst, buf, 1)<0) { + perror ("read"); + exit (1); + } + if (lseek (dst, dst_offset-1, SEEK_SET)<0) { + perror ("lseek"); + exit (1); + } + if (write (dst, buf, 1)<0) { + perror ("write"); + exit (1); + } + } + close (dst); + + /* Free resources and return */ + free (buf); + return 0; +} diff --git a/tools/misc/pdb2hdf.c b/tools/misc/pdb2hdf.c new file mode 100644 index 0000000..7ecd28e --- /dev/null +++ b/tools/misc/pdb2hdf.c @@ -0,0 +1,503 @@ +/* + * Copyright © 1999 NCSA + * All rights reserved. + * + * Programmer: Robb Matzke <matzke@llnl.gov> + * Tuesday, October 12, 1999 + * + * Purpose: Creates an HDF5 file from a PDB file. The raw data can be + * left in the PDB file, creating an HDF5 file that contains + * meta data that points into the PDB file. + */ +#include <assert.h> +#include <hdf5.h> +#include <pdb.h> +#include <score.h> +#include <stdio.h> +#include <string.h> + +/* + * libsilo renames all the PDB functions. 
/*
 * libsilo renames all the PDB functions.  However, this source file uses
 * their documented names, so we have #define's to translate them to Silo
 * terminology.
 */
#ifdef H5_HAVE_LIBSILO
#   define PD_open              lite_PD_open
#   define PD_close             lite_PD_close
#   define PD_ls                lite_PD_ls
#   define PD_cd                lite_PD_cd
#   define PD_inquire_entry     lite_PD_inquire_entry
#   define PD_read              lite_PD_read
#   define _PD_fixname          _lite_PD_fixname
#   define _PD_rl_defstr        _lite_PD_rl_defstr
#   define SC_free              lite_SC_free
#endif

static int      verbose_g = 0;          /*verbose output?               */
static int      cached_g = 0;           /*use core file driver?         */


/*-------------------------------------------------------------------------
 * Function:    usage
 *
 * Purpose:     Print a usage message on stderr.  ARG0 is the name the
 *              program was invoked with; only its last path component is
 *              shown, unless ARG0 ends with a `/' in which case it is
 *              shown unchanged.  The function returns to the caller, who
 *              decides whether and how to exit.
 *
 * Return:      void
 *
 * Programmer:  Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *              PROGNAME is now a pointer to const: it aliases ARG0, which
 *              this function must not modify.
 *
 *-------------------------------------------------------------------------
 */
static void
usage(const char *arg0)
{
    const char  *progname;

    if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
    else progname = arg0;

    fprintf(stderr,
            "usage: %s [OPTIONS] [PDBFILE ...]\n"
            " OPTIONS\n"
            " -h, -?, --help Print a usage message and exit\n"
            " -c, --cached Cache all data in memory before writing the output\n"
            " -v, --verbose Print the name of each object processed\n"
            " -V, --version Show the version number of this program\n"
            "\n"
            " The options and PDB file names may be interspersed and are processed from\n"
            " left to right.\n"
            "\n"
            " The name of the HDF5 file is generated by taking the basename of the PDB\n"
            " file and replacing the last extension (or appending if no extension) with\n"
            " the characters \".h5\". For example, \"/tmp/test/eos.data\" would result\n"
            " in an HDF5 file called \"eos.h5\" in the current directory.\n",
            progname);
}
+ * + * Return: void + * + * Programmer: Robb Matzke + * Friday, October 15, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +version(const char *arg0) +{ + const char *progname; + + if ((progname=strrchr(arg0, '/')) && progname[1]) progname++; + else progname = arg0; + print_version(progname); +} + + +/*------------------------------------------------------------------------- + * Function: fix_name + * + * Purpose: Given a PDB file name create the corresponding HDF5 file + * name. This is done by taking the base name of the PDB file + * and replacing (or appending) the last extension with ".h5". + * + * Return: Success: HDF_NAME + * + * Failure: NULL + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static char * +fix_name(const char *pdb_name, char *hdf_name, size_t size) +{ + char *s; + const char *ext; + + if (!pdb_name || !hdf_name) return NULL; + if ((s=strrchr(pdb_name, '/'))) pdb_name = s; + if (NULL==(ext=strrchr(pdb_name, '.'))) ext = pdb_name + strlen(pdb_name); + if ((size_t)((ext-pdb_name)+4) > size) return NULL; /*overflow*/ + memcpy(hdf_name, pdb_name, ext-pdb_name); + strcpy(hdf_name+(ext-pdb_name), ".h5"); + return hdf_name; +} + + +/*------------------------------------------------------------------------- + * Function: fix_type + * + * Purpose: Given a PDB datatype return a corresponding hdf5 datatype. + * The hdf5 datatype should be closed when the caller is + * finished using it. 
+ * + * Return: Success: HDF5 datatype + * + * Failure: negative + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static hid_t +fix_type(PDBfile *pdb, const char *s) +{ + hid_t type = -1; + defstr *d = _lite_PD_lookup_type((char*)s, pdb->chart); + + /* PDB checking */ + assert(d); + assert(d->size>0); + if (d->onescmp) return -1; + + + if (!strcmp(s, "char")) { + /* + * Character datatypes. Use whatever sign the native system uses by + * default. + */ + type = H5Tcopy(H5T_NATIVE_CHAR); + + } else if (!strcmp(s, "integer")) { + /* + * Integer datatypes. PDB supports various sizes of signed or + * unsigned integers. + */ + type = H5Tcopy(d->unsgned?H5T_NATIVE_UINT:H5T_NATIVE_INT); + H5Tset_size(type, d->size); + H5Tset_precision(type, 8*d->size); + assert(NORMAL_ORDER==d->order_flag || REVERSE_ORDER==d->order_flag); + H5Tset_order(type, + NORMAL_ORDER==d->order_flag?H5T_ORDER_BE:H5T_ORDER_LE); + + } else if (!strcmp(s, "float") || !strcmp(s, "double")) { + /* + * Floating-point datatypes + */ + size_t nbits, spos, epos, esize, mpos, msize; + + type = H5Tcopy(H5T_NATIVE_FLOAT); + H5Tset_size(type, d->size); + H5Tset_precision(type, 8*d->size); + assert(d->order); + H5Tset_order(type, 1==d->order[0]?H5T_ORDER_BE:H5T_ORDER_LE); + + /* + * format[0] = # of bits per number + * format[1] = # of bits in exponent + * format[2] = # of bits in mantissa + * format[3] = start bit of sign + * format[4] = start bit of exponent + * format[5] = start bit of mantissa + * format[6] = high order mantissa bit (CRAY needs this) + * format[7] = bias of exponent + */ + assert(d->format && d->format[0] == 8*d->size); + nbits = d->format[0]; + spos = nbits - (d->format[3]+1); + esize = d->format[1]; + epos = nbits - (d->format[4]+esize); + msize = d->format[2]; + mpos = nbits - (d->format[5]+msize); + H5Tset_fields(type, spos, epos, esize, mpos, msize); + 
H5Tset_ebias(type, d->format[7]); + } + return type; +} + + +/*------------------------------------------------------------------------- + * Function: fix_space + * + * Purpose: Convert a PDB dimension list into an HDF5 data space. + * + * Return: Success: HDF5 data space + * + * Failure: negative + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static hid_t +fix_space(const dimdes *dim) +{ + hsize_t size[H5S_MAX_RANK]; + int rank; + + for (rank=0; rank<H5S_MAX_RANK && dim; rank++, dim=dim->next) { + size[rank] = dim->number; + } + if (rank>=H5S_MAX_RANK) return -1; + return H5Screate_simple(rank, size, NULL); +} + + +/*------------------------------------------------------------------------- + * Function: fix_external + * + * Purpose: Sets the external file information for a dataset creation + * property list based on information from PDB. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static int +fix_external(hid_t dcpl, const char *pdb_file_name, long nelmts, + hsize_t elmt_size, symblock *block) +{ + int i; + + for (i=0; nelmts>0; i++) { + hsize_t nbytes = block[i].number * elmt_size; + H5Pset_external(dcpl, pdb_file_name, block[i].diskaddr, nbytes); + nelmts -= block[i].number; + } + return 0; +} + + +/*------------------------------------------------------------------------- + * Function: traverse + * + * Purpose: Traverse the current working directory of the PDB file. 
+ * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static int +traverse(PDBfile *pdb, const char *pdb_file_name, hid_t hdf) +{ + int nitems, i, in_subdir=FALSE; + char **list=NULL; + hid_t group=-1, h_type=-1, h_space=-1, dset=-1, dcpl=-1; + hsize_t elmt_size; + const syment *ep=NULL; + + if (NULL==(list=PD_ls(pdb, ".", NULL, &nitems))) { + fprintf(stderr, "cannot obtain PDB directory contents\n"); + goto error; + } + + for (i=0; i<nitems; i++) { + ep = PD_inquire_entry(pdb, list[i], TRUE, NULL); + if (verbose_g) { + printf("%s %s\n", _PD_fixname(pdb, list[i]), ep->type); + fflush(stdout); + } + + + if ('/'==list[i][strlen(list[i])-1]) { + /* + * This is a PDB directory. Make a corresponding HDF5 group and + * traverse into that PDB directory and HDF5 group + */ + if ((group=H5Gcreate(hdf, list[i], 0))<0) { + fprintf(stderr, "cannot create HDF group %s\n", list[i]); + goto error; + } + if (!PD_cd(pdb, list[i])) { + fprintf(stderr, "cannot cd into PDB directory %s\n", list[i]); + goto error; + } else { + in_subdir = TRUE; + } + + traverse(pdb, pdb_file_name, group); + if (!PD_cd(pdb, "..")) { + fprintf(stderr, "cannot traverse out of PDB %s\n", list[i]); + goto error; + } + H5Gclose(group); + + } else { + /* This is some non-directory PDB object */ + + /* Create an HDF5 datatype from the PDB type */ + if ((h_type=fix_type(pdb, ep->type))<0) { + fprintf(stderr, "cannot create datatype for %s (%s)\n", + list[i], ep->type); + continue; + } + elmt_size = H5Tget_size(h_type); + + /* Create an HDF5 dataspace from the PDB dimensions */ + if ((h_space=fix_space(ep->dimensions))<0) { + fprintf(stderr, "cannot create datatype for %s\n", list[i]); + continue; + } + + /* Create pointers to the external PDB data */ + dcpl = H5Pcreate(H5P_DATASET_CREATE); + fix_external(dcpl, pdb_file_name, ep->number, elmt_size, + 
ep->blocks); + + /* Create the dataset */ + if ((dset=H5Dcreate(hdf, list[i], h_type, h_space, dcpl))<0) { + fprintf(stderr, "cannot create dataset for %s\n", list[i]); + } + + H5Pclose(dcpl); + H5Dclose(dset); + H5Sclose(h_space); + H5Tclose(h_type); + } + + } + + for (i=0; i<nitems; i++) { + SC_free(list[i]); + } + SC_free(list); + return 0; + + error: + if (group>=0) H5Gclose(group); + if (in_subdir) PD_cd(pdb, ".."); + if (list) { + for (i=0; i<nitems; i++) { + SC_free(list[i]); + } + SC_free(list); + } + return -1; +} + + +/*------------------------------------------------------------------------- + * Function: main + * + * Purpose: Create an HDF5 file from a PDB file. + * + * Return: Success: 0 + * + * Failure: non-zero + * + * Programmer: Robb Matzke + * Tuesday, October 12, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +main(int argc, char *argv[]) +{ + int argno; + char _hdf_name[512], *hdf_name, *pdb_name, *s; + PDBfile *pdb; + hid_t hdf, fapl; + + /* Print a help message if called with no arguments */ + if (1==argc) { + usage(argv[0]); + exit(1); + } + + /* Process arguments in order; switches interspersed with files */ + for (argno=1; argno<argc; argno++) { + if (!strcmp("--help", argv[argno])) { + usage(argv[0]); + exit(1); + } else if (!strcmp("--verbose", argv[argno])) { + verbose_g++; + } else if (!strcmp("--cached", argv[argno])) { + cached_g++; + } else if (!strcmp("--version", argv[argno])) { + version(argv[0]); + } else if ('-'==argv[argno][0] && '-'!=argv[argno][1]) { + for (s=argv[argno]+1; *s; s++) { + switch (*s) { + case '?': + case 'h': /*--help*/ + usage(argv[0]); + exit(0); + case 'c': /*--cached*/ + cached_g++; + break; + case 'v': /*--verbose*/ + verbose_g++; + break; + case 'V': /*--version*/ + version(argv[0]); + break; + default: + usage(argv[0]); + exit(1); + } + } + } else if ('-'==argv[argno][0]) { + usage(argv[0]); + exit(1); + } else { + /* This must be a 
file name. Process it. */ + fapl = H5Pcreate(H5P_FILE_ACCESS); + if (cached_g) H5Pset_fapl_core(fapl, 1024*1024, TRUE); + + pdb_name = argv[argno]; + hdf_name = fix_name(argv[argno], _hdf_name, sizeof _hdf_name); + if (NULL==(pdb=PD_open(pdb_name, "r"))) { + fprintf(stderr, "%s: unable to open PDB file\n", pdb_name); + exit(1); + } + if ((hdf=H5Fcreate(hdf_name, H5F_ACC_TRUNC, H5P_DEFAULT, + fapl))<0) { + fprintf(stderr, "%s: unable to open HDF file\n", hdf_name); + exit(1); + } + H5Pclose(fapl); + + /* + * Traverse the PDB file to create the HDF5 file. + */ + traverse(pdb, pdb_name, hdf); + + /* Close the files */ + if (!PD_close(pdb)) { + fprintf(stderr, "%s: problems closing PDB file\n", pdb_name); + exit(1); + } + if (H5Fclose(hdf)<0) { + fprintf(stderr, "%s: problems closing HDF file\n", hdf_name); + exit(1); + } + } + } + return 0; +} |