summaryrefslogtreecommitdiffstats
path: root/tools/misc/pdb2hdf.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/misc/pdb2hdf.c')
-rw-r--r--tools/misc/pdb2hdf.c503
1 files changed, 503 insertions, 0 deletions
diff --git a/tools/misc/pdb2hdf.c b/tools/misc/pdb2hdf.c
new file mode 100644
index 0000000..7ecd28e
--- /dev/null
+++ b/tools/misc/pdb2hdf.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright © 1999 NCSA
+ * All rights reserved.
+ *
+ * Programmer: Robb Matzke <matzke@llnl.gov>
+ * Tuesday, October 12, 1999
+ *
+ * Purpose: Creates an HDF5 file from a PDB file. The raw data can be
+ * left in the PDB file, creating an HDF5 file that contains
+ * meta data that points into the PDB file.
+ */
+#include <assert.h>
+#include <hdf5.h>
+#include <pdb.h>
+#include <score.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * libsilo renames all the PDB functions. However, this source files uses
+ * their documented names, so we have #define's to translate them to Silo
+ * terminology.
+ */
+#ifdef H5_HAVE_LIBSILO
+# define PD_open lite_PD_open
+# define PD_close lite_PD_close
+# define PD_ls lite_PD_ls
+# define PD_cd lite_PD_cd
+# define PD_inquire_entry lite_PD_inquire_entry
+# define PD_read lite_PD_read
+# define _PD_fixname _lite_PD_fixname
+# define _PD_rl_defstr _lite_PD_rl_defstr
+# define SC_free lite_SC_free
+#endif
+
+static int verbose_g = 0; /*verbose output? */
+static int cached_g = 0; /*use core file driver? */
+
+
+/*-------------------------------------------------------------------------
+ * Function: usage
+ *
+ * Purpose: Print a usage message.
+ *
+ * Return: void
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+usage(const char *arg0)
+{
+ char *progname;
+
+ if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
+ else progname = arg0;
+
+ fprintf(stderr, "\
+usage: %s [OPTIONS] [PDBFILE ...]\n\
+ OPTIONS\n\
+ -h, -?, --help Print a usage message and exit\n\
+ -c, --cached Cache all data in memory before writing the output\n\
+ -v, --verbose Print the name of each object processed\n\
+ -V, --version Show the version number of this program\n\
+\n\
+ The options and PDB file names may be interspersed and are processed from\n\
+ left to right.\n\
+\n\
+ The name of the HDF5 file is generated by taking the basename of the PDB\n\
+ file and replacing the last extension (or appending if no extension) with\n\
+ the characters \".h5\". For example, \"/tmp/test/eos.data\" would result\n\
+ in an HDF5 file called \"eos.h5\" in the current directory.\n",
+ progname);
+
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: version
+ *
+ * Purpose: Print the version number.
+ *
+ * Return: void
+ *
+ * Programmer: Robb Matzke
+ * Friday, October 15, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+version(const char *arg0)
+{
+ const char *progname;
+
+ if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
+ else progname = arg0;
+ print_version(progname);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: fix_name
+ *
+ * Purpose: Given a PDB file name create the corresponding HDF5 file
+ * name. This is done by taking the base name of the PDB file
+ * and replacing (or appending) the last extension with ".h5".
+ *
+ * Return: Success: HDF_NAME
+ *
+ * Failure: NULL
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static char *
+fix_name(const char *pdb_name, char *hdf_name, size_t size)
+{
+ char *s;
+ const char *ext;
+
+ if (!pdb_name || !hdf_name) return NULL;
+ if ((s=strrchr(pdb_name, '/'))) pdb_name = s;
+ if (NULL==(ext=strrchr(pdb_name, '.'))) ext = pdb_name + strlen(pdb_name);
+ if ((size_t)((ext-pdb_name)+4) > size) return NULL; /*overflow*/
+ memcpy(hdf_name, pdb_name, ext-pdb_name);
+ strcpy(hdf_name+(ext-pdb_name), ".h5");
+ return hdf_name;
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: fix_type
+ *
+ * Purpose: Given a PDB datatype return a corresponding hdf5 datatype.
+ * The hdf5 datatype should be closed when the caller is
+ * finished using it.
+ *
+ * Return: Success: HDF5 datatype
+ *
+ * Failure: negative
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static hid_t
+fix_type(PDBfile *pdb, const char *s)
+{
+ hid_t type = -1;
+ defstr *d = _lite_PD_lookup_type((char*)s, pdb->chart);
+
+ /* PDB checking */
+ assert(d);
+ assert(d->size>0);
+ if (d->onescmp) return -1;
+
+
+ if (!strcmp(s, "char")) {
+ /*
+ * Character datatypes. Use whatever sign the native system uses by
+ * default.
+ */
+ type = H5Tcopy(H5T_NATIVE_CHAR);
+
+ } else if (!strcmp(s, "integer")) {
+ /*
+ * Integer datatypes. PDB supports various sizes of signed or
+ * unsigned integers.
+ */
+ type = H5Tcopy(d->unsgned?H5T_NATIVE_UINT:H5T_NATIVE_INT);
+ H5Tset_size(type, d->size);
+ H5Tset_precision(type, 8*d->size);
+ assert(NORMAL_ORDER==d->order_flag || REVERSE_ORDER==d->order_flag);
+ H5Tset_order(type,
+ NORMAL_ORDER==d->order_flag?H5T_ORDER_BE:H5T_ORDER_LE);
+
+ } else if (!strcmp(s, "float") || !strcmp(s, "double")) {
+ /*
+ * Floating-point datatypes
+ */
+ size_t nbits, spos, epos, esize, mpos, msize;
+
+ type = H5Tcopy(H5T_NATIVE_FLOAT);
+ H5Tset_size(type, d->size);
+ H5Tset_precision(type, 8*d->size);
+ assert(d->order);
+ H5Tset_order(type, 1==d->order[0]?H5T_ORDER_BE:H5T_ORDER_LE);
+
+ /*
+ * format[0] = # of bits per number
+ * format[1] = # of bits in exponent
+ * format[2] = # of bits in mantissa
+ * format[3] = start bit of sign
+ * format[4] = start bit of exponent
+ * format[5] = start bit of mantissa
+ * format[6] = high order mantissa bit (CRAY needs this)
+ * format[7] = bias of exponent
+ */
+ assert(d->format && d->format[0] == 8*d->size);
+ nbits = d->format[0];
+ spos = nbits - (d->format[3]+1);
+ esize = d->format[1];
+ epos = nbits - (d->format[4]+esize);
+ msize = d->format[2];
+ mpos = nbits - (d->format[5]+msize);
+ H5Tset_fields(type, spos, epos, esize, mpos, msize);
+ H5Tset_ebias(type, d->format[7]);
+ }
+ return type;
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: fix_space
+ *
+ * Purpose: Convert a PDB dimension list into an HDF5 data space.
+ *
+ * Return: Success: HDF5 data space
+ *
+ * Failure: negative
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static hid_t
+fix_space(const dimdes *dim)
+{
+ hsize_t size[H5S_MAX_RANK];
+ int rank;
+
+ for (rank=0; rank<H5S_MAX_RANK && dim; rank++, dim=dim->next) {
+ size[rank] = dim->number;
+ }
+ if (rank>=H5S_MAX_RANK) return -1;
+ return H5Screate_simple(rank, size, NULL);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: fix_external
+ *
+ * Purpose: Sets the external file information for a dataset creation
+ * property list based on information from PDB.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+fix_external(hid_t dcpl, const char *pdb_file_name, long nelmts,
+ hsize_t elmt_size, symblock *block)
+{
+ int i;
+
+ for (i=0; nelmts>0; i++) {
+ hsize_t nbytes = block[i].number * elmt_size;
+ H5Pset_external(dcpl, pdb_file_name, block[i].diskaddr, nbytes);
+ nelmts -= block[i].number;
+ }
+ return 0;
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: traverse
+ *
+ * Purpose: Traverse the current working directory of the PDB file.
+ *
+ * Return: Success: 0
+ *
+ * Failure: -1
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+traverse(PDBfile *pdb, const char *pdb_file_name, hid_t hdf)
+{
+ int nitems, i, in_subdir=FALSE;
+ char **list=NULL;
+ hid_t group=-1, h_type=-1, h_space=-1, dset=-1, dcpl=-1;
+ hsize_t elmt_size;
+ const syment *ep=NULL;
+
+ if (NULL==(list=PD_ls(pdb, ".", NULL, &nitems))) {
+ fprintf(stderr, "cannot obtain PDB directory contents\n");
+ goto error;
+ }
+
+ for (i=0; i<nitems; i++) {
+ ep = PD_inquire_entry(pdb, list[i], TRUE, NULL);
+ if (verbose_g) {
+ printf("%s %s\n", _PD_fixname(pdb, list[i]), ep->type);
+ fflush(stdout);
+ }
+
+
+ if ('/'==list[i][strlen(list[i])-1]) {
+ /*
+ * This is a PDB directory. Make a corresponding HDF5 group and
+ * traverse into that PDB directory and HDF5 group
+ */
+ if ((group=H5Gcreate(hdf, list[i], 0))<0) {
+ fprintf(stderr, "cannot create HDF group %s\n", list[i]);
+ goto error;
+ }
+ if (!PD_cd(pdb, list[i])) {
+ fprintf(stderr, "cannot cd into PDB directory %s\n", list[i]);
+ goto error;
+ } else {
+ in_subdir = TRUE;
+ }
+
+ traverse(pdb, pdb_file_name, group);
+ if (!PD_cd(pdb, "..")) {
+ fprintf(stderr, "cannot traverse out of PDB %s\n", list[i]);
+ goto error;
+ }
+ H5Gclose(group);
+
+ } else {
+ /* This is some non-directory PDB object */
+
+ /* Create an HDF5 datatype from the PDB type */
+ if ((h_type=fix_type(pdb, ep->type))<0) {
+ fprintf(stderr, "cannot create datatype for %s (%s)\n",
+ list[i], ep->type);
+ continue;
+ }
+ elmt_size = H5Tget_size(h_type);
+
+ /* Create an HDF5 dataspace from the PDB dimensions */
+ if ((h_space=fix_space(ep->dimensions))<0) {
+ fprintf(stderr, "cannot create datatype for %s\n", list[i]);
+ continue;
+ }
+
+ /* Create pointers to the external PDB data */
+ dcpl = H5Pcreate(H5P_DATASET_CREATE);
+ fix_external(dcpl, pdb_file_name, ep->number, elmt_size,
+ ep->blocks);
+
+ /* Create the dataset */
+ if ((dset=H5Dcreate(hdf, list[i], h_type, h_space, dcpl))<0) {
+ fprintf(stderr, "cannot create dataset for %s\n", list[i]);
+ }
+
+ H5Pclose(dcpl);
+ H5Dclose(dset);
+ H5Sclose(h_space);
+ H5Tclose(h_type);
+ }
+
+ }
+
+ for (i=0; i<nitems; i++) {
+ SC_free(list[i]);
+ }
+ SC_free(list);
+ return 0;
+
+ error:
+ if (group>=0) H5Gclose(group);
+ if (in_subdir) PD_cd(pdb, "..");
+ if (list) {
+ for (i=0; i<nitems; i++) {
+ SC_free(list[i]);
+ }
+ SC_free(list);
+ }
+ return -1;
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: main
+ *
+ * Purpose: Create an HDF5 file from a PDB file.
+ *
+ * Return: Success: 0
+ *
+ * Failure: non-zero
+ *
+ * Programmer: Robb Matzke
+ * Tuesday, October 12, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+int
+main(int argc, char *argv[])
+{
+ int argno;
+ char _hdf_name[512], *hdf_name, *pdb_name, *s;
+ PDBfile *pdb;
+ hid_t hdf, fapl;
+
+ /* Print a help message if called with no arguments */
+ if (1==argc) {
+ usage(argv[0]);
+ exit(1);
+ }
+
+ /* Process arguments in order; switches interspersed with files */
+ for (argno=1; argno<argc; argno++) {
+ if (!strcmp("--help", argv[argno])) {
+ usage(argv[0]);
+ exit(1);
+ } else if (!strcmp("--verbose", argv[argno])) {
+ verbose_g++;
+ } else if (!strcmp("--cached", argv[argno])) {
+ cached_g++;
+ } else if (!strcmp("--version", argv[argno])) {
+ version(argv[0]);
+ } else if ('-'==argv[argno][0] && '-'!=argv[argno][1]) {
+ for (s=argv[argno]+1; *s; s++) {
+ switch (*s) {
+ case '?':
+ case 'h': /*--help*/
+ usage(argv[0]);
+ exit(0);
+ case 'c': /*--cached*/
+ cached_g++;
+ break;
+ case 'v': /*--verbose*/
+ verbose_g++;
+ break;
+ case 'V': /*--version*/
+ version(argv[0]);
+ break;
+ default:
+ usage(argv[0]);
+ exit(1);
+ }
+ }
+ } else if ('-'==argv[argno][0]) {
+ usage(argv[0]);
+ exit(1);
+ } else {
+ /* This must be a file name. Process it. */
+ fapl = H5Pcreate(H5P_FILE_ACCESS);
+ if (cached_g) H5Pset_fapl_core(fapl, 1024*1024, TRUE);
+
+ pdb_name = argv[argno];
+ hdf_name = fix_name(argv[argno], _hdf_name, sizeof _hdf_name);
+ if (NULL==(pdb=PD_open(pdb_name, "r"))) {
+ fprintf(stderr, "%s: unable to open PDB file\n", pdb_name);
+ exit(1);
+ }
+ if ((hdf=H5Fcreate(hdf_name, H5F_ACC_TRUNC, H5P_DEFAULT,
+ fapl))<0) {
+ fprintf(stderr, "%s: unable to open HDF file\n", hdf_name);
+ exit(1);
+ }
+ H5Pclose(fapl);
+
+ /*
+ * Traverse the PDB file to create the HDF5 file.
+ */
+ traverse(pdb, pdb_name, hdf);
+
+ /* Close the files */
+ if (!PD_close(pdb)) {
+ fprintf(stderr, "%s: problems closing PDB file\n", pdb_name);
+ exit(1);
+ }
+ if (H5Fclose(hdf)<0) {
+ fprintf(stderr, "%s: problems closing HDF file\n", hdf_name);
+ exit(1);
+ }
+ }
+ }
+ return 0;
+}