summaryrefslogtreecommitdiffstats
path: root/tools/pdb2hdf.c
blob: 7ecd28e573f5f36c38145d2bdb7ed7d85cafa7a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
/*
 * Copyright ©  1999 NCSA
 *              All rights reserved.
 *
 * Programmer:  Robb Matzke <matzke@llnl.gov>
 *              Tuesday, October 12, 1999
 *
 * Purpose:	Creates an HDF5 file from a PDB file.  The raw data can be
 *		left in the PDB file, creating an HDF5 file that contains
 *		meta data that points into the PDB file.
 */
#include <assert.h>
#include <hdf5.h>
#include <pdb.h>
#include <score.h>
#include <stdio.h>
#include <string.h>

/*
 * libsilo renames all the PDB functions. However, this source files uses
 * their documented names, so we have #define's to translate them to Silo
 * terminology.
 */
#ifdef H5_HAVE_LIBSILO
#   define PD_open			lite_PD_open
#   define PD_close			lite_PD_close
#   define PD_ls			lite_PD_ls
#   define PD_cd			lite_PD_cd
#   define PD_inquire_entry		lite_PD_inquire_entry
#   define PD_read			lite_PD_read
#   define _PD_fixname			_lite_PD_fixname
#   define _PD_rl_defstr		_lite_PD_rl_defstr
#   define SC_free			lite_SC_free
#endif

static int verbose_g = 0;		/*verbose output?		*/
static int cached_g = 0;		/*use core file driver?		*/


/*-------------------------------------------------------------------------
 * Function:	usage
 *
 * Purpose:	Print a usage message.
 *
 * Return:	void
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static void
usage(const char *arg0)
{
    char	*progname;

    if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
    else progname = arg0;

    fprintf(stderr, "\
usage: %s [OPTIONS] [PDBFILE ...]\n\
   OPTIONS\n\
      -h, -?, --help   Print a usage message and exit\n\
      -c, --cached     Cache all data in memory before writing the output\n\
      -v, --verbose    Print the name of each object processed\n\
      -V, --version    Show the version number of this program\n\
\n\
   The options and PDB file names may be interspersed and are processed from\n\
   left to right.\n\
\n\
   The name of the HDF5 file is generated by taking the basename of the PDB\n\
   file and replacing the last extension (or appending if no extension) with\n\
   the characters \".h5\". For example, \"/tmp/test/eos.data\" would result\n\
   in an HDF5 file called \"eos.h5\" in the current directory.\n",
	    progname);
    
}


/*-------------------------------------------------------------------------
 * Function:	version
 *
 * Purpose:	Print the version number.
 *
 * Return:	void
 *
 * Programmer:	Robb Matzke
 *              Friday, October 15, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static void
version(const char *arg0)
{
    const char	*progname;

    if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
    else progname = arg0;
    print_version(progname);
}


/*-------------------------------------------------------------------------
 * Function:	fix_name
 *
 * Purpose:	Given a PDB file name create the corresponding HDF5 file
 *		name. This is done by taking the base name of the PDB file
 *		and replacing (or appending) the last extension with ".h5".
 *
 * Return:	Success:	HDF_NAME
 *
 *		Failure:	NULL
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static char *
fix_name(const char *pdb_name, char *hdf_name, size_t size)
{
    char	*s;
    const char	*ext;
    
    if (!pdb_name || !hdf_name) return NULL;
    if ((s=strrchr(pdb_name, '/'))) pdb_name = s;
    if (NULL==(ext=strrchr(pdb_name, '.'))) ext = pdb_name + strlen(pdb_name);
    if ((size_t)((ext-pdb_name)+4) > size) return NULL; /*overflow*/
    memcpy(hdf_name, pdb_name, ext-pdb_name);
    strcpy(hdf_name+(ext-pdb_name), ".h5");
    return hdf_name;
}


/*-------------------------------------------------------------------------
 * Function:	fix_type
 *
 * Purpose:	Given a PDB datatype return a corresponding hdf5 datatype.
 *		The hdf5 datatype should be closed when the caller is
 *		finished using it.
 *
 * Return:	Success:	HDF5 datatype
 *
 *		Failure:	negative
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static hid_t
fix_type(PDBfile *pdb, const char *s)
{
    hid_t	type = -1;
    defstr 	*d = _lite_PD_lookup_type((char*)s, pdb->chart);

    /* PDB checking */
    assert(d);
    assert(d->size>0);
    if (d->onescmp) return -1;
    
    
    if (!strcmp(s, "char")) {
	/*
	 * Character datatypes. Use whatever sign the native system uses by
	 * default.
	 */
	type = H5Tcopy(H5T_NATIVE_CHAR);
	
    } else if (!strcmp(s, "integer")) {
	/*
	 * Integer datatypes. PDB supports various sizes of signed or
	 * unsigned integers.
	 */
	type = H5Tcopy(d->unsgned?H5T_NATIVE_UINT:H5T_NATIVE_INT);
	H5Tset_size(type, d->size);
	H5Tset_precision(type, 8*d->size);
	assert(NORMAL_ORDER==d->order_flag || REVERSE_ORDER==d->order_flag);
	H5Tset_order(type,
		     NORMAL_ORDER==d->order_flag?H5T_ORDER_BE:H5T_ORDER_LE);
	
    } else if (!strcmp(s, "float") || !strcmp(s, "double")) {
	/*
	 * Floating-point datatypes
	 */
	size_t	nbits, spos, epos, esize, mpos, msize;

	type = H5Tcopy(H5T_NATIVE_FLOAT);
	H5Tset_size(type, d->size);
	H5Tset_precision(type, 8*d->size);
	assert(d->order);
	H5Tset_order(type, 1==d->order[0]?H5T_ORDER_BE:H5T_ORDER_LE);
	
	/*
	 * format[0] = # of bits per number                     
	 * format[1] = # of bits in exponent                    
	 * format[2] = # of bits in mantissa                    
	 * format[3] = start bit of sign                        
	 * format[4] = start bit of exponent                    
	 * format[5] = start bit of mantissa                    
	 * format[6] = high order mantissa bit (CRAY needs this)
	 * format[7] = bias of exponent
	 */
	assert(d->format && d->format[0] == 8*d->size);
	nbits = d->format[0];
	spos = nbits - (d->format[3]+1);
	esize = d->format[1];
	epos = nbits - (d->format[4]+esize);
	msize = d->format[2];
	mpos = nbits - (d->format[5]+msize);
	H5Tset_fields(type, spos, epos, esize, mpos, msize);
	H5Tset_ebias(type, d->format[7]);
    }
    return type;
}


/*-------------------------------------------------------------------------
 * Function:	fix_space
 *
 * Purpose:	Convert a PDB dimension list into an HDF5 data space.
 *
 * Return:	Success:	HDF5 data space
 *
 *		Failure:	negative
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static hid_t
fix_space(const dimdes *dim)
{
    hsize_t	size[H5S_MAX_RANK];
    int		rank;

    for (rank=0; rank<H5S_MAX_RANK && dim; rank++, dim=dim->next) {
	size[rank] = dim->number;
    }
    if (rank>=H5S_MAX_RANK) return -1;
    return H5Screate_simple(rank, size, NULL);
}


/*-------------------------------------------------------------------------
 * Function:	fix_external
 *
 * Purpose:	Sets the external file information for a dataset creation
 *		property list based on information from PDB.
 *
 * Return:	Success:	non-negative
 *
 *		Failure:	negative
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static int
fix_external(hid_t dcpl, const char *pdb_file_name, long nelmts,
	     hsize_t elmt_size, symblock *block)
{
    int		i;
    
    for (i=0; nelmts>0; i++) {
	hsize_t nbytes = block[i].number * elmt_size;
	H5Pset_external(dcpl, pdb_file_name, block[i].diskaddr, nbytes);
	nelmts -= block[i].number;
    }
    return 0;
}


/*-------------------------------------------------------------------------
 * Function:	traverse
 *
 * Purpose:	Traverse the current working directory of the PDB file.
 *
 * Return:	Success:	0
 *
 *		Failure:	-1
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static int
traverse(PDBfile *pdb, const char *pdb_file_name, hid_t hdf)
{
    int			nitems, i, in_subdir=FALSE;
    char		**list=NULL;
    hid_t		group=-1, h_type=-1, h_space=-1, dset=-1, dcpl=-1;
    hsize_t		elmt_size;
    const syment	*ep=NULL;

    if (NULL==(list=PD_ls(pdb, ".", NULL, &nitems))) {
	fprintf(stderr, "cannot obtain PDB directory contents\n");
	goto error;
    }

    for (i=0; i<nitems; i++) {
	ep = PD_inquire_entry(pdb, list[i], TRUE, NULL);
	if (verbose_g) {
	    printf("%s %s\n", _PD_fixname(pdb, list[i]), ep->type);
	    fflush(stdout);
	}
	

	if ('/'==list[i][strlen(list[i])-1]) {
	    /*
	     * This is a PDB directory. Make a corresponding HDF5 group and
	     * traverse into that PDB directory and HDF5 group
	     */
	    if ((group=H5Gcreate(hdf, list[i], 0))<0) {
		fprintf(stderr, "cannot create HDF group %s\n", list[i]);
		goto error;
	    }
	    if (!PD_cd(pdb, list[i])) {
		fprintf(stderr, "cannot cd into PDB directory %s\n", list[i]);
		goto error;
	    } else {
		in_subdir = TRUE;
	    }
	    
	    traverse(pdb, pdb_file_name, group);
	    if (!PD_cd(pdb, "..")) {
		fprintf(stderr, "cannot traverse out of PDB %s\n", list[i]);
		goto error;
	    }
	    H5Gclose(group);
	    
	} else {
	    /* This is some non-directory PDB object */

	    /* Create an HDF5 datatype from the PDB type */
	    if ((h_type=fix_type(pdb, ep->type))<0) {
		fprintf(stderr, "cannot create datatype for %s (%s)\n",
		       list[i], ep->type);
		continue;
	    }
	    elmt_size = H5Tget_size(h_type);

	    /* Create an HDF5 dataspace from the PDB dimensions */
	    if ((h_space=fix_space(ep->dimensions))<0) {
		fprintf(stderr, "cannot create datatype for %s\n", list[i]);
		continue;
	    }

	    /* Create pointers to the external PDB data */
	    dcpl = H5Pcreate(H5P_DATASET_CREATE);
	    fix_external(dcpl, pdb_file_name, ep->number, elmt_size,
			 ep->blocks);

	    /* Create the dataset */
	    if ((dset=H5Dcreate(hdf, list[i], h_type, h_space, dcpl))<0) {
		fprintf(stderr, "cannot create dataset for %s\n", list[i]);
	    }

	    H5Pclose(dcpl);
	    H5Dclose(dset);
	    H5Sclose(h_space);
	    H5Tclose(h_type);
	}
	
    }

    for (i=0; i<nitems; i++) {
	SC_free(list[i]);
    }
    SC_free(list);
    return 0;

 error:
    if (group>=0) H5Gclose(group);
    if (in_subdir) PD_cd(pdb, "..");
    if (list) {
	for (i=0; i<nitems; i++) {
	    SC_free(list[i]);
	}
	SC_free(list);
    }
    return -1;
}


/*-------------------------------------------------------------------------
 * Function:	main
 *
 * Purpose:	Create an HDF5 file from a PDB file.
 *
 * Return:	Success:	0
 *
 *		Failure:	non-zero
 *
 * Programmer:	Robb Matzke
 *              Tuesday, October 12, 1999
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
int
main(int argc, char *argv[])
{
    int		argno;
    char	_hdf_name[512], *hdf_name, *pdb_name, *s;
    PDBfile	*pdb;
    hid_t	hdf, fapl;

    /* Print a help message if called with no arguments */
    if (1==argc) {
	usage(argv[0]);
	exit(1);
    }

    /* Process arguments in order; switches interspersed with files */
    for (argno=1; argno<argc; argno++) {
	if (!strcmp("--help", argv[argno])) {
	    usage(argv[0]);
	    exit(1);
	} else if (!strcmp("--verbose", argv[argno])) {
	    verbose_g++;
	} else if (!strcmp("--cached", argv[argno])) {
	    cached_g++;
	} else if (!strcmp("--version", argv[argno])) {
	    version(argv[0]);
	} else if ('-'==argv[argno][0] && '-'!=argv[argno][1]) {
	    for (s=argv[argno]+1; *s; s++) {
		switch (*s) {
		case '?':
		case 'h':		/*--help*/
		    usage(argv[0]);
		    exit(0);
		case 'c':		/*--cached*/
		    cached_g++;
		    break;
		case 'v':		/*--verbose*/
		    verbose_g++;
		    break;
		case 'V':		/*--version*/
		    version(argv[0]);
		    break;
		default:
		    usage(argv[0]);
		    exit(1);
		}
	    }
	} else if ('-'==argv[argno][0]) {
	    usage(argv[0]);
	    exit(1);
	} else {
	    /* This must be a file name. Process it. */
	    fapl = H5Pcreate(H5P_FILE_ACCESS);
	    if (cached_g) H5Pset_fapl_core(fapl, 1024*1024, TRUE);

	    pdb_name = argv[argno];
	    hdf_name = fix_name(argv[argno], _hdf_name, sizeof _hdf_name);
	    if (NULL==(pdb=PD_open(pdb_name, "r"))) {
		fprintf(stderr, "%s: unable to open PDB file\n", pdb_name);
		exit(1);
	    }
	    if ((hdf=H5Fcreate(hdf_name, H5F_ACC_TRUNC, H5P_DEFAULT,
			       fapl))<0) {
		fprintf(stderr, "%s: unable to open HDF file\n", hdf_name);
		exit(1);
	    }
	    H5Pclose(fapl);
	    
	    /*
	     * Traverse the PDB file to create the HDF5 file.
	     */
	    traverse(pdb, pdb_name, hdf);

	    /* Close the files */
	    if (!PD_close(pdb)) {
		fprintf(stderr, "%s: problems closing PDB file\n", pdb_name);
		exit(1);
	    }
	    if (H5Fclose(hdf)<0) {
		fprintf(stderr, "%s: problems closing HDF file\n", hdf_name);
		exit(1);
	    }
	}
    }
    return 0;
}