diff options
author | Pedro Vicente Nunes <pvn@hdfgroup.org> | 2007-08-07 16:19:11 (GMT) |
---|---|---|
committer | Pedro Vicente Nunes <pvn@hdfgroup.org> | 2007-08-07 16:19:11 (GMT) |
commit | 6c95c46fcb0497f3bcb945b0da138e04fc600ab4 (patch) | |
tree | e3493806594f51f408c950c195c9e3b023634e5e /tools/h5import/h5import.c | |
parent | e2477c8d0b76325590f43be8926634f16da5f849 (diff) | |
download | hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.zip hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.tar.gz hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.tar.bz2 |
[svn-r14039]
New feature: h5import can now convert a plain-text ASCII file containing text data into an HDF5 dataset.
The data is stored using the string type H5T_C_S1, where one datum is one line of text from the input file. The type's size is set to variable length (H5T_VARIABLE).
The dataspace is a 1D array with as many elements as there are lines in the ASCII file (a line is terminated by an end-of-line character, ASCII code 10). A first pass over the input text file is needed to count the lines and thus determine the size of the dataset's single dimension.
Added a new test to the test script.
Added the text input files and teststr.h5, which h5dump uses for comparison.
Tested: Windows, Linux, Solaris
Diffstat (limited to 'tools/h5import/h5import.c')
-rwxr-xr-x | tools/h5import/h5import.c | 198 |
1 files changed, 196 insertions, 2 deletions
diff --git a/tools/h5import/h5import.c b/tools/h5import/h5import.c index 2cb6b56..dc3abb1 100755 --- a/tools/h5import/h5import.c +++ b/tools/h5import/h5import.c @@ -254,8 +254,25 @@ gtoken(char *s) return (token); } + + +/*------------------------------------------------------------------------- + * Function: processDataFile + * + * Purpose: allocate memory and read data file + * + * Return: 0, success, -1, error + * + * Programmer: pkmat + * + * Modifications: pvn + * 7/23/2007. Added support for STR type + * + *------------------------------------------------------------------------- + */ + static int -processDataFile(char *infile, struct Input *in, FILE **strm) +processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id) { const char *err1 = "Unable to open the input file %s for reading.\n"; const char *err2 = "Error in allocating integer data storage.\n"; @@ -265,6 +282,7 @@ processDataFile(char *infile, struct Input *in, FILE **strm) const char *err6 = "Error in allocating unsigned integer data storage.\n"; const char *err7 = "Error in reading unsigned integer data.\n"; const char *err10 = "Unrecognized input class type.\n"; + const char *err11 = "Error in reading string data.\n"; if ((*strm = fopen(infile, "r")) == NULL) { @@ -307,6 +325,15 @@ processDataFile(char *infile, struct Input *in, FILE **strm) break; case 5: /* STR */ + + if (processStrData(strm, in, file_id) == -1) + { + (void) fprintf(stderr, err11, infile); + return(-1); + } + + + break; case 6: /* TEXTUIN */ @@ -755,6 +782,162 @@ readFloatData(FILE **strm, struct Input *in) return(0); } + + +/*------------------------------------------------------------------------- + * Function: processStrData + * + * Purpose: read an ASCII file with string data and generate an HDF5 dataset + * with a variable length type + * + * Return: 0, ok, -1 no + * + * Programmer: Pedro Vicente, pvn@hdfgroup.org + * + * Date: July, 26, 2007 + * + 
*------------------------------------------------------------------------- + */ +static int +processStrData(FILE **strm, struct Input *in, hid_t file_id) +{ + hid_t group_id, dset_id, space_id, mspace_id, type_id, handle; + hsize_t dims[1]; + char str[1024]; + char c; + int i = 0, j, nlines = 0, line; + +/*------------------------------------------------------------------------- + * get number of lines in the input file + *------------------------------------------------------------------------- + */ + + while ( !feof( *strm ) ) + { + c = fgetc( *strm ); + + if ( c == 10 ) /* eol */ + { + nlines++; + + } + } + + if ( !nlines ) + return 0; + + /* number of records */ + dims[0] = nlines; + + /* rewind */ + fseek(*strm,0L,0); + +/*------------------------------------------------------------------------- + * read file again and generate an HDF5 dataset + *------------------------------------------------------------------------- + */ + + if (( type_id = H5Tcopy(H5T_C_S1)) < 0 ) + goto out; + + if ( H5Tset_size (type_id,H5T_VARIABLE) < 0 ) + goto out; + + /* disable error reporting */ + H5E_BEGIN_TRY + { + + /* create parent groups */ + if (in->path.count > 1) + { + j = 0; + handle = file_id; + while (j<in->path.count-1) + { + if ((group_id = H5Gopen(handle, in->path.group[j])) < 0) + { + group_id = H5Gcreate(handle, in->path.group[j++], 0); + for (; j<in->path.count-1; j++) + group_id = H5Gcreate(group_id, in->path.group[j], 0); + handle = group_id; + break; + } + handle = group_id; + j++; + } + } + else + { + handle = file_id; + j=0; + } + + /*enable error reporting */ + } H5E_END_TRY; + + if (( space_id = H5Screate_simple(1,dims,NULL)) < 0 ) + goto out; + + if (( mspace_id = H5Screate(H5S_SCALAR)) < 0 ) + goto out; + + if (( dset_id = H5Dcreate(handle, in->path.group[j], type_id, space_id, H5P_DEFAULT)) < 0) + goto out; + + line = 0; + + while ( !feof( *strm ) ) + { + c = fgetc( *strm ); + + str[ i ] = c; + + i++; + + if ( c == 10 ) /* eol */ + { + char *str2 = str; + 
hid_t fspace_id; + hsize_t start[1]; + hsize_t count[1] = { 1 }; + + str[ i-1 ] = '\0'; /* terminate string */ + + if (( fspace_id = H5Dget_space (dset_id)) < 0 ) + goto out; + + start[0] = line ++ ; + + if ( H5Sselect_hyperslab(fspace_id,H5S_SELECT_SET,start,NULL,count,NULL) < 0 ) + goto out; + + if ( H5Dwrite(dset_id,type_id,mspace_id,fspace_id,H5P_DEFAULT, &str2 ) < 0 ) + goto out; + + if ( H5Sclose(fspace_id) < 0 ) + goto out; + + i = 0; + str[ 0 ] = '\0'; + + } + } + + + /* close */ + H5Dclose(dset_id); + H5Sclose(space_id); + H5Sclose(mspace_id); + H5Tclose(type_id); + + return(0); + +out: + + return (-1); +} + + static int allocateIntegerStorage(struct Input *in) { @@ -1258,6 +1441,10 @@ validateConfigurationParameters(struct Input * in) const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n"; #endif + /* for class STR other parameters are ignored */ + if (in->inputClass == 5) /* STR */ + return (0); + if ( (in->configOptionVector[DIM] != 1) || (in->configOptionVector[RANK] != 1)) @@ -2242,12 +2429,15 @@ process(struct Options *opt) } } - if (processDataFile(opt->infiles[k].datafile, in, &strm) == -1) + if (processDataFile(opt->infiles[k].datafile, in, &strm, file_id ) == -1) { (void) fprintf(stderr, err3, opt->infiles[k].datafile); return (-1); } + if (in->inputClass != 5) /* STR */ + { + for (j=0; j<in->rank;j++) numOfElements *= in->sizeOfDimension[j]; @@ -2355,6 +2545,10 @@ process(struct Options *opt) H5Pclose(proplist); H5Sclose(dataspace); } + + } /* STR */ + + H5Fclose(file_id); return (0); } |