diff options
author | Pedro Vicente Nunes <pvn@hdfgroup.org> | 2007-08-07 16:19:11 (GMT) |
---|---|---|
committer | Pedro Vicente Nunes <pvn@hdfgroup.org> | 2007-08-07 16:19:11 (GMT) |
commit | 6c95c46fcb0497f3bcb945b0da138e04fc600ab4 (patch) | |
tree | e3493806594f51f408c950c195c9e3b023634e5e | |
parent | e2477c8d0b76325590f43be8926634f16da5f849 (diff) | |
download | hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.zip hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.tar.gz hdf5-6c95c46fcb0497f3bcb945b0da138e04fc600ab4.tar.bz2 |
[svn-r14039]
New feature: h5import can now convert an ASCII plain-text file containing text data into an HDF5 dataset.
The string type H5T_C_S1 is used to define the data (a datum is defined here as one line of text in the text file). The size is set to variable length (H5T_VARIABLE).
The dataspace used is a 1D array with as many elements as there are lines in the ASCII file (a line is defined by the presence of an end-of-line character, ASCII number 10). A first traversal of the input text file must be made to determine the number of lines in the file and thus the size of the dataset's single dimension.
Added a new test to the test script.
Added the text input files, and teststr.h5 for h5dump to compare against.
Tested: Windows, Linux, Solaris
-rwxr-xr-x | tools/h5import/h5import.c | 198 | ||||
-rwxr-xr-x | tools/h5import/h5import.h | 3 | ||||
-rwxr-xr-x | tools/h5import/h5importtestutil.sh | 3 | ||||
-rw-r--r-- | tools/h5import/testfiles/teststr.h5 | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | tools/h5import/testfiles/textstr | 6 | ||||
-rw-r--r-- | tools/h5import/testfiles/txtstr | 2 |
6 files changed, 209 insertions, 3 deletions
diff --git a/tools/h5import/h5import.c b/tools/h5import/h5import.c index 2cb6b56..dc3abb1 100755 --- a/tools/h5import/h5import.c +++ b/tools/h5import/h5import.c @@ -254,8 +254,25 @@ gtoken(char *s) return (token); } + + +/*------------------------------------------------------------------------- + * Function: processDataFile + * + * Purpose: allocate memory and read data file + * + * Return: 0, success, -1, error + * + * Programmer: pkmat + * + * Modifications: pvn + * 7/23/2007. Added support for STR type + * + *------------------------------------------------------------------------- + */ + static int -processDataFile(char *infile, struct Input *in, FILE **strm) +processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id) { const char *err1 = "Unable to open the input file %s for reading.\n"; const char *err2 = "Error in allocating integer data storage.\n"; @@ -265,6 +282,7 @@ processDataFile(char *infile, struct Input *in, FILE **strm) const char *err6 = "Error in allocating unsigned integer data storage.\n"; const char *err7 = "Error in reading unsigned integer data.\n"; const char *err10 = "Unrecognized input class type.\n"; + const char *err11 = "Error in reading string data.\n"; if ((*strm = fopen(infile, "r")) == NULL) { @@ -307,6 +325,15 @@ processDataFile(char *infile, struct Input *in, FILE **strm) break; case 5: /* STR */ + + if (processStrData(strm, in, file_id) == -1) + { + (void) fprintf(stderr, err11, infile); + return(-1); + } + + + break; case 6: /* TEXTUIN */ @@ -755,6 +782,162 @@ readFloatData(FILE **strm, struct Input *in) return(0); } + + +/*------------------------------------------------------------------------- + * Function: processStrData + * + * Purpose: read an ASCII file with string data and generate an HDF5 dataset + * with a variable length type + * + * Return: 0, ok, -1 no + * + * Programmer: Pedro Vicente, pvn@hdfgroup.org + * + * Date: July, 26, 2007 + * + 
*------------------------------------------------------------------------- + */ +static int +processStrData(FILE **strm, struct Input *in, hid_t file_id) +{ + hid_t group_id, dset_id, space_id, mspace_id, type_id, handle; + hsize_t dims[1]; + char str[1024]; + char c; + int i = 0, j, nlines = 0, line; + +/*------------------------------------------------------------------------- + * get number of lines in the input file + *------------------------------------------------------------------------- + */ + + while ( !feof( *strm ) ) + { + c = fgetc( *strm ); + + if ( c == 10 ) /* eol */ + { + nlines++; + + } + } + + if ( !nlines ) + return 0; + + /* number of records */ + dims[0] = nlines; + + /* rewind */ + fseek(*strm,0L,0); + +/*------------------------------------------------------------------------- + * read file again and generate an HDF5 dataset + *------------------------------------------------------------------------- + */ + + if (( type_id = H5Tcopy(H5T_C_S1)) < 0 ) + goto out; + + if ( H5Tset_size (type_id,H5T_VARIABLE) < 0 ) + goto out; + + /* disable error reporting */ + H5E_BEGIN_TRY + { + + /* create parent groups */ + if (in->path.count > 1) + { + j = 0; + handle = file_id; + while (j<in->path.count-1) + { + if ((group_id = H5Gopen(handle, in->path.group[j])) < 0) + { + group_id = H5Gcreate(handle, in->path.group[j++], 0); + for (; j<in->path.count-1; j++) + group_id = H5Gcreate(group_id, in->path.group[j], 0); + handle = group_id; + break; + } + handle = group_id; + j++; + } + } + else + { + handle = file_id; + j=0; + } + + /*enable error reporting */ + } H5E_END_TRY; + + if (( space_id = H5Screate_simple(1,dims,NULL)) < 0 ) + goto out; + + if (( mspace_id = H5Screate(H5S_SCALAR)) < 0 ) + goto out; + + if (( dset_id = H5Dcreate(handle, in->path.group[j], type_id, space_id, H5P_DEFAULT)) < 0) + goto out; + + line = 0; + + while ( !feof( *strm ) ) + { + c = fgetc( *strm ); + + str[ i ] = c; + + i++; + + if ( c == 10 ) /* eol */ + { + char *str2 = str; + 
hid_t fspace_id; + hsize_t start[1]; + hsize_t count[1] = { 1 }; + + str[ i-1 ] = '\0'; /* terminate string */ + + if (( fspace_id = H5Dget_space (dset_id)) < 0 ) + goto out; + + start[0] = line ++ ; + + if ( H5Sselect_hyperslab(fspace_id,H5S_SELECT_SET,start,NULL,count,NULL) < 0 ) + goto out; + + if ( H5Dwrite(dset_id,type_id,mspace_id,fspace_id,H5P_DEFAULT, &str2 ) < 0 ) + goto out; + + if ( H5Sclose(fspace_id) < 0 ) + goto out; + + i = 0; + str[ 0 ] = '\0'; + + } + } + + + /* close */ + H5Dclose(dset_id); + H5Sclose(space_id); + H5Sclose(mspace_id); + H5Tclose(type_id); + + return(0); + +out: + + return (-1); +} + + static int allocateIntegerStorage(struct Input *in) { @@ -1258,6 +1441,10 @@ validateConfigurationParameters(struct Input * in) const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n"; #endif + /* for class STR other parameters are ignored */ + if (in->inputClass == 5) /* STR */ + return (0); + if ( (in->configOptionVector[DIM] != 1) || (in->configOptionVector[RANK] != 1)) @@ -2242,12 +2429,15 @@ process(struct Options *opt) } } - if (processDataFile(opt->infiles[k].datafile, in, &strm) == -1) + if (processDataFile(opt->infiles[k].datafile, in, &strm, file_id ) == -1) { (void) fprintf(stderr, err3, opt->infiles[k].datafile); return (-1); } + if (in->inputClass != 5) /* STR */ + { + for (j=0; j<in->rank;j++) numOfElements *= in->sizeOfDimension[j]; @@ -2355,6 +2545,10 @@ process(struct Options *opt) H5Pclose(proplist); H5Sclose(dataspace); } + + } /* STR */ + + H5Fclose(file_id); return (0); } diff --git a/tools/h5import/h5import.h b/tools/h5import/h5import.h index e061871..975ace0 100755 --- a/tools/h5import/h5import.h +++ b/tools/h5import/h5import.h @@ -214,7 +214,7 @@ static int CompressionTypeStrToInt(char *temp); static int getCompressionParameter(struct Input *in, FILE** strm); static int getExternalFilename(struct Input *in, FILE** strm); static int getMaximumDimensionSizes(struct Input *in, 
FILE **strm); -static int processDataFile(char *infile, struct Input *in, FILE **strm); +static int processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id); static int readIntegerData(FILE **strm, struct Input *in); static int readFloatData(FILE **strm, struct Input *in); static int allocateIntegerStorage(struct Input *in); @@ -224,6 +224,7 @@ hid_t createInputDataType(struct Input *in); static int readUIntegerData(FILE **strm, struct Input *in); static int allocateUIntegerStorage(struct Input *in); static int validateConfigurationParameters(struct Input * in); +static int processStrData(FILE **strm, struct Input *in, hid_t file_id); #endif /* H5IMPORT_H__ */ diff --git a/tools/h5import/h5importtestutil.sh b/tools/h5import/h5importtestutil.sh index 09d2303..0687871 100755 --- a/tools/h5import/h5importtestutil.sh +++ b/tools/h5import/h5importtestutil.sh @@ -98,6 +98,9 @@ TOOLTEST buin16 -c $srcdir/testfiles/conbuin16 -o test12.h5 TESTING "BINARY UI32 - rank 3 - Output LE + CHUNKED " TOOLTEST buin32 -c $srcdir/testfiles/conbuin32 -o test13.h5 +TESTING "STR" +TOOLTEST $srcdir/testfiles/txtstr -c $srcdir/testfiles/textstr -o teststr.h5 + rm -f tx* b* *.dat rm -f test*.h5 rm -rf tmp_testfiles diff --git a/tools/h5import/testfiles/teststr.h5 b/tools/h5import/testfiles/teststr.h5 Binary files differnew file mode 100644 index 0000000..ceb0810 --- /dev/null +++ b/tools/h5import/testfiles/teststr.h5 diff --git a/tools/h5import/testfiles/textstr b/tools/h5import/testfiles/textstr new file mode 100644 index 0000000..85079e0 --- /dev/null +++ b/tools/h5import/testfiles/textstr @@ -0,0 +1,6 @@ +PATH /mytext/data +INPUT-CLASS STR + + + + diff --git a/tools/h5import/testfiles/txtstr b/tools/h5import/testfiles/txtstr new file mode 100644 index 0000000..25be0a6 --- /dev/null +++ b/tools/h5import/testfiles/txtstr @@ -0,0 +1,2 @@ + hello world + hello world again |