From 6c95c46fcb0497f3bcb945b0da138e04fc600ab4 Mon Sep 17 00:00:00 2001 From: Pedro Vicente Nunes Date: Tue, 7 Aug 2007 11:19:11 -0500 Subject: [svn-r14039] New feature: implementation of h5import conversion of an ASCII plain-text file containing text data. The string type H5T_C_S1 is used to define the data (the datum is defined here as one line of text in the text file). The size is set to variable length (H5T_VARIABLE). The space used is a 1D array with as many elements as there are lines in the ASCII file (a line is defined by the inclusion of an end of line character, ASCII number 10). A first traversal of the input text file must be made to determine the number of lines in the file and thus the dimensionality of the dataset. New test added to the test script; added text input files and teststr.h5 for h5dump to compare. Tested: windows, linux, solaris --- tools/h5import/h5import.c | 198 +++++++++++++++++++++++++++++++++++- tools/h5import/h5import.h | 3 +- tools/h5import/h5importtestutil.sh | 3 + tools/h5import/testfiles/teststr.h5 | Bin 0 -> 10240 bytes tools/h5import/testfiles/textstr | 6 ++ tools/h5import/testfiles/txtstr | 2 + 6 files changed, 209 insertions(+), 3 deletions(-) create mode 100644 tools/h5import/testfiles/teststr.h5 create mode 100644 tools/h5import/testfiles/textstr create mode 100644 tools/h5import/testfiles/txtstr diff --git a/tools/h5import/h5import.c b/tools/h5import/h5import.c index 2cb6b56..dc3abb1 100755 --- a/tools/h5import/h5import.c +++ b/tools/h5import/h5import.c @@ -254,8 +254,25 @@ gtoken(char *s) return (token); } + + +/*------------------------------------------------------------------------- + * Function: processDataFile + * + * Purpose: allocate memory and read data file + * + * Return: 0, success, -1, error + * + * Programmer: pkmat + * + * Modifications: pvn + * 7/23/2007. 
Added support for STR type
+ *
+ *-------------------------------------------------------------------------
+ */
+
 static int
-processDataFile(char *infile, struct Input *in, FILE **strm)
+processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id)
 {
   const char *err1 = "Unable to open the input file %s for reading.\n";
   const char *err2 = "Error in allocating integer data storage.\n";
@@ -265,6 +282,7 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
   const char *err6 = "Error in allocating unsigned integer data storage.\n";
   const char *err7 = "Error in reading unsigned integer data.\n";
   const char *err10 = "Unrecognized input class type.\n";
+  const char *err11 = "Error in reading string data.\n";
 
   if ((*strm = fopen(infile, "r")) == NULL)
   {
@@ -307,6 +325,15 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
       break;
 
     case 5: /* STR */
+
+      if (processStrData(strm, in, file_id) == -1)
+      {
+        (void) fprintf(stderr, err11, infile);
+        return(-1);
+      }
+
+
+
       break;
 
     case 6: /* TEXTUIN */
@@ -755,6 +782,194 @@ readFloatData(FILE **strm, struct Input *in)
 
   return(0);
 }
+
+
+/*-------------------------------------------------------------------------
+ * Function: processStrData
+ *
+ * Purpose: read an ASCII file with string data and generate an HDF5 dataset
+ *  with a variable length string type; every line of the input file is
+ *  written as one element of a 1D dataset
+ *
+ * Parameters: strm, the input file, already open for reading
+ *  in, the import description; the last component of in->path names the
+ *   dataset, the components before it name its parent groups
+ *  file_id, the output HDF5 file
+ *
+ * Return: 0, ok, -1 no
+ *
+ * Notes: an input file without any character generates no dataset and is
+ *  not an error; a last line that lacks the end of line character is
+ *  stored like any other line; a line that does not fit the line buffer
+ *  is an error
+ *
+ * Programmer: Pedro Vicente, pvn@hdfgroup.org
+ *
+ * Date: July, 26, 2007
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+processStrData(FILE **strm, struct Input *in, hid_t file_id)
+{
+    hid_t   handle = file_id;  /* location the dataset is created in */
+    hid_t   group_id;
+    hid_t   type_id = -1;
+    hid_t   space_id = -1;     /* dataspace of the new dataset */
+    hid_t   mspace_id = -1;    /* memory space of one write, a single string */
+    hid_t   fspace_id = -1;    /* file space, selects the element to write */
+    hid_t   dset_id = -1;
+    hsize_t dims[1];
+    hsize_t start[1];
+    hsize_t count[1] = { 1 };
+    char    str[1024];         /* one line of text */
+    char   *str2 = str;        /* a variable length string is written through a char pointer */
+    size_t  i = 0;             /* characters of the current line read so far */
+    int     c;                 /* int, EOF must differ from every character */
+    int     last = '\n';       /* last character of the file */
+    int     j = 0;
+    int     nlines = 0;
+    int     line = 0;
+    int     ret = -1;
+
+/*-------------------------------------------------------------------------
+ * get number of lines in the input file
+ *-------------------------------------------------------------------------
+ */
+
+    while ((c = fgetc(*strm)) != EOF)
+    {
+        if (c == '\n') /* eol */
+            nlines++;
+
+        last = c;
+    }
+
+    if (ferror(*strm))
+        return (-1);
+
+    /* a last line without eol counts too */
+    if (last != '\n')
+        nlines++;
+
+    if (!nlines)
+        return 0;
+
+    /* number of records */
+    dims[0] = (hsize_t) nlines;
+
+    /* rewind */
+    if (fseek(*strm, 0L, SEEK_SET) != 0)
+        return (-1);
+
+/*-------------------------------------------------------------------------
+ * read file again and generate an HDF5 dataset
+ *-------------------------------------------------------------------------
+ */
+
+    if ((type_id = H5Tcopy(H5T_C_S1)) < 0)
+        goto out;
+
+    if (H5Tset_size(type_id, H5T_VARIABLE) < 0)
+        goto out;
+
+    /* open the parent groups, create those that do not exist yet */
+    if (in->path.count > 1)
+    {
+        /* a failed open is expected here, disable error reporting */
+        H5E_BEGIN_TRY
+        {
+            for (j = 0; j < in->path.count - 1; j++)
+            {
+                group_id = H5Gopen(handle, in->path.group[j]);
+                if (group_id < 0)
+                    group_id = H5Gcreate(handle, in->path.group[j], 0);
+
+                /* the parent is no longer needed, do not leak it */
+                if (handle != file_id)
+                    H5Gclose(handle);
+
+                handle = group_id;
+                if (handle < 0)
+                    break;
+            }
+        /* enable error reporting */
+        } H5E_END_TRY;
+
+        if (handle < 0)
+            goto out;
+    }
+
+    if ((space_id = H5Screate_simple(1, dims, NULL)) < 0)
+        goto out;
+
+    if ((mspace_id = H5Screate(H5S_SCALAR)) < 0)
+        goto out;
+
+    if ((dset_id = H5Dcreate(handle, in->path.group[j], type_id, space_id, H5P_DEFAULT)) < 0)
+        goto out;
+
+    if ((fspace_id = H5Dget_space(dset_id)) < 0)
+        goto out;
+
+    for (;;)
+    {
+        c = fgetc(*strm);
+
+        if (c != '\n' && c != EOF)
+        {
+            /* one byte stays reserved for the terminator */
+            if (i >= sizeof(str) - 1)
+                goto out;
+
+            str[i++] = (char) c;
+            continue;
+        }
+
+        /* end of file right after an eol, no line is pending */
+        if (c == EOF && i == 0)
+            break;
+
+        str[i] = '\0'; /* terminate string */
+        i = 0;
+
+        start[0] = (hsize_t) line++;
+
+        if (H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, NULL, count, NULL) < 0)
+            goto out;
+
+        if (H5Dwrite(dset_id, type_id, mspace_id, fspace_id, H5P_DEFAULT, &str2) < 0)
+            goto out;
+
+        if (c == EOF)
+            break;
+    }
+
+    if (ferror(*strm))
+        goto out;
+
+    ret = 0;
+
+out:
+
+    /* close whatever was opened, on success and on failure */
+    if (fspace_id >= 0)
+        H5Sclose(fspace_id);
+    if (dset_id >= 0)
+        H5Dclose(dset_id);
+    if (mspace_id >= 0)
+        H5Sclose(mspace_id);
+    if (space_id >= 0)
+        H5Sclose(space_id);
+    if (handle >= 0 && handle != file_id)
+        H5Gclose(handle);
+    if (type_id >= 0)
+        H5Tclose(type_id);
+
+    return (ret);
+}
+
+
 static int
 allocateIntegerStorage(struct Input *in)
 {
@@ -1258,6 +1473,10 @@ validateConfigurationParameters(struct Input * in)
   const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n";
 #endif
 
+  /* for class STR other parameters are ignored */
+  if (in->inputClass == 5) /* STR */
+    return (0);
+
   if (
       (in->configOptionVector[DIM] != 1) ||
       (in->configOptionVector[RANK] != 1))
@@ -2242,12 +2461,15 @@ process(struct Options *opt)
       }
     }
 
-    if (processDataFile(opt->infiles[k].datafile, in, &strm) == -1)
+    if (processDataFile(opt->infiles[k].datafile, in, &strm, file_id ) == -1)
     {
       (void) fprintf(stderr, err3, opt->infiles[k].datafile);
       return (-1);
     }
 
+    if (in->inputClass != 5) /* STR */
+    {
+
     for (j=0; j<in->rank;j++)
       numOfElements *= in->sizeOfDimension[j];
 
@@ -2355,6 +2577,10 @@ process(struct Options *opt)
       H5Pclose(proplist);
       H5Sclose(dataspace);
     }
+
+    } /* STR */
+
+
     H5Fclose(file_id);
   return (0);
 }
diff --git a/tools/h5import/h5import.h b/tools/h5import/h5import.h
index e061871..975ace0 100755
--- a/tools/h5import/h5import.h
+++ b/tools/h5import/h5import.h
@@ -214,7 +214,7 @@ static int CompressionTypeStrToInt(char *temp);
 static int getCompressionParameter(struct Input *in, FILE** strm);
 static int getExternalFilename(struct Input *in, FILE** strm);
 static int getMaximumDimensionSizes(struct Input *in, FILE **strm);
-static int processDataFile(char *infile, struct Input *in, FILE **strm);
+static int processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id);
 static int readIntegerData(FILE **strm, struct Input *in);
 static int readFloatData(FILE **strm, struct Input *in);
 static int allocateIntegerStorage(struct Input *in);
@@ -224,6 +224,7 @@ hid_t createInputDataType(struct Input *in);
 static int readUIntegerData(FILE **strm, struct Input *in);
 static int allocateUIntegerStorage(struct Input *in);
 static int
validateConfigurationParameters(struct Input * in); +static int processStrData(FILE **strm, struct Input *in, hid_t file_id); #endif /* H5IMPORT_H__ */ diff --git a/tools/h5import/h5importtestutil.sh b/tools/h5import/h5importtestutil.sh index 09d2303..0687871 100755 --- a/tools/h5import/h5importtestutil.sh +++ b/tools/h5import/h5importtestutil.sh @@ -98,6 +98,9 @@ TOOLTEST buin16 -c $srcdir/testfiles/conbuin16 -o test12.h5 TESTING "BINARY UI32 - rank 3 - Output LE + CHUNKED " TOOLTEST buin32 -c $srcdir/testfiles/conbuin32 -o test13.h5 +TESTING "STR" +TOOLTEST $srcdir/testfiles/txtstr -c $srcdir/testfiles/textstr -o teststr.h5 + rm -f tx* b* *.dat rm -f test*.h5 rm -rf tmp_testfiles diff --git a/tools/h5import/testfiles/teststr.h5 b/tools/h5import/testfiles/teststr.h5 new file mode 100644 index 0000000..ceb0810 Binary files /dev/null and b/tools/h5import/testfiles/teststr.h5 differ diff --git a/tools/h5import/testfiles/textstr b/tools/h5import/testfiles/textstr new file mode 100644 index 0000000..85079e0 --- /dev/null +++ b/tools/h5import/testfiles/textstr @@ -0,0 +1,6 @@ +PATH /mytext/data +INPUT-CLASS STR + + + + diff --git a/tools/h5import/testfiles/txtstr b/tools/h5import/testfiles/txtstr new file mode 100644 index 0000000..25be0a6 --- /dev/null +++ b/tools/h5import/testfiles/txtstr @@ -0,0 +1,2 @@ + hello world + hello world again -- cgit v0.12