diff options
author | Raymond Lu <songyulu@hdfgroup.org> | 2011-11-08 22:34:00 (GMT) |
---|---|---|
committer | Raymond Lu <songyulu@hdfgroup.org> | 2011-11-08 22:34:00 (GMT) |
commit | 68da0f773642748d996b498e28b0d39dc3b08710 (patch) | |
tree | 3f1e7b61adad313a72c6a61fc129ca28e2ff40dd | |
parent | 264fa3562aa40b083bbfea7bd27731086e19aa9e (diff) | |
download | hdf5-68da0f773642748d996b498e28b0d39dc3b08710.zip hdf5-68da0f773642748d996b498e28b0d39dc3b08710.tar.gz hdf5-68da0f773642748d996b498e28b0d39dc3b08710.tar.bz2 |
[svn-r21734] Issue 7582 - The library allowed the conversion of strings between ASCII and UTF8. I corrected this by adding a check in H5T_conv_s_s and H5T_conv_vlen that reports an error in this situation.
Tested on jam, koala, linew.
-rw-r--r-- | release_docs/RELEASE.txt | 3 | ||||
-rw-r--r-- | src/H5T.c | 2 | ||||
-rw-r--r-- | src/H5Tconv.c | 15 | ||||
-rw-r--r-- | test/dtypes.c | 132 |
4 files changed, 151 insertions, 1 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 633695c..bd93295 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -343,6 +343,9 @@ Bug Fixes since HDF5-1.8.0 release Library ------- + - The library allowed the conversion of strings between ASCII and UTF8 + (Issue 7582). We have corrected it to report an error under this + situation. (SLU - 2011/11/8) - The library had seg fault when it tried to shrink the size of compound type through H5Tset_size immediately after the type was created (Issue 7618). It's fixed now. (SLU - 2011/10/26) @@ -4464,7 +4464,7 @@ H5T_path_find(const H5T_t *src, const H5T_t *dst, const char *name, path->cdata.command = H5T_CONV_INIT; if((H5T_g.soft[i].func)(src_id, dst_id, &(path->cdata), (size_t)0, (size_t)0, (size_t)0, NULL, NULL, dxpl_id) < 0) { HDmemset(&(path->cdata), 0, sizeof(H5T_cdata_t)); - H5E_clear_stack(NULL); /*ignore the error*/ + H5E_clear_stack(H5E_DEFAULT); /*ignore the error*/ } /* end if */ else { HDstrcpy(path->name, H5T_g.soft[i].name); diff --git a/src/H5Tconv.c b/src/H5Tconv.c index 405883e..7b1e1d7 100644 --- a/src/H5Tconv.c +++ b/src/H5Tconv.c @@ -2913,6 +2913,9 @@ done: * old data. At this moment, it only frees the first level of * VL datatype. It doesn't handle nested VL datatypes. * + * Raymond Lu, 8 November 2011 + * I put a condition check to prevent the conversion of VL strings + * between ASCII and UTF8. 
*------------------------------------------------------------------------- */ herr_t @@ -2963,6 +2966,11 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts, HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype") if(H5T_VLEN != dst->shared->type) HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype") + if(H5T_VLEN_STRING == src->shared->u.vlen.type && H5T_VLEN_STRING == dst->shared->u.vlen.type) { + if((H5T_CSET_ASCII == src->shared->u.vlen.cset && H5T_CSET_UTF8 == dst->shared->u.vlen.cset) + || (H5T_CSET_ASCII == dst->shared->u.vlen.cset && H5T_CSET_UTF8 == src->shared->u.vlen.cset)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF") + } /* Variable-length types don't need a background buffer */ cdata->need_bkg = H5T_BKG_NO; @@ -4360,6 +4368,10 @@ done: * then convert one value at each memory location advancing * BUF_STRIDE bytes each time; otherwise assume both source and * destination values are packed. + * + * Raymond Lu, 8 November 2011 + * I put a condition check to prevent the conversion of strings + * between ASCII and UTF8. 
*------------------------------------------------------------------------- */ herr_t @@ -4391,6 +4403,9 @@ H5T_conv_s_s(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts, HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad source character set") if(H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != dst->shared->u.atomic.u.s.cset) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad destination character set") + if((H5T_CSET_ASCII == src->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == dst->shared->u.atomic.u.s.cset) + || (H5T_CSET_ASCII == dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == src->shared->u.atomic.u.s.cset)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF") if(src->shared->u.atomic.u.s.pad < 0 || src->shared->u.atomic.u.s.pad >= H5T_NPAD || dst->shared->u.atomic.u.s.pad < 0 || dst->shared->u.atomic.u.s.pad >= H5T_NPAD) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character padding") diff --git a/test/dtypes.c b/test/dtypes.c index e0d1f8f..648650f 100644 --- a/test/dtypes.c +++ b/test/dtypes.c @@ -81,6 +81,7 @@ const char *FILENAME[] = { "dtypes7", "dtypes8", "dtypes9", + "dtypes10", NULL }; @@ -109,6 +110,10 @@ typedef enum dtype_t { #define DEL_OBJ_NAMED_NAMED_DTYPE "/Dtype" #define DEL_OBJ_NAMED_ATTRIBUTE "Attr" +/* Constant for testing conversion of UTF-8 characters */ +#define UTF8_DATASET "utf8" +#define UTF8_DATASET2 "2nd_utf8" + /* Count opaque conversions */ static int num_opaque_conversions_g = 0; @@ -7021,6 +7026,132 @@ error: } /* end test_deprec() */ #endif /* H5_NO_DEPRECATED_SYMBOLS */ +int test_utf_conv() +{ + hid_t fid; + hid_t did; + hid_t utf8_vtid, ascii_vtid; + hid_t utf8_tid, ascii_tid; + hid_t sid; + const char *utf8 = "foo!"; + char *ascii = NULL; + char ascii2[4]; + herr_t status; + + TESTING("string conversion between ASCII and UTF"); + + /*********************************** + * Test VL string conversion + ***********************************/ + /* 
Create a variable-length string */ + if((utf8_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR + + if((status = H5Tset_size(utf8_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR + + /* Set the character set for the string to UTF-8 */ + if((status = H5Tset_cset(utf8_vtid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR + + /* Create a variable-length string */ + if((ascii_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR + + if((status = H5Tset_size(ascii_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR + + /* Set the character set for the string to ASCII (should already be so) */ + if((status = H5Tset_cset(ascii_vtid, H5T_CSET_ASCII) < 0)) FAIL_STACK_ERROR + + /* Create a file */ + if((fid = H5Fcreate(FILENAME[10], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR + + /* Create a scalar dataspace for the dataset */ + if((sid = H5Screate(H5S_SCALAR)) < 0) FAIL_STACK_ERROR + + /* Create a dataset */ + if((did = H5Dcreate2(fid, UTF8_DATASET, utf8_vtid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR + + /* Write the UTF8 string, as UTF8 */ + if((status = H5Dwrite(did, utf8_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8)) < 0) FAIL_STACK_ERROR + + /* Read the UTF8 string, as ASCII, supposed to fail */ + H5E_BEGIN_TRY { + status = H5Dread(did, ascii_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ascii); + } H5E_END_TRY + if(status >= 0) + FAIL_STACK_ERROR + + /* Close the UTF8 VL-string datatype */ + if((status = H5Tclose(utf8_vtid)) < 0) FAIL_STACK_ERROR + + /* Close the ASCII VL-string datatype */ + if((status = H5Tclose(ascii_vtid)) < 0) FAIL_STACK_ERROR + + /* Close the dataset */ + if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR + + /*************************************** + * Test fixed-length string conversion + ***************************************/ + /* Create a fixed-length UTF8 string */ + if((utf8_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR + + if((status = H5Tset_size(utf8_tid, 4)) < 0) FAIL_STACK_ERROR + + /* Set the character set for the string to UTF-8 
*/ + if((status = H5Tset_cset(utf8_tid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR + + /* Create a fixed-length ASCII string */ + if((ascii_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR + + if((status = H5Tset_size(ascii_tid, 4)) < 0) FAIL_STACK_ERROR + + /* Set the character set for the string to ASCII (should already be so) */ + if((status = H5Tset_cset(ascii_tid, H5T_CSET_ASCII) < 0)) FAIL_STACK_ERROR + + /* Create a dataset */ + if((did = H5Dcreate2(fid, UTF8_DATASET2, utf8_tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR + + /* Write the UTF8 string, as UTF8 */ + if((status = H5Dwrite(did, utf8_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8)) < 0) FAIL_STACK_ERROR + + /* Read the UTF8 string as ASCII, supposed to fail */ + H5E_BEGIN_TRY { + status = H5Dread(did, ascii_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, ascii2); + } H5E_END_TRY + if(status >= 0) + FAIL_STACK_ERROR + + /* Close the UTF8 string datatype */ + if((status = H5Tclose(utf8_tid)) < 0) FAIL_STACK_ERROR + + /* Close the ASCII string datatype */ + if((status = H5Tclose(ascii_tid)) < 0) FAIL_STACK_ERROR + + /* Close the dataset */ + if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR + + /* Close the dataspace */ + if((status = H5Sclose(sid)) < 0) FAIL_STACK_ERROR + + /* Close the file */ + if((status = H5Fclose(fid)) < 0) FAIL_STACK_ERROR + + PASSED(); + return 0; + +error: + H5E_BEGIN_TRY { + H5Tclose(utf8_vtid); + H5Tclose(ascii_vtid); + H5Tclose(utf8_tid); + H5Tclose(ascii_tid); + H5Dclose(did); + H5Sclose(sid); + H5Fclose(fid); + } H5E_END_TRY; + return 1; +} + + + /*------------------------------------------------------------------------- * Function: main @@ -7102,6 +7233,7 @@ main(void) nerrors += test_bitfield_funcs(); nerrors += test_opaque(); nerrors += test_set_order(); + nerrors += test_utf_conv(); if(nerrors) { printf("***** %lu FAILURE%s! *****\n", |