From 68da0f773642748d996b498e28b0d39dc3b08710 Mon Sep 17 00:00:00 2001
From: Raymond Lu
Date: Tue, 8 Nov 2011 17:34:00 -0500
Subject: [svn-r21734] Issue 7582 - The library allowed the conversion of strings between ASCII and UTF8. I corrected it by adding a condition check in H5T_conv_s_s and H5T_conv_vlen to report an error under this situation.

Tested on jam, koala, linew.
---
 release_docs/RELEASE.txt |   3 ++
 src/H5T.c                |   2 +-
 src/H5Tconv.c            |  15 ++++++
 test/dtypes.c            | 148 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 633695c..bd93295 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -343,6 +343,9 @@ Bug Fixes since HDF5-1.8.0 release
 
     Library
     -------
+    - The library allowed the conversion of strings between ASCII and UTF8
+      (Issue 7582). We have corrected it to report an error under this
+      situation. (SLU - 2011/11/8)
     - The library had seg fault when it tried to shrink the size of compound
       type through H5Tset_size immediately after the type was created (Issue
       7618). It's fixed now. (SLU - 2011/10/26)
diff --git a/src/H5T.c b/src/H5T.c
index 2519a8f..688dfb3 100644
--- a/src/H5T.c
+++ b/src/H5T.c
@@ -4464,7 +4464,7 @@ H5T_path_find(const H5T_t *src, const H5T_t *dst, const char *name,
             path->cdata.command = H5T_CONV_INIT;
             if((H5T_g.soft[i].func)(src_id, dst_id, &(path->cdata), (size_t)0, (size_t)0, (size_t)0, NULL, NULL, dxpl_id) < 0) {
                 HDmemset(&(path->cdata), 0, sizeof(H5T_cdata_t));
-                H5E_clear_stack(NULL); /*ignore the error*/
+                H5E_clear_stack(H5E_DEFAULT); /*ignore the error*/
             } /* end if */
             else {
                 HDstrcpy(path->name, H5T_g.soft[i].name);
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 405883e..7b1e1d7 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -2913,6 +2913,9 @@ done:
  *              old data. At this moment, it only frees the first level of
  *              VL datatype. It doesn't handle nested VL datatypes.
  *
+ *              Raymond Lu, 8 November 2011
+ *              I put a condition check to prevent the conversion of VL strings
+ *              between ASCII and UTF8.
  *-------------------------------------------------------------------------
  */
 herr_t
@@ -2963,6 +2966,11 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
                 HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
             if(H5T_VLEN != dst->shared->type)
                 HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
+            if(H5T_VLEN_STRING == src->shared->u.vlen.type && H5T_VLEN_STRING == dst->shared->u.vlen.type) {
+                if((H5T_CSET_ASCII == src->shared->u.vlen.cset && H5T_CSET_UTF8 == dst->shared->u.vlen.cset)
+                    || (H5T_CSET_ASCII == dst->shared->u.vlen.cset && H5T_CSET_UTF8 == src->shared->u.vlen.cset))
+                    HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
+            }
 
             /* Variable-length types don't need a background buffer */
             cdata->need_bkg = H5T_BKG_NO;
@@ -4360,6 +4368,10 @@ done:
  *              then convert one value at each memory location advancing
  *              BUF_STRIDE bytes each time; otherwise assume both source and
  *              destination values are packed.
+ *
+ *              Raymond Lu, 8 November 2011
+ *              I put a condition check to prevent the conversion of strings
+ *              between ASCII and UTF8.
  *-------------------------------------------------------------------------
  */
 herr_t
@@ -4391,6 +4403,9 @@ H5T_conv_s_s(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad source character set")
             if(H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != dst->shared->u.atomic.u.s.cset)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad destination character set")
+            if((H5T_CSET_ASCII == src->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == dst->shared->u.atomic.u.s.cset)
+                || (H5T_CSET_ASCII == dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == src->shared->u.atomic.u.s.cset))
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
             if(src->shared->u.atomic.u.s.pad < 0 || src->shared->u.atomic.u.s.pad >= H5T_NPAD ||
                     dst->shared->u.atomic.u.s.pad < 0 || dst->shared->u.atomic.u.s.pad >= H5T_NPAD)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character padding")
diff --git a/test/dtypes.c b/test/dtypes.c
index e0d1f8f..648650f 100644
--- a/test/dtypes.c
+++ b/test/dtypes.c
@@ -81,6 +81,7 @@ const char *FILENAME[] = {
     "dtypes7",
     "dtypes8",
     "dtypes9",
+    "dtypes10",
     NULL
 };
 
@@ -109,6 +110,10 @@ typedef enum dtype_t {
 #define DEL_OBJ_NAMED_NAMED_DTYPE       "/Dtype"
 #define DEL_OBJ_NAMED_ATTRIBUTE         "Attr"
 
+/* Constant for testing conversion of UTF-8 characters */
+#define UTF8_DATASET    "utf8"
+#define UTF8_DATASET2   "2nd_utf8"
+
 /* Count opaque conversions */
 static int num_opaque_conversions_g = 0;
 
@@ -7021,6 +7026,148 @@ error:
 } /* end test_deprec() */
 #endif /* H5_NO_DEPRECATED_SYMBOLS */
 
+/*-------------------------------------------------------------------------
+ * Function:    test_utf_conv
+ *
+ * Purpose:     Checks that the library refuses to convert string data
+ *              between the ASCII and UTF-8 character sets.  A dataset
+ *              is written with a UTF-8 string type and then read back
+ *              through the corresponding ASCII type; the read has to
+ *              fail.  The check is made for a variable-length string
+ *              and for a fixed-length (4-byte) string.
+ *
+ * Return:      Success:        0
+ *              Failure:        1
+ *
+ *-------------------------------------------------------------------------
+ */
+int test_utf_conv(void)
+{
+    /* Start every ID at -1 so the "error:" cleanup never closes an indeterminate value */
+    hid_t       fid = -1;
+    hid_t       did = -1;
+    hid_t       utf8_vtid = -1, ascii_vtid = -1;
+    hid_t       utf8_tid = -1, ascii_tid = -1;
+    hid_t       sid = -1;
+    const char  *utf8 = "foo!";
+    char        *ascii = NULL;
+    char        ascii2[4];
+    herr_t      status;
+
+    TESTING("string conversion between ASCII and UTF");
+
+    /***********************************
+     * Test VL string conversion
+     ***********************************/
+    /* Create a variable-length string */
+    if((utf8_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_vtid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a variable-length string */
+    if((ascii_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_vtid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Create a file */
+    if((fid = H5Fcreate(FILENAME[10], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Create a scalar dataspace for the dataset */
+    if((sid = H5Screate(H5S_SCALAR)) < 0) FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, UTF8_DATASET, utf8_vtid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string, as UTF8 (a VL string buffer holds the char pointer, hence &utf8) */
+    if((status = H5Dwrite(did, utf8_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string, as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ascii);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the UTF8 VL-string datatype */
+    if((status = H5Tclose(utf8_vtid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII VL-string datatype */
+    if((status = H5Tclose(ascii_vtid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /***************************************
+     * Test fixed-length string conversion
+     ***************************************/
+    /* Create a fixed-length UTF8 string */
+    if((utf8_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_tid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a fixed-length ASCII string */
+    if((ascii_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_tid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, UTF8_DATASET2, utf8_tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string, as UTF8 (a fixed-length buffer is the characters themselves, so no &) */
+    if((status = H5Dwrite(did, utf8_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, utf8)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, ascii2);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the UTF8 string datatype */
+    if((status = H5Tclose(utf8_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII string datatype */
+    if((status = H5Tclose(ascii_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataspace */
+    if((status = H5Sclose(sid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the file */
+    if((status = H5Fclose(fid)) < 0) FAIL_STACK_ERROR
+
+    PASSED();
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Tclose(utf8_vtid);
+        H5Tclose(ascii_vtid);
+        H5Tclose(utf8_tid);
+        H5Tclose(ascii_tid);
+        H5Dclose(did);
+        H5Sclose(sid);
+        H5Fclose(fid);
+    } H5E_END_TRY;
+    return 1;
+}
+
+
+
 
 /*-------------------------------------------------------------------------
  * Function:    main
@@ -7102,6 +7249,7 @@ main(void)
     nerrors += test_bitfield_funcs();
     nerrors += test_opaque();
     nerrors += test_set_order();
+    nerrors += test_utf_conv();
 
     if(nerrors) {
         printf("***** %lu FAILURE%s! *****\n",
-- 
cgit v0.12