From db7410f838e6319dc87413e27f18b0b8ac050314 Mon Sep 17 00:00:00 2001
From: Raymond Lu
Date: Tue, 15 Nov 2011 17:45:06 -0500
Subject: [svn-r21765] Issue 7618 - the library had seg fault when it tried to
 shrink the size of compound data type through H5Tset_size immediately after
 the type was created. I fixed it in this commit. I'm bringing the fix from
 the trunk. The only files being changed are test/dtypes.c, src/H5T.c,
 src/H5Tconv.c, and release_docs/RELEASE.txt. All others are property changes.

Tested on jam, koala, and linew.
---
Review notes (text here is ignored when the patch is applied):
  * The subject line talks about Issue 7618, but the hunks below add the
    ASCII <-> UTF8 string-conversion check and its test (the RELEASE.txt
    entry added here calls it Issue 7582).
  * test_utf_ascii_conv() was corrected in place (no lines added or removed,
    so the hunk sizes are unchanged):
      - the two H5Tset_cset(..., H5T_CSET_ASCII) checks had the closing
        parenthesis in the wrong place, storing the comparison result in
        'status' instead of the returned herr_t;
      - fixed-length H5Dwrite calls now pass the character data instead of
        the address of the 'const char *' variable;
      - variable-length H5Tconvert calls now pass the address of the string
        pointer (one VL-string element), matching the VL H5Dwrite calls;
      - all hid_t handles start at -1 so the error path never closes an
        uninitialized identifier;
      - the function is static (its only caller is main() in this file).

 release_docs/RELEASE.txt |   3 +
 src/H5T.c                |   2 +-
 src/H5Tconv.c            |  15 ++++
 test/dtypes.c            | 217 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 236 insertions(+), 1 deletion(-)

diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 07e33f9..654819f 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -96,6 +96,9 @@ Bug Fixes since HDF5-1.8.8
 
     Library
     -------
+    - The library allowed the conversion of strings between ASCII and UTF8
+      (Issue 7582). We have corrected it to report an error under this
+      situation. (SLU - 2011/11/8)
     - The library had seg fault when it tried to shrink the size of compound
       type through H5Tset_size immediately after the type was created
       (Issue 7618). It's fixed now. (SLU - 2011/11/4)
diff --git a/src/H5T.c b/src/H5T.c
index 2519a8f..688dfb3 100644
--- a/src/H5T.c
+++ b/src/H5T.c
@@ -4464,7 +4464,7 @@ H5T_path_find(const H5T_t *src, const H5T_t *dst, const char *name,
             path->cdata.command = H5T_CONV_INIT;
             if((H5T_g.soft[i].func)(src_id, dst_id, &(path->cdata), (size_t)0, (size_t)0, (size_t)0, NULL, NULL, dxpl_id) < 0) {
                 HDmemset(&(path->cdata), 0, sizeof(H5T_cdata_t));
-                H5E_clear_stack(NULL); /*ignore the error*/
+                H5E_clear_stack(H5E_DEFAULT); /*ignore the error*/
             } /* end if */
             else {
                 HDstrcpy(path->name, H5T_g.soft[i].name);
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 0de54c9..3209d3e 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -2913,6 +2913,9 @@ done:
  *              old data. At this moment, it only frees the first level of
  *              VL datatype. It doesn't handle nested VL datatypes.
  *
+ *              Raymond Lu, 8 November 2011
+ *              I put a condition check to prevent the conversion of VL strings
+ *              between ASCII and UTF8.
  *-------------------------------------------------------------------------
  */
 herr_t
@@ -2963,6 +2966,11 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
                 HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
             if(H5T_VLEN != dst->shared->type)
                 HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
+            if(H5T_VLEN_STRING == src->shared->u.vlen.type && H5T_VLEN_STRING == dst->shared->u.vlen.type) {
+                if((H5T_CSET_ASCII == src->shared->u.vlen.cset && H5T_CSET_UTF8 == dst->shared->u.vlen.cset)
+                    || (H5T_CSET_ASCII == dst->shared->u.vlen.cset && H5T_CSET_UTF8 == src->shared->u.vlen.cset))
+                    HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
+            }
 
             /* Variable-length types don't need a background buffer */
             cdata->need_bkg = H5T_BKG_NO;
@@ -4360,6 +4368,10 @@ done:
  *              then convert one value at each memory location advancing
  *              BUF_STRIDE bytes each time; otherwise assume both source and
  *              destination values are packed.
+ *
+ *              Raymond Lu, 8 November 2011
+ *              I put a condition check to prevent the conversion of strings
+ *              between ASCII and UTF8.
  *-------------------------------------------------------------------------
  */
 herr_t
@@ -4391,6 +4403,9 @@ H5T_conv_s_s(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad source character set")
             if(H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != dst->shared->u.atomic.u.s.cset)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad destination character set")
+            if((H5T_CSET_ASCII == src->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == dst->shared->u.atomic.u.s.cset)
+                || (H5T_CSET_ASCII == dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == src->shared->u.atomic.u.s.cset))
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
             if(src->shared->u.atomic.u.s.pad < 0 || src->shared->u.atomic.u.s.pad >= H5T_NPAD ||
                     dst->shared->u.atomic.u.s.pad < 0 || dst->shared->u.atomic.u.s.pad >= H5T_NPAD)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character padding")
diff --git a/test/dtypes.c b/test/dtypes.c
index 802ce4a..f1ff4ea 100644
--- a/test/dtypes.c
+++ b/test/dtypes.c
@@ -81,6 +81,7 @@ const char *FILENAME[] = {
     "dtypes7",
     "dtypes8",
     "dtypes9",
+    "dtypes10",
     NULL
 };
 
@@ -109,6 +110,12 @@ typedef enum dtype_t {
 #define DEL_OBJ_NAMED_NAMED_DTYPE        "/Dtype"
 #define DEL_OBJ_NAMED_ATTRIBUTE          "Attr"
 
+/* Dataset names used by the ASCII <-> UTF-8 string conversion test */
+#define UTF8_DATASET                     "utf8"
+#define UTF8_DATASET2                    "2nd_utf8"
+#define ASCII_DATASET                    "ascii"
+#define ASCII_DATASET2                   "2nd_ascii"
+
 /* Count opaque conversions */
 static int num_opaque_conversions_g = 0;
 
@@ -7024,6 +7031,215 @@ error:
 
 
 /*-------------------------------------------------------------------------
+ * Function:    test_utf_ascii_conv
+ *
+ * Purpose:     Make sure the library doesn't convert strings between
+ *              ASCII and UTF8 (both VL and fixed-length, memory and file).
+ *
+ * Return:      Success:        0
+ *              Failure:        1 (counted as one error by main)
+ *
+ * Programmer:  Raymond Lu
+ *              10 November 2011
+ *-------------------------------------------------------------------------
+ */
+static int test_utf_ascii_conv(void)
+{
+    hid_t fid = -1;                             /* -1: safe to "close" on error */
+    hid_t did = -1;
+    hid_t utf8_vtid = -1, ascii_vtid = -1;      /* variable-length string types */
+    hid_t utf8_tid = -1, ascii_tid = -1;        /* fixed-length (4 byte) string types */
+    hid_t sid = -1;
+    const char *utf8_w = "foo!";
+    char *ascii_r = NULL;
+    const char *ascii_w = "bar!";
+    char *utf8_r = NULL;
+
+    char ascii2[4], utf8_2[4];
+    herr_t status;
+
+    TESTING("string conversion between ASCII and UTF");
+
+    /************************************************
+     * Test VL string conversion from UTF8 to ASCII
+     ************************************************/
+    /* Create a variable-length string */
+    if((utf8_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_vtid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a variable-length string */
+    if((ascii_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_vtid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Test conversion in memory (buffer is one VL element, i.e. a char *), supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Tconvert(utf8_vtid, ascii_vtid, 1, (void *)&utf8_w, NULL, H5P_DEFAULT);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Create a file */
+    if((fid = H5Fcreate(FILENAME[10], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Create a scalar dataspace for the dataset */
+    if((sid = H5Screate(H5S_SCALAR)) < 0) FAIL_STACK_ERROR
+
+    /* Create a dataset of UTF8 string type */
+    if((did = H5Dcreate2(fid, UTF8_DATASET, utf8_vtid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string, as UTF8 */
+    if((status = H5Dwrite(did, utf8_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8_w)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string, as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ascii_r);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /************************************************
+     * Test VL string conversion from ASCII to UTF8
+     ************************************************/
+    /* Test conversion in memory (buffer is one VL element, i.e. a char *), supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Tconvert(ascii_vtid, utf8_vtid, 1, (void *)&ascii_w, NULL, H5P_DEFAULT);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Create a dataset of ASCII string type */
+    if((did = H5Dcreate2(fid, ASCII_DATASET, ascii_vtid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the ASCII string, as ASCII */
+    if((status = H5Dwrite(did, ascii_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ascii_w)) < 0) FAIL_STACK_ERROR
+
+    /* Read the ASCII string, as UTF8, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, utf8_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8_r);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /* Close the UTF8 VL-string datatype */
+    if((status = H5Tclose(utf8_vtid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII VL-string datatype */
+    if((status = H5Tclose(ascii_vtid)) < 0) FAIL_STACK_ERROR
+
+    /**********************************************************
+     * Test fixed-length string conversion from UTF8 to ASCII
+     **********************************************************/
+    /* Create a fixed-length UTF8 string */
+    if((utf8_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_tid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a fixed-length ASCII string */
+    if((ascii_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_tid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Test conversion in memory, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Tconvert(utf8_tid, ascii_tid, 1, utf8_2, NULL, H5P_DEFAULT);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, UTF8_DATASET2, utf8_tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string, as UTF8 (buffer is the 4 characters themselves) */
+    if((status = H5Dwrite(did, utf8_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, utf8_w)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, ascii2);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /**********************************************************
+     * Test fixed-length string conversion from ASCII to UTF8
+     **********************************************************/
+    /* Test conversion in memory, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Tconvert(ascii_tid, utf8_tid, 1, ascii2, NULL, H5P_DEFAULT);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, ASCII_DATASET2, ascii_tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the ASCII string, as ASCII (buffer is the 4 characters themselves) */
+    if((status = H5Dwrite(did, ascii_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, ascii_w)) < 0) FAIL_STACK_ERROR
+
+    /* Read the ASCII string as UTF8, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, utf8_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, utf8_2);
+    } H5E_END_TRY
+    if(status >= 0)
+        FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /* Close the UTF8 string datatype */
+    if((status = H5Tclose(utf8_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII string datatype */
+    if((status = H5Tclose(ascii_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataspace */
+    if((status = H5Sclose(sid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the file */
+    if((status = H5Fclose(fid)) < 0) FAIL_STACK_ERROR
+
+    PASSED();
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Tclose(utf8_vtid);
+        H5Tclose(ascii_vtid);
+        H5Tclose(utf8_tid);
+        H5Tclose(ascii_tid);
+        H5Dclose(did);
+        H5Sclose(sid);
+        H5Fclose(fid);
+    } H5E_END_TRY;
+    return 1;
+}
+
+
+
+
+/*-------------------------------------------------------------------------
  * Function:    main
  *
  * Purpose:     Test the datatype interface.
@@ -7105,6 +7321,7 @@ main(void)
     nerrors += test_bitfield_funcs();
     nerrors += test_opaque();
     nerrors += test_set_order();
+    nerrors += test_utf_ascii_conv();
 
     if(nerrors) {
         printf("***** %lu FAILURE%s! *****\n",
-- 
cgit v0.12