summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Lu <songyulu@hdfgroup.org>2011-11-08 22:34:00 (GMT)
committerRaymond Lu <songyulu@hdfgroup.org>2011-11-08 22:34:00 (GMT)
commit68da0f773642748d996b498e28b0d39dc3b08710 (patch)
tree3f1e7b61adad313a72c6a61fc129ca28e2ff40dd
parent264fa3562aa40b083bbfea7bd27731086e19aa9e (diff)
downloadhdf5-68da0f773642748d996b498e28b0d39dc3b08710.zip
hdf5-68da0f773642748d996b498e28b0d39dc3b08710.tar.gz
hdf5-68da0f773642748d996b498e28b0d39dc3b08710.tar.bz2
[svn-r21734] Issue 7582 - The library allowed the conversion of strings between ASCII and UTF8. I corrected it by adding a condition check in H5T_conv_s_s and H5T_conv_vlen to report an error under this situation.
Tested on jam, koala, linew.
-rw-r--r--release_docs/RELEASE.txt3
-rw-r--r--src/H5T.c2
-rw-r--r--src/H5Tconv.c15
-rw-r--r--test/dtypes.c132
4 files changed, 151 insertions, 1 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 633695c..bd93295 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -343,6 +343,9 @@ Bug Fixes since HDF5-1.8.0 release
Library
-------
+ - The library allowed the conversion of strings between ASCII and UTF-8
+ (Issue 7582). It now reports an error when such a conversion is
+ requested. (SLU - 2011/11/8)
- The library had seg fault when it tried to shrink the size of compound type
through H5Tset_size immediately after the type was created (Issue
7618). It's fixed now. (SLU - 2011/10/26)
diff --git a/src/H5T.c b/src/H5T.c
index 2519a8f..688dfb3 100644
--- a/src/H5T.c
+++ b/src/H5T.c
@@ -4464,7 +4464,7 @@ H5T_path_find(const H5T_t *src, const H5T_t *dst, const char *name,
path->cdata.command = H5T_CONV_INIT;
if((H5T_g.soft[i].func)(src_id, dst_id, &(path->cdata), (size_t)0, (size_t)0, (size_t)0, NULL, NULL, dxpl_id) < 0) {
HDmemset(&(path->cdata), 0, sizeof(H5T_cdata_t));
- H5E_clear_stack(NULL); /*ignore the error*/
+ H5E_clear_stack(H5E_DEFAULT); /*ignore the error*/
} /* end if */
else {
HDstrcpy(path->name, H5T_g.soft[i].name);
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 405883e..7b1e1d7 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -2913,6 +2913,9 @@ done:
* old data. At this moment, it only frees the first level of
* VL datatype. It doesn't handle nested VL datatypes.
*
+ * Raymond Lu, 8 November 2011
+ * I put a condition check to prevent the conversion of VL strings
+ * between ASCII and UTF8.
*-------------------------------------------------------------------------
*/
herr_t
@@ -2963,6 +2966,11 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
if(H5T_VLEN != dst->shared->type)
HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "not a H5T_VLEN datatype")
+ if(H5T_VLEN_STRING == src->shared->u.vlen.type && H5T_VLEN_STRING == dst->shared->u.vlen.type) {
+ if((H5T_CSET_ASCII == src->shared->u.vlen.cset && H5T_CSET_UTF8 == dst->shared->u.vlen.cset)
+ || (H5T_CSET_ASCII == dst->shared->u.vlen.cset && H5T_CSET_UTF8 == src->shared->u.vlen.cset))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
+ }
/* Variable-length types don't need a background buffer */
cdata->need_bkg = H5T_BKG_NO;
@@ -4360,6 +4368,10 @@ done:
* then convert one value at each memory location advancing
* BUF_STRIDE bytes each time; otherwise assume both source and
* destination values are packed.
+ *
+ * Raymond Lu, 8 November 2011
+ * I put a condition check to prevent the conversion of strings
+ * between ASCII and UTF8.
*-------------------------------------------------------------------------
*/
herr_t
@@ -4391,6 +4403,9 @@ H5T_conv_s_s(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad source character set")
if(H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != dst->shared->u.atomic.u.s.cset)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad destination character set")
+ if((H5T_CSET_ASCII == src->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == dst->shared->u.atomic.u.s.cset)
+ || (H5T_CSET_ASCII == dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 == src->shared->u.atomic.u.s.cset))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "The library doesn't convert between strings of ASCII and UTF")
if(src->shared->u.atomic.u.s.pad < 0 || src->shared->u.atomic.u.s.pad >= H5T_NPAD ||
dst->shared->u.atomic.u.s.pad < 0 || dst->shared->u.atomic.u.s.pad >= H5T_NPAD)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character padding")
diff --git a/test/dtypes.c b/test/dtypes.c
index e0d1f8f..648650f 100644
--- a/test/dtypes.c
+++ b/test/dtypes.c
@@ -81,6 +81,7 @@ const char *FILENAME[] = {
"dtypes7",
"dtypes8",
"dtypes9",
+ "dtypes10",
NULL
};
@@ -109,6 +110,10 @@ typedef enum dtype_t {
#define DEL_OBJ_NAMED_NAMED_DTYPE "/Dtype"
#define DEL_OBJ_NAMED_ATTRIBUTE "Attr"
+/* Constant for testing conversion of UTF-8 characters */
+#define UTF8_DATASET "utf8"
+#define UTF8_DATASET2 "2nd_utf8"
+
/* Count opaque conversions */
static int num_opaque_conversions_g = 0;
@@ -7021,6 +7026,132 @@ error:
} /* end test_deprec() */
#endif /* H5_NO_DEPRECATED_SYMBOLS */
+/* Verify that the library refuses to read UTF-8 string data through an ASCII
+ * memory datatype, for both variable-length and fixed-length strings.
+ * Returns 0 when every step behaves as expected; the error path returns 1. */
+int test_utf_conv(void)
+{
+    /* Ids start at -1 so the error path never closes an indeterminate handle */
+    hid_t fid = -1, did = -1, sid = -1;
+    hid_t utf8_vtid = -1, ascii_vtid = -1;
+    hid_t utf8_tid = -1, ascii_tid = -1;
+    const char *utf8 = "foo!";
+    char *ascii = NULL;
+    char ascii2[4];
+    herr_t status;
+
+    TESTING("string conversion between ASCII and UTF");
+
+    /***********************************
+     * Test VL string conversion
+     ***********************************/
+    /* Create a variable-length string */
+    if((utf8_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_vtid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a variable-length string */
+    if((ascii_vtid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_vtid, H5T_VARIABLE)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_vtid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Create a file */
+    if((fid = H5Fcreate(FILENAME[10], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Create a scalar dataspace for the dataset */
+    if((sid = H5Screate(H5S_SCALAR)) < 0) FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, UTF8_DATASET, utf8_vtid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string, as UTF8 */
+    if((status = H5Dwrite(did, utf8_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &utf8)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string, as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_vtid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ascii);
+    } H5E_END_TRY
+    if(status >= 0) FAIL_STACK_ERROR
+
+    /* Close the UTF8 VL-string datatype */
+    if((status = H5Tclose(utf8_vtid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII VL-string datatype */
+    if((status = H5Tclose(ascii_vtid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /***************************************
+     * Test fixed-length string conversion
+     ***************************************/
+    /* Create a fixed-length UTF8 string */
+    if((utf8_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(utf8_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to UTF-8 */
+    if((status = H5Tset_cset(utf8_tid, H5T_CSET_UTF8)) < 0) FAIL_STACK_ERROR
+
+    /* Create a fixed-length ASCII string */
+    if((ascii_tid = H5Tcopy(H5T_C_S1)) < 0) FAIL_STACK_ERROR
+
+    if((status = H5Tset_size(ascii_tid, 4)) < 0) FAIL_STACK_ERROR
+
+    /* Set the character set for the string to ASCII (should already be so) */
+    if((status = H5Tset_cset(ascii_tid, H5T_CSET_ASCII)) < 0) FAIL_STACK_ERROR
+
+    /* Create a dataset */
+    if((did = H5Dcreate2(fid, UTF8_DATASET2, utf8_tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+    /* Write the UTF8 string as UTF8; a fixed-length buffer is the characters themselves */
+    if((status = H5Dwrite(did, utf8_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, utf8)) < 0) FAIL_STACK_ERROR
+
+    /* Read the UTF8 string as ASCII, supposed to fail */
+    H5E_BEGIN_TRY {
+        status = H5Dread(did, ascii_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, ascii2);
+    } H5E_END_TRY
+    if(status >= 0) FAIL_STACK_ERROR
+
+    /* Close the UTF8 string datatype */
+    if((status = H5Tclose(utf8_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the ASCII string datatype */
+    if((status = H5Tclose(ascii_tid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataset */
+    if((status = H5Dclose(did)) < 0) FAIL_STACK_ERROR
+
+    /* Close the dataspace */
+    if((status = H5Sclose(sid)) < 0) FAIL_STACK_ERROR
+
+    /* Close the file */
+    if((status = H5Fclose(fid)) < 0) FAIL_STACK_ERROR
+
+    PASSED();
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Tclose(utf8_vtid);
+        H5Tclose(ascii_vtid);
+        H5Tclose(utf8_tid);
+        H5Tclose(ascii_tid);
+        H5Dclose(did);
+        H5Sclose(sid);
+        H5Fclose(fid);
+    } H5E_END_TRY;
+    return 1;
+}
+
+
+
/*-------------------------------------------------------------------------
* Function: main
@@ -7102,6 +7233,7 @@ main(void)
nerrors += test_bitfield_funcs();
nerrors += test_opaque();
nerrors += test_set_order();
+ nerrors += test_utf_conv();
if(nerrors) {
printf("***** %lu FAILURE%s! *****\n",