diff options
Diffstat (limited to 'test/API/tunicode.c')
-rw-r--r-- | test/API/tunicode.c | 867 |
1 files changed, 0 insertions, 867 deletions
diff --git a/test/API/tunicode.c b/test/API/tunicode.c deleted file mode 100644 index 8b404f2..0000000 --- a/test/API/tunicode.c +++ /dev/null @@ -1,867 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Copyright by The HDF Group. * - * All rights reserved. * - * * - * This file is part of HDF5. The full HDF5 copyright notice, including * - * terms governing use, modification, and redistribution, is contained in * - * the COPYING file, which can be found at the root of the source code * - * distribution tree, or in https://www.hdfgroup.org/licenses. * - * If you do not have access to either file, you may request a copy from * - * help@hdfgroup.org. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -/* Unicode test */ -#include "testhdf5.h" - -#define NUM_CHARS 16 -#define MAX_STRING_LENGTH ((NUM_CHARS * 4) + 1) /* Max length in bytes */ -#define MAX_PATH_LENGTH (MAX_STRING_LENGTH + 20) /* Max length in bytes */ -#define MAX_CODE_POINT 0x200000 -#define FILENAME "unicode.h5" -/* A buffer to hold two copies of the UTF-8 string */ -#define LONG_BUF_SIZE (2 * MAX_STRING_LENGTH + 4) - -#define DSET1_NAME "fl_string_dataset" -#define DSET3_NAME "dataset3" -#define DSET4_NAME "dataset4" -#define VL_DSET1_NAME "vl_dset_1" -#define GROUP1_NAME "group1" -#define GROUP2_NAME "group2" -#define GROUP3_NAME "group3" -#define GROUP4_NAME "group4" - -#define RANK 1 -#define COMP_INT_VAL 7 -#define COMP_FLOAT_VAL (-42.0F) -#define COMP_DOUBLE_VAL 42.0 - -/* Test function prototypes */ -void test_fl_string(hid_t fid, const char *string); -void test_strpad(hid_t fid, const char *string); -void test_vl_string(hid_t fid, const char *string); -void test_objnames(hid_t fid, const char *string); -void test_attrname(hid_t fid, const char *string); -void test_compound(hid_t fid, const char *string); -void test_enum(hid_t fid, const char *string); -void test_opaque(hid_t fid, const char *string); - -/* Utility function prototypes */ -static hid_t mkstr(size_t len, H5T_str_t strpad); -unsigned int write_char(unsigned int c, char *test_string, unsigned int cur_pos); -void dump_string(const char *string); - -/* - * test_fl_string - * Tests that UTF-8 can be used for fixed-length string data. - * Writes the string to a dataset and reads it back again. - */ -void -test_fl_string(hid_t fid, const char *string) -{ - hid_t dtype_id, space_id, dset_id; - hsize_t dims = 1; - char read_buf[MAX_STRING_LENGTH]; - H5T_cset_t cset; - herr_t ret; - - /* Create the datatype, ensure that the character set behaves - * correctly (it should default to ASCII and can be set to UTF8) - */ - dtype_id = H5Tcopy(H5T_C_S1); - CHECK(dtype_id, FAIL, "H5Tcopy"); - ret = H5Tset_size(dtype_id, (size_t)MAX_STRING_LENGTH); - CHECK(ret, FAIL, "H5Tset_size"); - cset = H5Tget_cset(dtype_id); - VERIFY(cset, H5T_CSET_ASCII, "H5Tget_cset"); - ret = H5Tset_cset(dtype_id, H5T_CSET_UTF8); - CHECK(ret, FAIL, "H5Tset_cset"); - cset = H5Tget_cset(dtype_id); - VERIFY(cset, H5T_CSET_UTF8, "H5Tget_cset"); - - /* Create dataspace for a dataset */ - space_id = H5Screate_simple(RANK, &dims, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - - /* Create a dataset */ - dset_id = H5Dcreate2(fid, DSET1_NAME, dtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(dset_id, FAIL, "H5Dcreate2"); - - /* Write UTF-8 string to dataset */ - ret = H5Dwrite(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, string); - CHECK(ret, FAIL, "H5Dwrite"); - - /* Read string back and make sure it is unchanged */ - ret = H5Dread(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf); - CHECK(ret, FAIL, "H5Dread"); - - VERIFY(strcmp(string, read_buf), 0, "strcmp"); - - /* Close all */ - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); - - ret = H5Tclose(dtype_id); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); -} - -/* - * test_strpad - * Tests string padding for a UTF-8 string. - * Converts strings to shorter and then longer strings. - * Borrows heavily from dtypes.c, but is more complicated because - * the string is randomly generated. - */ -void -test_strpad(hid_t H5_ATTR_UNUSED fid, const char *string) -{ - /* buf is used to hold the data that H5Tconvert operates on. */ - char buf[LONG_BUF_SIZE]; - - /* cmpbuf holds the output that H5Tconvert should produce, - * to compare against the actual output. */ - char cmpbuf[LONG_BUF_SIZE]; - - /* new_string is a slightly modified version of the UTF-8 - * string to make the tests run more smoothly. */ - char new_string[MAX_STRING_LENGTH + 2]; - - size_t length; /* Length of new_string in bytes */ - size_t small_len; /* Size of the small datatype */ - size_t big_len; /* Size of the larger datatype */ - hid_t src_type, dst_type; - herr_t ret; - - /* The following tests are simpler if the UTF-8 string contains - * the right number of bytes (even or odd, depending on the test). - * We create a 'new_string' whose length is convenient by prepending - * an 'x' to 'string' when necessary. */ - length = strlen(string); - if (length % 2 != 1) { - strcpy(new_string, "x"); - strcat(new_string, string); - length++; - } - else { - strcpy(new_string, string); - } - - /* Convert a null-terminated string to a shorter and longer null - * terminated string. */ - - /* Create a src_type that holds the UTF-8 string and its final NULL */ - big_len = length + 1; /* +1 byte for final NULL */ - assert((2 * big_len) <= sizeof(cmpbuf)); - src_type = mkstr(big_len, H5T_STR_NULLTERM); - CHECK(src_type, FAIL, "mkstr"); - /* Create a dst_type that holds half of the UTF-8 string and a final - * NULL */ - small_len = (length + 1) / 2; - dst_type = mkstr(small_len, H5T_STR_NULLTERM); - CHECK(dst_type, FAIL, "mkstr"); - - /* Fill the buffer with two copies of the UTF-8 string, each with a - * terminating NULL. It will look like "abcdefg\0abcdefg\0". */ - strncpy(buf, new_string, big_len); - strncpy(&buf[big_len], new_string, big_len); - - ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* After conversion, the buffer should look like - * "abc\0abc\0abcdefg\0". Note that this is just what the bytes look - * like; UTF-8 characters may well have been truncated. - * To check that the conversion worked properly, we'll build this - * string manually. */ - strncpy(cmpbuf, new_string, small_len - 1); - cmpbuf[small_len - 1] = '\0'; - strncpy(&cmpbuf[small_len], new_string, small_len - 1); - cmpbuf[2 * small_len - 1] = '\0'; - strcpy(&cmpbuf[2 * small_len], new_string); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - /* Now convert from smaller datatype to bigger datatype. This should - * leave our buffer looking like: "abc\0\0\0\0\0abc\0\0\0\0\0" */ - ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* First fill the buffer with NULLs */ - memset(cmpbuf, '\0', (size_t)LONG_BUF_SIZE); - /* Copy in the characters */ - strncpy(cmpbuf, new_string, small_len - 1); - strncpy(&cmpbuf[big_len], new_string, small_len - 1); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - ret = H5Tclose(src_type); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Tclose(dst_type); - CHECK(ret, FAIL, "H5Tclose"); - - /* Now test null padding. Null-padded strings do *not* need - * terminating NULLs, so the sizes of the datatypes are slightly - * different and we want a string with an even number of characters. */ - length = strlen(string); - if (length % 2 != 0) { - strcpy(new_string, "x"); - strcat(new_string, string); - length++; - } - else { - strcpy(new_string, string); - } - - /* Create a src_type that holds the UTF-8 string */ - big_len = length; - assert((2 * big_len) <= sizeof(cmpbuf)); - src_type = mkstr(big_len, H5T_STR_NULLPAD); - CHECK(src_type, FAIL, "mkstr"); - /* Create a dst_type that holds half of the UTF-8 string */ - small_len = length / 2; - dst_type = mkstr(small_len, H5T_STR_NULLPAD); - CHECK(dst_type, FAIL, "mkstr"); - - /* Fill the buffer with two copies of the UTF-8 string. - * It will look like "abcdefghabcdefgh". */ - strncpy(buf, new_string, big_len); - strncpy(&buf[big_len], new_string, big_len); - - ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* After conversion, the buffer should look like - * "abcdabcdabcdefgh". Note that this is just what the bytes look - * like; UTF-8 characters may well have been truncated. - * To check that the conversion worked properly, we'll build this - * string manually. */ - strncpy(cmpbuf, new_string, small_len); - strncpy(&cmpbuf[small_len], new_string, small_len); - strncpy(&cmpbuf[2 * small_len], new_string, big_len); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - /* Now convert from smaller datatype to bigger datatype. This should - * leave our buffer looking like: "abcd\0\0\0\0abcd\0\0\0\0" */ - ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* First fill the buffer with NULLs */ - memset(cmpbuf, '\0', (size_t)LONG_BUF_SIZE); - /* Copy in the characters */ - strncpy(cmpbuf, new_string, small_len); - strncpy(&cmpbuf[big_len], new_string, small_len); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - ret = H5Tclose(src_type); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Tclose(dst_type); - CHECK(ret, FAIL, "H5Tclose"); - - /* Test space padding. This is very similar to null-padding; we can - use the same values of length, small_len, and big_len. */ - - src_type = mkstr(big_len, H5T_STR_SPACEPAD); - CHECK(src_type, FAIL, "mkstr"); - dst_type = mkstr(small_len, H5T_STR_SPACEPAD); - CHECK(src_type, FAIL, "mkstr"); - - /* Fill the buffer with two copies of the UTF-8 string. - * It will look like "abcdefghabcdefgh". */ - strcpy(buf, new_string); - strcpy(&buf[big_len], new_string); - - ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* After conversion, the buffer should look like - * "abcdabcdabcdefgh". Note that this is just what the bytes look - * like; UTF-8 characters may have been truncated. - * To check that the conversion worked properly, we'll build this - * string manually. */ - strncpy(cmpbuf, new_string, small_len); - strncpy(&cmpbuf[small_len], new_string, small_len); - strncpy(&cmpbuf[2 * small_len], new_string, big_len); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - /* Now convert from smaller datatype to bigger datatype. This should - * leave our buffer looking like: "abcd abcd " */ - ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Tconvert"); - - /* First fill the buffer with spaces */ - memset(cmpbuf, ' ', (size_t)LONG_BUF_SIZE); - /* Copy in the characters */ - strncpy(cmpbuf, new_string, small_len); - strncpy(&cmpbuf[big_len], new_string, small_len); - - VERIFY(memcmp(buf, cmpbuf, 2 * big_len), 0, "memcmp"); - - ret = H5Tclose(src_type); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Tclose(dst_type); - CHECK(ret, FAIL, "H5Tclose"); -} - -/* - * test_vl_string - * Tests variable-length string datatype with UTF-8 strings. - */ -void -test_vl_string(hid_t fid, const char *string) -{ - hid_t type_id, space_id, dset_id; - hsize_t dims = 1; - hsize_t size; /* Number of bytes used */ - char *read_buf[1]; - herr_t ret; - - /* Create dataspace for datasets */ - space_id = H5Screate_simple(RANK, &dims, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - - /* Create a datatype to refer to */ - type_id = H5Tcopy(H5T_C_S1); - CHECK(type_id, FAIL, "H5Tcopy"); - ret = H5Tset_size(type_id, H5T_VARIABLE); - CHECK(ret, FAIL, "H5Tset_size"); - - /* Create a dataset */ - dset_id = H5Dcreate2(fid, VL_DSET1_NAME, type_id, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(dset_id, FAIL, "H5Dcreate2"); - - /* Write dataset to disk */ - ret = H5Dwrite(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string); - CHECK(ret, FAIL, "H5Dwrite"); - - /* Make certain the correct amount of memory will be used */ - ret = H5Dvlen_get_buf_size(dset_id, type_id, space_id, &size); - CHECK(ret, FAIL, "H5Dvlen_get_buf_size"); - VERIFY(size, (hsize_t)strlen(string) + 1, "H5Dvlen_get_buf_size"); - - /* Read dataset from disk */ - ret = H5Dread(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf); - CHECK(ret, FAIL, "H5Dread"); - - /* Compare data read in */ - VERIFY(strcmp(string, read_buf[0]), 0, "strcmp"); - - /* Reclaim the read VL data */ - ret = H5Treclaim(type_id, space_id, H5P_DEFAULT, read_buf); - CHECK(ret, FAIL, "H5Treclaim"); - - /* Close all */ - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); - ret = H5Tclose(type_id); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); -} - -/* - * test_objnames - * Tests that UTF-8 can be used for object names in the file. - * Tests groups, datasets, named datatypes, and soft links. - * Note that this test doesn't actually mark the names as being - * in UTF-8. At the time this test was written, that feature - * didn't exist in HDF5, and when the character encoding property - * was added to links it didn't change how they were stored in the file, - * -JML 2/2/2006 - */ -void -test_objnames(hid_t fid, const char *string) -{ - hid_t grp_id, grp1_id, grp2_id, grp3_id; - hid_t type_id, dset_id, space_id; -#if 0 - char read_buf[MAX_STRING_LENGTH]; -#endif - char path_buf[MAX_PATH_LENGTH]; - hsize_t dims = 1; -#if 0 - hobj_ref_t obj_ref; - ssize_t size; -#endif - herr_t ret; - - /* Create a group with a UTF-8 name */ - grp_id = H5Gcreate2(fid, string, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(grp_id, FAIL, "H5Gcreate2"); -#if 0 - /* Set a comment on the group to test that we can access the group - * Also test that UTF-8 comments can be read. - */ - ret = H5Oset_comment_by_name(fid, string, string, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Oset_comment_by_name"); - size = H5Oget_comment_by_name(fid, string, read_buf, (size_t)MAX_STRING_LENGTH, H5P_DEFAULT); - CHECK(size, FAIL, "H5Oget_comment_by_name"); -#endif - ret = H5Gclose(grp_id); - CHECK(ret, FAIL, "H5Gclose"); -#if 0 - VERIFY(strcmp(string, read_buf), 0, "strcmp"); -#endif - /* Create a new dataset with a UTF-8 name */ - grp1_id = H5Gcreate2(fid, GROUP1_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(grp1_id, FAIL, "H5Gcreate2"); - - space_id = H5Screate_simple(RANK, &dims, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - dset_id = H5Dcreate2(grp1_id, string, H5T_NATIVE_INT, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(dset_id, FAIL, "H5Dcreate2"); - - /* Make sure that dataset can be opened again */ - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); - - dset_id = H5Dopen2(grp1_id, string, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Dopen2"); - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); - ret = H5Gclose(grp1_id); - CHECK(ret, FAIL, "H5Gclose"); - - /* Do the same for a named datatype */ - grp2_id = H5Gcreate2(fid, GROUP2_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(grp2_id, FAIL, "H5Gcreate2"); - - type_id = H5Tcreate(H5T_OPAQUE, (size_t)1); - CHECK(type_id, FAIL, "H5Tcreate"); - ret = H5Tcommit2(grp2_id, string, type_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(type_id, FAIL, "H5Tcommit2"); - ret = H5Tclose(type_id); - CHECK(type_id, FAIL, "H5Tclose"); - - type_id = H5Topen2(grp2_id, string, H5P_DEFAULT); - CHECK(type_id, FAIL, "H5Topen2"); - ret = H5Tclose(type_id); - CHECK(type_id, FAIL, "H5Tclose"); - - /* Don't close the group -- use it to test that object references - * can refer to objects named in UTF-8 */ -#if 0 - space_id = H5Screate_simple(RANK, &dims, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - dset_id = - H5Dcreate2(grp2_id, DSET3_NAME, H5T_STD_REF_OBJ, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Dcreate2"); - - /* Create reference to named datatype */ - ret = H5Rcreate(&obj_ref, grp2_id, string, H5R_OBJECT, (hid_t)-1); - CHECK(ret, FAIL, "H5Rcreate"); - /* Write selection and read it back*/ - ret = H5Dwrite(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref); - CHECK(ret, FAIL, "H5Dwrite"); - ret = H5Dread(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref); - CHECK(ret, FAIL, "H5Dread"); - - /* Ensure that we can open named datatype using object reference */ - type_id = H5Rdereference2(dset_id, H5P_DEFAULT, H5R_OBJECT, &obj_ref); - CHECK(type_id, FAIL, "H5Rdereference2"); - ret = H5Tcommitted(type_id); - VERIFY(ret, 1, "H5Tcommitted"); - - ret = H5Tclose(type_id); - CHECK(type_id, FAIL, "H5Tclose"); - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); -#endif - ret = H5Gclose(grp2_id); - CHECK(ret, FAIL, "H5Gclose"); - - /* Create "group3". Build a hard link from group3 to group2, which has - * a datatype with the UTF-8 name. Create a soft link in group3 - * pointing through the hard link to the datatype. Give the soft - * link a name in UTF-8. Ensure that the soft link works. */ - - grp3_id = H5Gcreate2(fid, GROUP3_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(grp3_id, FAIL, "H5Gcreate2"); - - ret = H5Lcreate_hard(fid, GROUP2_NAME, grp3_id, GROUP2_NAME, H5P_DEFAULT, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Lcreate_hard"); - strcpy(path_buf, GROUP2_NAME); - strcat(path_buf, "/"); - strcat(path_buf, string); - ret = H5Lcreate_hard(grp3_id, path_buf, H5L_SAME_LOC, string, H5P_DEFAULT, H5P_DEFAULT); - CHECK(ret, FAIL, "H5Lcreate_hard"); - - /* Open named datatype using soft link */ - type_id = H5Topen2(grp3_id, string, H5P_DEFAULT); - CHECK(type_id, FAIL, "H5Topen2"); - - ret = H5Tclose(type_id); - CHECK(type_id, FAIL, "H5Tclose"); - ret = H5Gclose(grp3_id); - CHECK(ret, FAIL, "H5Gclose"); -} - -/* - * test_attrname - * Test that attributes can deal with UTF-8 strings - */ -void -test_attrname(hid_t fid, const char *string) -{ - hid_t group_id, attr_id; - hid_t dtype_id, space_id; - hsize_t dims = 1; - char read_buf[MAX_STRING_LENGTH]; - ssize_t size; - herr_t ret; - - /* Create a new group and give it an attribute whose - * name and value are UTF-8 strings. - */ - group_id = H5Gcreate2(fid, GROUP4_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(group_id, FAIL, "H5Gcreate2"); - - space_id = H5Screate_simple(RANK, &dims, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - dtype_id = H5Tcopy(H5T_C_S1); - CHECK(dtype_id, FAIL, "H5Tcopy"); - ret = H5Tset_size(dtype_id, (size_t)MAX_STRING_LENGTH); - CHECK(ret, FAIL, "H5Tset_size"); - - /* Create the attribute and check that its name is correct */ - attr_id = H5Acreate2(group_id, string, dtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT); - CHECK(attr_id, FAIL, "H5Acreate2"); - size = H5Aget_name(attr_id, (size_t)MAX_STRING_LENGTH, read_buf); - CHECK(size, FAIL, "H5Aget_name"); - ret = strcmp(read_buf, string); - VERIFY(ret, 0, "strcmp"); - read_buf[0] = '\0'; - - /* Try writing and reading from the attribute */ - ret = H5Awrite(attr_id, dtype_id, string); - CHECK(ret, FAIL, "H5Awrite"); - ret = H5Aread(attr_id, dtype_id, read_buf); - CHECK(ret, FAIL, "H5Aread"); - ret = strcmp(read_buf, string); - VERIFY(ret, 0, "strcmp"); - - /* Clean up */ - ret = H5Aclose(attr_id); - CHECK(ret, FAIL, "H5Aclose"); - ret = H5Tclose(dtype_id); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); - ret = H5Gclose(group_id); - CHECK(ret, FAIL, "H5Gclose"); -} - -/* - * test_compound - * Test that compound datatypes can have UTF-8 field names. - */ -void -test_compound(hid_t fid, const char *string) -{ - /* Define two compound structures, s1_t and s2_t. - * s2_t is a subset of s1_t, with two out of three - * fields. - * This is stolen from the h5_compound example. - */ - typedef struct s1_t { - int a; - double c; - float b; - } s1_t; - typedef struct s2_t { - double c; - int a; - } s2_t; - /* Actual variable declarations */ - s1_t s1; - s2_t s2; - hid_t s1_tid, s2_tid; - hid_t space_id, dset_id; - hsize_t dim = 1; - char *readbuf; - herr_t ret; - - /* Initialize compound data */ - memset(&s1, 0, sizeof(s1_t)); /* To make purify happy */ - s1.a = COMP_INT_VAL; - s1.c = COMP_DOUBLE_VAL; - s1.b = COMP_FLOAT_VAL; - - /* Create compound datatypes using UTF-8 field name */ - s1_tid = H5Tcreate(H5T_COMPOUND, sizeof(s1_t)); - CHECK(s1_tid, FAIL, "H5Tcreate"); - ret = H5Tinsert(s1_tid, string, HOFFSET(s1_t, a), H5T_NATIVE_INT); - CHECK(ret, FAIL, "H5Tinsert"); - - /* Check that the field name was stored correctly */ - readbuf = H5Tget_member_name(s1_tid, 0); - ret = strcmp(readbuf, string); - VERIFY(ret, 0, "strcmp"); - H5free_memory(readbuf); - - /* Add the other fields to the datatype */ - ret = H5Tinsert(s1_tid, "c_name", HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE); - CHECK(ret, FAIL, "H5Tinsert"); - ret = H5Tinsert(s1_tid, "b_name", HOFFSET(s1_t, b), H5T_NATIVE_FLOAT); - CHECK(ret, FAIL, "H5Tinsert"); - - /* Create second datatype, with only two fields. */ - s2_tid = H5Tcreate(H5T_COMPOUND, sizeof(s2_t)); - CHECK(s2_tid, FAIL, "H5Tcreate"); - ret = H5Tinsert(s2_tid, "c_name", HOFFSET(s2_t, c), H5T_NATIVE_DOUBLE); - CHECK(ret, FAIL, "H5Tinsert"); - ret = H5Tinsert(s2_tid, string, HOFFSET(s2_t, a), H5T_NATIVE_INT); - CHECK(ret, FAIL, "H5Tinsert"); - - /* Create the dataspace and dataset. */ - space_id = H5Screate_simple(1, &dim, NULL); - CHECK(space_id, FAIL, "H5Screate_simple"); - dset_id = H5Dcreate2(fid, DSET4_NAME, s1_tid, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK(dset_id, FAIL, "H5Dcreate2"); - - /* Write data to the dataset. */ - ret = H5Dwrite(dset_id, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s1); - CHECK(ret, FAIL, "H5Dwrite"); - - /* Ensure that data can be read back by field name into s2 struct */ - ret = H5Dread(dset_id, s2_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s2); - CHECK(ret, FAIL, "H5Dread"); - - VERIFY(s2.a, COMP_INT_VAL, "H5Dread"); - VERIFY(s2.c, COMP_DOUBLE_VAL, "H5Dread"); - - /* Clean up */ - ret = H5Tclose(s1_tid); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Tclose(s2_tid); - CHECK(ret, FAIL, "H5Tclose"); - ret = H5Sclose(space_id); - CHECK(ret, FAIL, "H5Sclose"); - ret = H5Dclose(dset_id); - CHECK(ret, FAIL, "H5Dclose"); -} - -/* - * test_enum - * Test that enumerated datatypes can have UTF-8 member names. - */ -void -test_enum(hid_t H5_ATTR_UNUSED fid, const char *string) -{ - /* Define an enumerated type */ - typedef enum { E1_RED, E1_GREEN, E1_BLUE, E1_WHITE } c_e1; - /* Variable declarations */ - c_e1 val; - herr_t ret; - hid_t type_id; - char readbuf[MAX_STRING_LENGTH]; - - /* Create an enumerated datatype in HDF5 with a UTF-8 member name*/ - type_id = H5Tcreate(H5T_ENUM, sizeof(c_e1)); - CHECK(type_id, FAIL, "H5Tcreate"); - val = E1_RED; - ret = H5Tenum_insert(type_id, "RED", &val); - CHECK(ret, FAIL, "H5Tenum_insert"); - val = E1_GREEN; - ret = H5Tenum_insert(type_id, "GREEN", &val); - CHECK(ret, FAIL, "H5Tenum_insert"); - val = E1_BLUE; - ret = H5Tenum_insert(type_id, "BLUE", &val); - CHECK(ret, FAIL, "H5Tenum_insert"); - val = E1_WHITE; - ret = H5Tenum_insert(type_id, string, &val); - CHECK(ret, FAIL, "H5Tenum_insert"); - - /* Ensure that UTF-8 member name gives the right value and vice versa. */ - ret = H5Tenum_valueof(type_id, string, &val); - CHECK(ret, FAIL, "H5Tenum_valueof"); - VERIFY(val, E1_WHITE, "H5Tenum_valueof"); - ret = H5Tenum_nameof(type_id, &val, readbuf, (size_t)MAX_STRING_LENGTH); - CHECK(ret, FAIL, "H5Tenum_nameof"); - ret = strcmp(readbuf, string); - VERIFY(ret, 0, "strcmp"); - - /* Close the datatype */ - ret = H5Tclose(type_id); - CHECK(ret, FAIL, "H5Tclose"); -} - -/* - * test_opaque - * Test comments on opaque datatypes - */ -void -test_opaque(hid_t H5_ATTR_UNUSED fid, const char *string) -{ - hid_t type_id; - char *read_buf; - herr_t ret; - - /* Create an opaque type and give it a UTF-8 tag */ - type_id = H5Tcreate(H5T_OPAQUE, (size_t)4); - CHECK(type_id, FAIL, "H5Tcreate"); - ret = H5Tset_tag(type_id, string); - CHECK(ret, FAIL, "H5Tset_tag"); - - /* Read the tag back. */ - read_buf = H5Tget_tag(type_id); - ret = strcmp(read_buf, string); - VERIFY(ret, 0, "H5Tget_tag"); - H5free_memory(read_buf); - - ret = H5Tclose(type_id); - CHECK(ret, FAIL, "H5Tclose"); -} - -/*********************/ -/* Utility functions */ -/*********************/ - -/* mkstr - * Borrwed from dtypes.c. - * Creates a new string data type. Used in string padding tests */ -static hid_t -mkstr(size_t len, H5T_str_t strpad) -{ - hid_t t; - if ((t = H5Tcopy(H5T_C_S1)) < 0) - return -1; - if (H5Tset_size(t, len) < 0) - return -1; - if (H5Tset_strpad(t, strpad) < 0) - return -1; - return t; -} - -/* write_char - * Append a unicode code point c to test_string in UTF-8 encoding. - * Return the new end of the string. - */ -unsigned int -write_char(unsigned int c, char *test_string, unsigned int cur_pos) -{ - if (c < 0x80) { - test_string[cur_pos] = (char)c; - cur_pos++; - } - else if (c < 0x800) { - test_string[cur_pos] = (char)(0xC0 | c >> 6); - test_string[cur_pos + 1] = (char)(0x80 | (c & 0x3F)); - cur_pos += 2; - } - else if (c < 0x10000) { - test_string[cur_pos] = (char)(0xE0 | c >> 12); - test_string[cur_pos + 1] = (char)(0x80 | (c >> 6 & 0x3F)); - test_string[cur_pos + 2] = (char)(0x80 | (c & 0x3F)); - cur_pos += 3; - } - else if (c < 0x200000) { - test_string[cur_pos] = (char)(0xF0 | c >> 18); - test_string[cur_pos + 1] = (char)(0x80 | (c >> 12 & 0x3F)); - test_string[cur_pos + 2] = (char)(0x80 | (c >> 6 & 0x3F)); - test_string[cur_pos + 3] = (char)(0x80 | (c & 0x3F)); - cur_pos += 4; - } - - return cur_pos; -} - -/* dump_string - * Print a string both as text (which will look like garbage) and as hex. - * The text display is not guaranteed to be accurate--certain characters - * could confuse printf (e.g., '\n'). */ -void -dump_string(const char *string) -{ - size_t length; - size_t x; - - printf("The string was:\n %s", string); - printf("Or in hex:\n"); - - length = strlen(string); - - for (x = 0; x < length; x++) - printf("%x ", string[x] & (0x000000FF)); - - printf("\n"); -} - -/* Main test. - * Create a string of random Unicode characters, then run each test with - * that string. - */ -void -test_unicode(void) -{ - char test_string[MAX_STRING_LENGTH]; - unsigned int cur_pos = 0; /* Current position in test_string */ - unsigned int unicode_point; /* Unicode code point for a single character */ - hid_t fid; /* ID of file */ - int x; /* Temporary variable */ - herr_t ret; /* Generic return value */ - - /* Output message about test being performed */ - MESSAGE(5, ("Testing UTF-8 Encoding\n")); - - /* Create a random string with length NUM_CHARS */ - HDsrandom((unsigned)HDtime(NULL)); - - memset(test_string, 0, sizeof(test_string)); - for (x = 0; x < NUM_CHARS; x++) { - /* We need to avoid unprintable characters (codes 0-31) and the - * . and / characters, since they aren't allowed in path names. - */ - unicode_point = (unsigned)(HDrandom() % (MAX_CODE_POINT - 32)) + 32; - if (unicode_point != 46 && unicode_point != 47) - cur_pos = write_char(unicode_point, test_string, cur_pos); - } - - /* Avoid unlikely case of the null string */ - if (cur_pos == 0) { - test_string[cur_pos] = 'Q'; - cur_pos++; - } - test_string[cur_pos] = '\0'; - - /* Create file */ - fid = H5Fcreate(FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - CHECK(fid, FAIL, "H5Fcreate"); - - test_fl_string(fid, test_string); - test_strpad(fid, "abcdefgh"); - test_strpad(fid, test_string); - test_vl_string(fid, test_string); - test_objnames(fid, test_string); - test_attrname(fid, test_string); - test_compound(fid, test_string); - test_enum(fid, test_string); - test_opaque(fid, test_string); - - /* Close file */ - ret = H5Fclose(fid); - CHECK(ret, FAIL, "H5Fclose"); - - /* This function could be useful in debugging if certain strings - * create errors. - */ -#ifdef DEBUG - dump_string(test_string); -#endif /* DEBUG */ -} - -/* cleanup_unicode(void) - * Delete the file this test created. - */ -void -cleanup_unicode(void) -{ - H5Fdelete(FILENAME, H5P_DEFAULT); -} |