From 985af5617fe40c9a6e9a0083dc889e56a7add362 Mon Sep 17 00:00:00 2001
From: James Laird <jlaird@hdfgroup.org>
Date: Mon, 23 May 2005 13:20:07 -0500
Subject: [svn-r10785] Purpose: Feature

Description:
Added "support" for UTF-8 character encoding.

Solution:
Wrote tests to check that UTF-8 can be used in a number of places in
HDF5 (object names, data, etc.).  These tests live in test/tunicode.c.
Added a new UTF-8 character encoding for datatypes.

Platforms tested:
mir, modi4, heping

Misc. update:
---
 MANIFEST                    |   1 +
 fortran/src/H5f90global.f90 |  16 +-
 release_docs/RELEASE.txt    |   3 +
 src/H5.c                    |   4 +-
 src/H5Tconv.c               |   6 +-
 src/H5Tpublic.h             |   4 +-
 test/Makefile.am            |   4 +-
 test/Makefile.in            |   8 +-
 test/testhdf5.c             |   1 +
 test/testhdf5.h             |   2 +
 test/tunicode.c             | 837 ++++++++++++++++++++++++++++++++++++++++++++
 tools/h5ls/h5ls.c           |   4 +-
 12 files changed, 872 insertions(+), 18 deletions(-)
 create mode 100644 test/tunicode.c

diff --git a/MANIFEST b/MANIFEST
index 61a3f34..c344b93 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1081,6 +1081,7 @@
 ./test/ttsafe_cancel.c
 ./test/ttsafe_dcreate.c
 ./test/ttsafe_error.c
+./test/tunicode.c
 ./test/tvlstr.c
 ./test/tvltypes.c
 ./test/unlink.c
diff --git a/fortran/src/H5f90global.f90 b/fortran/src/H5f90global.f90
index c1b8817..b17680e 100644
--- a/fortran/src/H5f90global.f90
+++ b/fortran/src/H5f90global.f90
@@ -470,7 +470,7 @@
 !
 ! H5T flags declaration
 !
-      INTEGER, PARAMETER :: H5T_FLAGS_LEN = 30
+      INTEGER, PARAMETER :: H5T_FLAGS_LEN = 31
       INTEGER H5T_flags(H5T_FLAGS_LEN)
 !DEC$if defined(BUILD_HDF5_DLL)
 !DEC$ ATTRIBUTES DLLEXPORT :: /H5T_FLAGS/
@@ -503,6 +503,7 @@
       INTEGER ::  H5T_NORM_MSBSET_F
       INTEGER ::  H5T_NORM_NONE_F 
       INTEGER ::  H5T_CSET_ASCII_F
+      INTEGER ::  H5T_CSET_UTF8_F
       INTEGER ::  H5T_STR_NULLTERM_F 
       INTEGER ::  H5T_STR_NULLPAD_F 
       INTEGER ::  H5T_STR_SPACEPAD_F
@@ -532,12 +533,13 @@
       EQUIVALENCE(H5T_flags(22), H5T_NORM_MSBSET_F)
       EQUIVALENCE(H5T_flags(23), H5T_NORM_NONE_F)
       EQUIVALENCE(H5T_flags(24), H5T_CSET_ASCII_F)
-      EQUIVALENCE(H5T_flags(25), H5T_STR_NULLTERM_F)
-      EQUIVALENCE(H5T_flags(26), H5T_STR_NULLPAD_F)
-      EQUIVALENCE(H5T_flags(27), H5T_STR_SPACEPAD_F)
-      EQUIVALENCE(H5T_flags(28), H5T_STR_ERROR_F)
-      EQUIVALENCE(H5T_flags(29), H5T_VLEN_F)
-      EQUIVALENCE(H5T_flags(30), H5T_ARRAY_F)
+      EQUIVALENCE(H5T_flags(25), H5T_CSET_UTF8_F)
+      EQUIVALENCE(H5T_flags(26), H5T_STR_NULLTERM_F)
+      EQUIVALENCE(H5T_flags(27), H5T_STR_NULLPAD_F)
+      EQUIVALENCE(H5T_flags(28), H5T_STR_SPACEPAD_F)
+      EQUIVALENCE(H5T_flags(29), H5T_STR_ERROR_F)
+      EQUIVALENCE(H5T_flags(30), H5T_VLEN_F)
+      EQUIVALENCE(H5T_flags(31), H5T_ARRAY_F)
 
 !
 ! H5Z flags declaration
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index cc83743..85dac4f 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -90,6 +90,9 @@ New Features
 
     Library:
     --------
+       - Added H5T_CSET_UTF8 character set to mark datatypes that use the
+          UTF-8 Unicode character encoding.  Added tests to ensure that the
+          library handles UTF-8 object names, attributes, etc. -JL 2005/05/13
        - HDF5 supports collective MPI-IO for irregular selection with HDF5
           dataset. Irregular selection is when users use H5Sselect_hyperslab
           more than once for the same dataset.
diff --git a/src/H5.c b/src/H5.c
index 821ee3a..5956c9e 100644
--- a/src/H5.c
+++ b/src/H5.c
@@ -2687,7 +2687,9 @@ H5_trace (const double *returning, const char *func, const char *type, ...)
                         case H5T_CSET_ASCII:
                             fprintf (out, "H5T_CSET_ASCII");
                             break;
-                        case H5T_CSET_RESERVED_1:
+                        case H5T_CSET_UTF8:
+                            fprintf (out, "H5T_CSET_UTF8");
+                            break;
                         case H5T_CSET_RESERVED_2:
                         case H5T_CSET_RESERVED_3:
                         case H5T_CSET_RESERVED_4:
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 5b4b4b7..d4125a1 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -3907,8 +3907,10 @@ H5T_conv_s_s (hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, size_t nelmts,
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad precision");
             if (0 != src->shared->u.atomic.offset || 0 != dst->shared->u.atomic.offset)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad offset");
-            if (H5T_CSET_ASCII != src->shared->u.atomic.u.s.cset || H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset)
-                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character set");
+            if (H5T_CSET_ASCII != src->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != src->shared->u.atomic.u.s.cset)
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad source character set");
+            if (H5T_CSET_ASCII != dst->shared->u.atomic.u.s.cset && H5T_CSET_UTF8 != dst->shared->u.atomic.u.s.cset)
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad destination character set");
             if (src->shared->u.atomic.u.s.pad<0 || src->shared->u.atomic.u.s.pad>=H5T_NPAD ||
                     dst->shared->u.atomic.u.s.pad<0 || dst->shared->u.atomic.u.s.pad>=H5T_NPAD)
                 HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bad character padding");
diff --git a/src/H5Tpublic.h b/src/H5Tpublic.h
index 98e92d3..8fe400a 100644
--- a/src/H5Tpublic.h
+++ b/src/H5Tpublic.h
@@ -78,7 +78,7 @@ typedef enum H5T_norm_t {
 typedef enum H5T_cset_t {
     H5T_CSET_ERROR       = -1,  /*error                                      */
     H5T_CSET_ASCII       = 0,   /*US ASCII                                   */
-    H5T_CSET_RESERVED_1  = 1,   /*reserved for later use		     */
+    H5T_CSET_UTF8        = 1,   /*UTF-8 Unicode encoding		     */
     H5T_CSET_RESERVED_2  = 2,   /*reserved for later use		     */
     H5T_CSET_RESERVED_3  = 3,   /*reserved for later use		     */
     H5T_CSET_RESERVED_4  = 4,   /*reserved for later use		     */
@@ -94,7 +94,7 @@ typedef enum H5T_cset_t {
     H5T_CSET_RESERVED_14 = 14,  /*reserved for later use		     */
     H5T_CSET_RESERVED_15 = 15   /*reserved for later use		     */
 } H5T_cset_t;
-#define H5T_NCSET H5T_CSET_RESERVED_1    		/*Number of character sets actually defined  */
+#define H5T_NCSET H5T_CSET_RESERVED_2    		/*Number of character sets actually defined  */
 
 /*
  * Type of padding to use in character strings.  Do not change these values
diff --git a/test/Makefile.am b/test/Makefile.am
index 4a700a4..b3f888a 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -81,7 +81,7 @@ MOSTLYCLEANFILES=cmpd_dset.h5 compact_dataset.h5 dataset.h5 extend.h5 istore.h5\
             extern_[1-3].h5 extern_[1-4][ab].raw gheap[0-4].h5 dt_atomic[1-2]\
             links.h5 links[1-3].h5 big.data big[0-9][0-9][0-9][0-9][0-9].h5  \
             dtypes[1-8].h5 dt_atomic[1-2].h5 tattr.h5 tselect.h5 mtime.h5 \
-            unlink.h5 \
+            unlink.h5 unicode.h5 \
             fillval_[0-9].h5 fillval.raw mount_[0-9].h5 testmeta.h5 ttime.h5 \
             trefer[1-3].h5 tvltypes.h5 tvlstr.h5 tvlstr2.h5 flush.h5         \
             enum1.h5 titerate.h5 ttsafe.h5 tarray1.h5 tgenprop.h5            \
@@ -95,6 +95,6 @@ MOSTLYCLEANFILES=cmpd_dset.h5 compact_dataset.h5 dataset.h5 extend.h5 istore.h5\
 # Sources for testhdf5 executable
 testhdf5_SOURCES=testhdf5.c tarray.c tattr.c tconfig.c tfile.c tgenprop.c \
     th5s.c theap.c tid.c titerate.c tmeta.c tmisc.c ttime.c trefer.c trefstr.c  \
-    tselect.c tskiplist.c ttst.c tvltypes.c tvlstr.c
+    tselect.c tskiplist.c ttst.c tunicode.c tvltypes.c tvlstr.c
 
 include $(top_srcdir)/config/conclude.am
diff --git a/test/Makefile.in b/test/Makefile.in
index d360fef..a996b94 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -236,7 +236,8 @@ am_testhdf5_OBJECTS = testhdf5.$(OBJEXT) tarray.$(OBJEXT) \
 	tid.$(OBJEXT) titerate.$(OBJEXT) tmeta.$(OBJEXT) \
 	tmisc.$(OBJEXT) ttime.$(OBJEXT) trefer.$(OBJEXT) \
 	trefstr.$(OBJEXT) tselect.$(OBJEXT) tskiplist.$(OBJEXT) \
-	ttst.$(OBJEXT) tvltypes.$(OBJEXT) tvlstr.$(OBJEXT)
+	ttst.$(OBJEXT) tunicode.$(OBJEXT) tvltypes.$(OBJEXT) \
+	tvlstr.$(OBJEXT)
 testhdf5_OBJECTS = $(am_testhdf5_OBJECTS)
 testhdf5_LDADD = $(LDADD)
 testhdf5_DEPENDENCIES = libh5test.la $(am__DEPENDENCIES_1)
@@ -534,7 +535,7 @@ MOSTLYCLEANFILES = cmpd_dset.h5 compact_dataset.h5 dataset.h5 extend.h5 istore.h
             extern_[1-3].h5 extern_[1-4][ab].raw gheap[0-4].h5 dt_atomic[1-2]\
             links.h5 links[1-3].h5 big.data big[0-9][0-9][0-9][0-9][0-9].h5  \
             dtypes[1-8].h5 dt_atomic[1-2].h5 tattr.h5 tselect.h5 mtime.h5 \
-            unlink.h5 \
+            unlink.h5 unicode.h5 \
             fillval_[0-9].h5 fillval.raw mount_[0-9].h5 testmeta.h5 ttime.h5 \
             trefer[1-3].h5 tvltypes.h5 tvlstr.h5 tvlstr2.h5 flush.h5         \
             enum1.h5 titerate.h5 ttsafe.h5 tarray1.h5 tgenprop.h5            \
@@ -549,7 +550,7 @@ MOSTLYCLEANFILES = cmpd_dset.h5 compact_dataset.h5 dataset.h5 extend.h5 istore.h
 # Sources for testhdf5 executable
 testhdf5_SOURCES = testhdf5.c tarray.c tattr.c tconfig.c tfile.c tgenprop.c \
     th5s.c theap.c tid.c titerate.c tmeta.c tmisc.c ttime.c trefer.c trefstr.c  \
-    tselect.c tskiplist.c ttst.c tvltypes.c tvlstr.c
+    tselect.c tskiplist.c ttst.c tunicode.c tvltypes.c tvlstr.c
 
 
 # Automake needs to be taught how to build lib, progs, and tests targets.
@@ -808,6 +809,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ttsafe_dcreate.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ttsafe_error.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ttst.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tunicode.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tvlstr.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tvltypes.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unlink.Po@am__quote@
diff --git a/test/testhdf5.c b/test/testhdf5.c
index 05149aa..0164b88 100644
--- a/test/testhdf5.c
+++ b/test/testhdf5.c
@@ -60,6 +60,7 @@ main(int argc, char *argv[])
     AddTest("iterate", test_iterate, cleanup_iterate,  "Group & Attribute Iteration", NULL);
     AddTest("array", test_array, cleanup_array,  "Array Datatypes", NULL);
     AddTest("genprop", test_genprop, cleanup_genprop,  "Generic Properties", NULL);
+    AddTest("unicode", test_unicode, cleanup_unicode,  "UTF-8 Encoding", NULL);
     AddTest("misc", test_misc, cleanup_misc,  "Miscellaneous", NULL);
 
     /* Display testing information */
diff --git a/test/testhdf5.h b/test/testhdf5.h
index c32782a..7043950 100644
--- a/test/testhdf5.h
+++ b/test/testhdf5.h
@@ -130,6 +130,7 @@ void			test_configure(void);
 void			test_misc(void);
 void			test_ids(void);
 void			test_skiplist(void);
+void			test_unicode(void);
 
 /* Prototypes for the cleanup routines */
 void                    cleanup_metadata(void);
@@ -146,6 +147,7 @@ void                    cleanup_array(void);
 void                    cleanup_genprop(void);
 void			cleanup_configure(void);
 void			cleanup_misc(void);
+void			cleanup_unicode(void);
 
 #ifdef __cplusplus
 }
diff --git a/test/tunicode.c b/test/tunicode.c
new file mode 100644
index 0000000..2a4b73e
--- /dev/null
+++ b/test/tunicode.c
@@ -0,0 +1,837 @@
+/* Unicode test: checks that UTF-8 strings work as HDF5 data, names, etc. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "testhdf5.h"
+
+#define NUM_CHARS 16
+#define MAX_STRING_LENGTH ((NUM_CHARS * 4) + 1) /* Max length in bytes: 4 per char, plus 1 */
+#define MAX_PATH_LENGTH (MAX_STRING_LENGTH + 20) /* Max length in bytes */
+#define MAX_CODE_POINT 0x200000
+#define FILENAME "unicode.h5"
+/* A buffer to hold two copies of the UTF-8 string */
+#define LONG_BUF_SIZE (2 * MAX_STRING_LENGTH + 4)
+
+#define DSET1_NAME "fl_string_dataset"
+#define DSET2_NAME "dataset2"
+#define DSET3_NAME "dataset3"
+#define DSET4_NAME "dataset4"
+#define VL_DSET1_NAME "vl_dset_1"
+#define VL_DSET2_NAME "vl_dset_2"
+#define GROUP1_NAME "group1"
+#define GROUP2_NAME "group2"
+#define GROUP3_NAME "group3"
+#define GROUP4_NAME "group4"
+#define SLINK_NAME "soft_link"
+
+#define RANK 1
+#define COMP_INT_VAL 7
+#define COMP_FLOAT_VAL -42.0
+#define COMP_DOUBLE_VAL 42.0
+
+/* Test function prototypes; each takes a file id and the UTF-8 test string */
+void test_fl_string(hid_t fid, const char *string);
+void test_strpad(hid_t fid, const char *string);
+void test_vl_string(hid_t fid, const char *string);
+void test_objnames(hid_t fid, const char *string);
+void test_attrname(hid_t fid, const char *string);
+void test_compound(hid_t fid, const char *string);
+void test_enum(hid_t fid, const char *string);
+void test_opaque(hid_t fid, const char *string);
+
+/* Utility function prototypes */
+static hid_t mkstr(size_t len, H5T_str_t strpad);
+unsigned int write_char(unsigned int c, char * test_string, unsigned int cur_pos);
+void dump_string(const char * string);
+
+/*
+ * test_fl_string
+ * Tests that UTF-8 can be used for fixed-length string data.  Writes `string`
+ * to DSET1_NAME in `fid` as a MAX_STRING_LENGTH-byte string, reads it back, compares.
+ */
+void test_fl_string(hid_t fid, const char *string)
+{
+  hid_t dtype_id, space_id, dset_id;
+  hsize_t dims=1;
+  char read_buf[MAX_STRING_LENGTH];
+  H5T_cset_t cset;
+  herr_t ret;
+  
+  /* Create the datatype, ensure that the character set behaves
+   * correctly (it should default to ASCII and can be set to UTF8)
+   */
+  dtype_id = H5Tcopy(H5T_C_S1);
+  CHECK(dtype_id, FAIL, "H5Tcopy");
+  ret=H5Tset_size(dtype_id, MAX_STRING_LENGTH);
+  CHECK(ret, FAIL, "H5Tset_size");
+  cset=H5Tget_cset(dtype_id);
+  VERIFY(cset, H5T_CSET_ASCII, "H5Tget_cset");
+  ret=H5Tset_cset(dtype_id, H5T_CSET_UTF8);
+  CHECK(ret, FAIL, "H5Tset_cset");
+  cset=H5Tget_cset(dtype_id);
+  VERIFY(cset, H5T_CSET_UTF8, "H5Tget_cset");
+
+  /* Create dataspace for a dataset (a single element, since dims is 1) */
+  space_id = H5Screate_simple(RANK, &dims, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+  /* Create a dataset */
+  dset_id=H5Dcreate(fid, DSET1_NAME, dtype_id, space_id, H5P_DEFAULT);
+  CHECK(dset_id, FAIL, "H5Dcreate");
+
+  /* Write UTF-8 string to dataset. NOTE(review): presumably `string` must point to at least MAX_STRING_LENGTH bytes -- confirm against caller */
+  ret = H5Dwrite(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, string);
+  CHECK(ret, FAIL, "H5Dwrite");
+
+  /* Read string back and make sure it is unchanged */
+  ret = H5Dread(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf);
+  CHECK(ret, FAIL, "H5Dread");
+
+  VERIFY(strcmp(string, read_buf), 0, "strcmp");
+
+  /* Close all */
+  ret=H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+
+  ret=H5Tclose(dtype_id);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret=H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+}
+
+/*
+ * test_strpad
+ * Tests string padding for a UTF-8 string (`fid` is unused): converts
+ * strings to shorter and then longer strings with H5Tconvert, for
+ * null-terminated, null-padded and space-padded string types.
+ * Borrows heavily from dtypes.c; the string is randomly generated.
+ */
+void test_strpad(hid_t UNUSED fid, const char *string)
+{
+  /* buf is used to hold the data that H5Tconvert operates on. */
+  char     buf[LONG_BUF_SIZE];
+
+  /* cmpbuf holds the output that H5Tconvert should produce,
+   * to compare against the actual output. */
+  char     cmpbuf[LONG_BUF_SIZE];
+
+  /* new_string is a slightly modified version of the UTF-8
+   * string to make the tests run more smoothly. */
+  char     new_string[MAX_STRING_LENGTH + 2];
+
+  unsigned int length;  /* Length of new_string in bytes */
+  unsigned int small_len;  /* Size of the small datatype */
+  unsigned int big_len;   /* Size of the larger datatype */
+  hid_t    src_type, dst_type;
+  herr_t   ret;
+
+  /* The following tests are simpler if the UTF-8 string contains
+   * the right number of bytes (even or odd, depending on the test).
+   * We create a 'new_string' whose length is convenient by prepending
+   * an 'x' to 'string' when necessary. */
+  length = strlen(string);
+  if(length % 2 != 1)
+  {
+    strcpy(new_string, "x");
+    strcat(new_string, string);
+    length++;
+  } else {
+    strcpy(new_string, string);
+  }
+
+
+  /* Convert a null-terminated string to a shorter and longer null
+   * terminated string. */
+
+  /* Create a src_type that holds the UTF-8 string and its final NULL */
+  big_len = length + 1;                     /* +1 byte for final NULL */
+  src_type = mkstr(big_len, H5T_STR_NULLTERM);
+  CHECK(src_type, FAIL, "mkstr");
+  /* Create a dst_type that holds half of the UTF-8 string and a final
+   * NULL */
+  small_len = (length +1) / 2;
+  dst_type = mkstr(small_len, H5T_STR_NULLTERM);
+  CHECK(dst_type, FAIL, "mkstr");
+
+  /* Fill the buffer with two copies of the UTF-8 string, each with a
+   * terminating NULL.  It will look like "abcdefg\0abcdefg\0". */
+  strncpy(buf, new_string, big_len);
+  strncpy(&buf[big_len], new_string, big_len);
+
+  ret = H5Tconvert(src_type, dst_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* After conversion, the buffer should look like
+   * "abc\0abc\0abcdefg\0".  Note that this is just what the bytes look
+   * like; UTF-8 characters may well have been truncated.
+   * To check that the conversion worked properly, we'll build this
+   * string manually. */
+  strncpy(cmpbuf, new_string, small_len -1);
+  cmpbuf[small_len - 1] = '\0';
+  strncpy(&cmpbuf[small_len], new_string, small_len -1);
+  cmpbuf[2 * small_len - 1] = '\0';
+  strcpy(&cmpbuf[2 * small_len], new_string);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  /* Now convert from smaller datatype to bigger datatype.  This should
+   * leave our buffer looking like: "abc\0\0\0\0\0abc\0\0\0\0\0" */
+  ret = H5Tconvert(dst_type, src_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* First fill the buffer with NULLs */
+  HDmemset(cmpbuf, '\0', LONG_BUF_SIZE);
+  /* Copy in the characters */
+  strncpy(cmpbuf, new_string, small_len -1);
+  strncpy(&cmpbuf[big_len], new_string, small_len -1);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  ret = H5Tclose(src_type);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Tclose(dst_type);
+  CHECK(ret, FAIL, "H5Tclose");
+
+
+  /* Now test null padding.  Null-padded strings do *not* need
+   * terminating NULLs, so the sizes of the datatypes are slightly
+   * different and we want a string with an even number of characters. */
+  length = strlen(string);
+  if(length % 2 != 0)
+  {
+    strcpy(new_string, "x");
+    strcat(new_string, string);
+    length++;
+  } else {
+    strcpy(new_string, string);
+  }
+
+  /* Create a src_type that holds the UTF-8 string */
+  big_len = length;
+  src_type = mkstr(big_len, H5T_STR_NULLPAD);
+  CHECK(src_type, FAIL, "mkstr");
+  /* Create a dst_type that holds half of the UTF-8 string */
+  small_len = length / 2;
+  dst_type = mkstr(small_len, H5T_STR_NULLPAD);
+  CHECK(dst_type, FAIL, "mkstr");
+
+  /* Fill the buffer with two copies of the UTF-8 string.
+   * It will look like "abcdefghabcdefgh". */
+  strncpy(buf, new_string, big_len);
+  strncpy(&buf[big_len], new_string, big_len);
+
+  ret = H5Tconvert(src_type, dst_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* After conversion, the buffer should look like
+   * "abcdabcdabcdefgh".  Note that this is just what the bytes look
+   * like; UTF-8 characters may well have been truncated.
+   * To check that the conversion worked properly, we'll build this
+   * string manually. */
+  strncpy(cmpbuf, new_string, small_len);
+  strncpy(&cmpbuf[small_len], new_string, small_len);
+  strncpy(&cmpbuf[2 * small_len], new_string, big_len);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  /* Now convert from smaller datatype to bigger datatype.  This should
+   * leave our buffer looking like: "abcd\0\0\0\0abcd\0\0\0\0" */
+  ret = H5Tconvert(dst_type, src_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* First fill the buffer with NULLs */
+  HDmemset(cmpbuf, '\0', LONG_BUF_SIZE);
+  /* Copy in the characters */
+  strncpy(cmpbuf, new_string, small_len);
+  strncpy(&cmpbuf[big_len], new_string, small_len);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  ret = H5Tclose(src_type);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Tclose(dst_type);
+  CHECK(ret, FAIL, "H5Tclose");
+
+
+  /* Test space padding.  This is very similar to null-padding; we can
+     use the same values of length, small_len, and big_len. */
+
+  src_type = mkstr(big_len, H5T_STR_SPACEPAD);
+  CHECK(src_type, FAIL, "mkstr");
+  dst_type = mkstr(small_len, H5T_STR_SPACEPAD);
+  CHECK(dst_type, FAIL, "mkstr");
+
+  /* Fill the buffer with two copies of the UTF-8 string.
+   * It will look like "abcdefghabcdefgh". */
+  strcpy(buf, new_string);
+  strcpy(&buf[big_len], new_string);
+
+  ret = H5Tconvert(src_type, dst_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* After conversion, the buffer should look like
+   * "abcdabcdabcdefgh".  Note that this is just what the bytes look
+   * like; UTF-8 characters may have been truncated.
+   * To check that the conversion worked properly, we'll build this
+   * string manually. */
+  strncpy(cmpbuf, new_string, small_len);
+  strncpy(&cmpbuf[small_len], new_string, small_len);
+  strncpy(&cmpbuf[2 * small_len], new_string, big_len);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  /* Now convert from smaller datatype to bigger datatype.  This should
+   * leave our buffer looking like: "abcd    abcd    " */
+  ret = H5Tconvert(dst_type, src_type, 2, buf, NULL, H5P_DEFAULT);
+  CHECK(ret, FAIL, "H5Tconvert");
+
+  /* First fill the buffer with spaces */
+  HDmemset(cmpbuf, ' ', LONG_BUF_SIZE);
+  /* Copy in the characters */
+  strncpy(cmpbuf, new_string, small_len);
+  strncpy(&cmpbuf[big_len], new_string, small_len);
+
+  VERIFY(HDmemcmp(buf, cmpbuf, 2*big_len), 0, "HDmemcmp");
+
+  ret = H5Tclose(src_type);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Tclose(dst_type);
+  CHECK(ret, FAIL, "H5Tclose");
+}
+
+
+/*
+ * test_vl_string: tests the variable-length string datatype with a UTF-8 string;
+ * writes `string` to VL_DSET1_NAME in `fid`, checks the buffer size, reads it back.
+ */
+void test_vl_string(hid_t fid, const char *string)
+{
+  hid_t type_id, space_id, dset_id;
+  hsize_t dims=1;
+  hsize_t size;  /* Number of bytes used */
+  char *read_buf[1];
+  herr_t ret;
+
+  /* Create dataspace for datasets */
+  space_id = H5Screate_simple(RANK, &dims, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+
+  /* Create a datatype to refer to */
+  type_id = H5Tcopy(H5T_C_S1);
+  CHECK(type_id, FAIL, "H5Tcopy");
+  ret = H5Tset_size(type_id, H5T_VARIABLE);
+  CHECK(ret, FAIL, "H5Tset_size");
+
+  /* Create a dataset */
+  dset_id=H5Dcreate(fid, VL_DSET1_NAME, type_id, space_id, H5P_DEFAULT);
+  CHECK(dset_id, FAIL, "H5Dcreate");
+
+  /* Write dataset to disk (the buffer is a one-element array of char pointers) */
+  ret=H5Dwrite(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string);
+  CHECK(ret, FAIL, "H5Dwrite");
+
+  /* Make certain the correct amount of memory will be used: strlen(string) plus 1 */
+  ret=H5Dvlen_get_buf_size(dset_id, type_id, space_id, &size);
+  CHECK(ret, FAIL, "H5Dvlen_get_buf_size");
+  VERIFY(size, (hsize_t)strlen(string) + 1, "H5Dvlen_get_buf_size");
+
+  /* Read dataset from disk */
+  ret=H5Dread(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf);
+  CHECK(ret, FAIL, "H5Dread");
+
+  /* Compare data read in */
+  VERIFY(strcmp(string, read_buf[0]), 0, "strcmp");
+
+  /* Reclaim the read VL data */
+  ret=H5Dvlen_reclaim(type_id, space_id, H5P_DEFAULT, read_buf);
+  CHECK(ret, FAIL, "H5Dvlen_reclaim");
+
+  /* Close all */
+  ret = H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+}
+
+/*
+ * test_objnames
+ * Tests that UTF-8 can be used for object names in the file `fid`: uses
+ * `string` to name a group (and its comment), a dataset, a named datatype
+ * (also reached through an object reference), and a soft link. */
+void test_objnames(hid_t fid, const char* string)
+{
+  hid_t grp_id, grp1_id, grp2_id, grp3_id;
+  hid_t type_id, dset_id, space_id;
+  char read_buf[MAX_STRING_LENGTH];
+  char path_buf[MAX_PATH_LENGTH];
+  hsize_t dims=1;
+  hobj_ref_t obj_ref;
+  herr_t ret;
+
+  /* Create a group with a UTF-8 name */
+  grp_id = H5Gcreate(fid, string, 0);
+  CHECK(grp_id, FAIL, "H5Gcreate");
+
+  /* Set a comment on the group to test that we can access the group
+   * Also test that UTF-8 comments can be read.
+   */
+  ret = H5Gset_comment(fid, string, string);
+  CHECK(ret, FAIL, "H5Gset_comment");
+  ret = H5Gget_comment(fid, string, MAX_STRING_LENGTH, read_buf);
+  CHECK(ret, FAIL, "H5Gget_comment");
+
+  ret = H5Gclose(grp_id);
+  CHECK(ret, FAIL, "H5Gclose");
+
+  VERIFY(strcmp(string, read_buf), 0, "strcmp");
+
+  /* Create a new dataset with a UTF-8 name */
+  grp1_id = H5Gcreate(fid, GROUP1_NAME, 0);
+  CHECK(grp1_id, FAIL, "H5Gcreate");
+
+  space_id = H5Screate_simple(RANK, &dims, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+  dset_id=H5Dcreate(grp1_id, string, H5T_NATIVE_INT, space_id, H5P_DEFAULT);
+  CHECK(dset_id, FAIL, "H5Dcreate");
+
+  /* Make sure that dataset can be opened again */
+  ret=H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+  ret=H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+
+  dset_id=H5Dopen(grp1_id, string);
+  CHECK(dset_id, FAIL, "H5Dopen");
+  ret=H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+  ret = H5Gclose(grp1_id);
+  CHECK(ret, FAIL, "H5Gclose");
+
+  /* Do the same for a named datatype */
+  grp2_id = H5Gcreate(fid, GROUP2_NAME, 0);
+  CHECK(grp2_id, FAIL, "H5Gcreate");
+
+  type_id = H5Tcreate(H5T_OPAQUE, 1);
+  CHECK(type_id, FAIL, "H5Tcreate");
+  ret = H5Tcommit(grp2_id, string, type_id);
+  CHECK(ret, FAIL, "H5Tcommit");
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+
+  type_id = H5Topen(grp2_id, string);
+  CHECK(type_id, FAIL, "H5Topen");
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+
+  /* Don't close the group -- use it to test that object references
+   * can refer to objects named in UTF-8 */
+
+  space_id = H5Screate_simple(RANK, &dims, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+  dset_id=H5Dcreate(grp2_id, DSET3_NAME, H5T_STD_REF_OBJ, space_id, H5P_DEFAULT);
+  CHECK(dset_id, FAIL, "H5Dcreate");
+
+  /* Create reference to named datatype */
+  ret = H5Rcreate(&obj_ref, grp2_id, string, H5R_OBJECT, -1);
+  CHECK(ret, FAIL, "H5Rcreate");
+  /* Write selection and read it back*/
+  ret = H5Dwrite(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref);
+  CHECK(ret, FAIL, "H5Dwrite");
+  ret = H5Dread(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref);
+  CHECK(ret, FAIL, "H5Dread");
+
+  /* Ensure that we can open named datatype using object reference */
+  type_id = H5Rdereference(dset_id, H5R_OBJECT, &obj_ref);
+  CHECK(type_id, FAIL, "H5Rdereference");
+  ret = H5Tcommitted(type_id);
+  VERIFY(ret, 1, "H5Tcommitted");
+
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+  ret = H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+  ret = H5Gclose(grp2_id);
+  CHECK(ret, FAIL, "H5Gclose");
+
+  /* Create "group3".  Build a hard link from group3 to group2, which has
+   * a datatype with the UTF-8 name.  Create a soft link in group3
+   * pointing through the hard link to the datatype.  Give the soft
+   * link a name in UTF-8.  Ensure that the soft link works. */
+
+  grp3_id = H5Gcreate(fid, GROUP3_NAME, 0);
+  CHECK(grp3_id, FAIL, "H5Gcreate");
+
+  ret = H5Glink2(fid, GROUP2_NAME, H5G_LINK_HARD, grp3_id, GROUP2_NAME);
+  CHECK(ret, FAIL, "H5Glink2");
+  strcpy(path_buf, GROUP2_NAME);
+  strcat(path_buf, "/");
+  strcat(path_buf, string);
+  ret = H5Glink(grp3_id, H5G_LINK_SOFT, path_buf, string);
+  CHECK(ret, FAIL, "H5Glink");
+
+  /* Open named datatype using soft link */
+  type_id = H5Topen(grp3_id, string);
+  CHECK(type_id, FAIL, "H5Topen");
+
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Gclose(grp3_id);
+  CHECK(ret, FAIL, "H5Gclose");
+}
+
+/*
+ * test_attrname: tests that attributes can deal with UTF-8 strings, using
+ * `string` as both the name and the value of an attribute on GROUP4_NAME.
+ */
+void test_attrname(hid_t fid, const char * string)
+{
+  hid_t group_id, attr_id;
+  hid_t dtype_id, space_id;
+  hsize_t dims=1;
+  char read_buf[MAX_STRING_LENGTH];
+  herr_t ret;
+
+ /* Create a new group and give it an attribute whose
+  * name and value are UTF-8 strings.
+  */
+  group_id = H5Gcreate(fid, GROUP4_NAME, 0);
+  CHECK(group_id, FAIL, "H5Gcreate");
+
+  space_id = H5Screate_simple(RANK, &dims, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+  dtype_id = H5Tcopy(H5T_C_S1);
+  CHECK(dtype_id, FAIL, "H5Tcopy");
+  ret=H5Tset_size(dtype_id, MAX_STRING_LENGTH);
+  CHECK(ret, FAIL, "H5Tset_size");
+
+  /* Create the attribute and check that its name is correct */
+  attr_id = H5Acreate(group_id, string, dtype_id, space_id, H5P_DEFAULT);
+  CHECK(attr_id, FAIL, "H5Acreate");
+  ret = H5Aget_name(attr_id, MAX_STRING_LENGTH, read_buf);
+  CHECK(ret, FAIL, "H5Aget_name");
+  ret = strcmp(read_buf, string);
+  VERIFY(ret, 0, "strcmp");
+  read_buf[0] = '\0';
+
+  /* Try writing and reading from the attribute (read_buf was emptied above, so a match means the read filled it) */
+  ret = H5Awrite(attr_id, dtype_id, string);
+  CHECK(ret, FAIL, "H5Awrite");
+  ret = H5Aread(attr_id, dtype_id, read_buf);
+  CHECK(ret, FAIL, "H5Aread");
+  ret = strcmp(read_buf, string);
+  VERIFY(ret, 0, "strcmp");
+
+  /* Clean up */
+  ret = H5Aclose(attr_id);
+  CHECK(ret, FAIL, "H5Aclose");
+  ret = H5Tclose(dtype_id);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+  ret = H5Gclose(group_id);
+  CHECK(ret, FAIL, "H5Gclose");
+}
+
+/*
+ * test_compound
+ * Test that compound datatypes can have UTF-8 field names.
+ */
+void test_compound(hid_t fid, const char * string)
+{
+  /* Define two compound structures, s1_t and s2_t.
+   * s2_t is a subset of s1_t, with two out of three
+   * fields.
+   * This is stolen from the h5_compound example.
+   */
+  typedef struct s1_t {
+      int    a;
+      double c;
+      float b;
+  } s1_t;
+  typedef struct s2_t {
+      double c;
+      int    a;
+  } s2_t;
+  /* Actual variable declarations */
+  s1_t       s1;
+  s2_t       s2;
+  hid_t      s1_tid, s2_tid;
+  hid_t      space_id, dset_id;
+  hsize_t    dim = 1;
+  char      *readbuf;
+  herr_t     ret;
+
+  /* Initialize compound data */
+  s1.a = COMP_INT_VAL;
+  s1.c = COMP_DOUBLE_VAL;
+  s1.b = COMP_FLOAT_VAL;
+
+  /* Create compound datatypes using UTF-8 field name (`string` names member `a`) */
+  s1_tid = H5Tcreate (H5T_COMPOUND, sizeof(s1_t));
+  CHECK(s1_tid, FAIL, "H5Tcreate");
+  ret = H5Tinsert(s1_tid, string, HOFFSET(s1_t, a), H5T_NATIVE_INT);
+  CHECK(ret, FAIL, "H5Tinsert");
+
+  /* Check that the field name was stored correctly. NOTE(review): readbuf is not checked for NULL before strcmp */
+  readbuf=H5Tget_member_name(s1_tid, 0);
+  ret = strcmp(readbuf, string);
+  VERIFY(ret, 0, "strcmp");
+  free(readbuf);
+
+  /* Add the other fields to the datatype */
+  ret = H5Tinsert(s1_tid, "c_name", HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE);
+  CHECK(ret, FAIL, "H5Tinsert");
+  ret = H5Tinsert(s1_tid, "b_name", HOFFSET(s1_t, b), H5T_NATIVE_FLOAT);
+  CHECK(ret, FAIL, "H5Tinsert");
+
+  /* Create second datatype, with only two fields. */
+  s2_tid = H5Tcreate (H5T_COMPOUND, sizeof(s2_t));
+  CHECK(s2_tid, FAIL, "H5Tcreate");
+  ret = H5Tinsert(s2_tid, "c_name", HOFFSET(s2_t, c), H5T_NATIVE_DOUBLE);
+  CHECK(ret, FAIL, "H5Tinsert");
+  ret = H5Tinsert(s2_tid, string, HOFFSET(s2_t, a), H5T_NATIVE_INT);
+  CHECK(ret, FAIL, "H5Tinsert");
+
+  /* Create the dataspace and dataset. */
+  space_id = H5Screate_simple(1, &dim, NULL);
+  CHECK(space_id, FAIL, "H5Screate_simple");
+  dset_id = H5Dcreate(fid, DSET4_NAME, s1_tid, space_id, H5P_DEFAULT);
+  CHECK(dset_id, FAIL, "H5Dcreate");
+
+  /* Write data to the dataset. */
+  ret = H5Dwrite(dset_id, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s1);
+  CHECK(ret, FAIL, "H5Dwrite");
+
+  /* Ensure that data can be read back by field name into s2 struct */
+  ret = H5Dread(dset_id, s2_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s2);
+  CHECK(ret, FAIL, "H5Dread");
+
+  VERIFY(s2.a, COMP_INT_VAL, "H5Dread");
+  VERIFY(s2.c, COMP_DOUBLE_VAL, "H5Dread");
+
+  /* Clean up */
+  ret = H5Tclose(s1_tid);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Tclose(s2_tid);
+  CHECK(ret, FAIL, "H5Tclose");
+  ret = H5Sclose(space_id);
+  CHECK(ret, FAIL, "H5Sclose");
+  ret = H5Dclose(dset_id);
+  CHECK(ret, FAIL, "H5Dclose");
+}
+
+/*
+ * test_enum
+ * Check that a UTF-8 string works as an enum member name, in both directions.
+ */
+void test_enum(hid_t UNUSED fid, const char * string)
+{
+  /* C-side enumeration; E1_WHITE is the member given the UTF-8 name */
+  typedef enum {
+    E1_RED,
+    E1_GREEN,
+    E1_BLUE,
+    E1_WHITE
+  } c_e1;
+  /* The first three members keep plain ASCII names */
+  static const char * const ascii_names[] = {"RED", "GREEN", "BLUE"};
+  char name_buf[MAX_STRING_LENGTH];  /* Name looked up from a value */
+  hid_t enum_tid;                    /* HDF5 enumerated datatype */
+  herr_t status;                     /* Result of the most recent call */
+  c_e1 member;                       /* Member value passed by address */
+  int idx;
+
+  enum_tid = H5Tcreate(H5T_ENUM, sizeof(c_e1));
+  CHECK(enum_tid, FAIL, "H5Tcreate");
+  /* Register RED, GREEN and BLUE in declaration order... */
+  for(idx = E1_RED; idx <= E1_BLUE; idx++) {
+    member = (c_e1)idx;
+    status = H5Tenum_insert(enum_tid, ascii_names[idx], &member);
+    CHECK(status, FAIL, "H5Tenum_insert");
+  }
+  /* ...then WHITE under the caller's UTF-8 name */
+  member = E1_WHITE;
+  status = H5Tenum_insert(enum_tid, string, &member);
+  CHECK(status, FAIL, "H5Tenum_insert");
+
+  /* Name -> value lookup must yield E1_WHITE */
+  status = H5Tenum_valueof(enum_tid, string, &member);
+  CHECK(status, FAIL, "H5Tenum_valueof");
+  VERIFY(member, E1_WHITE, "H5Tenum_valueof");
+  /* Value -> name lookup must give back the identical bytes */
+  status = H5Tenum_nameof(enum_tid, &member, name_buf, MAX_STRING_LENGTH);
+  CHECK(status, FAIL, "H5Tenum_nameof");
+  status = strcmp(name_buf, string);
+  VERIFY(status, 0, "strcmp");
+
+  /* Release the datatype */
+  status = H5Tclose(enum_tid);
+  CHECK(status, FAIL, "H5Tclose");
+}
+
+/*
+ * test_opaque
+ * Test that an opaque datatype's tag can hold a UTF-8 string.
+ */
+void test_opaque(hid_t UNUSED fid, const char * string)
+{
+  hid_t type_id;    /* Opaque datatype being tagged */
+  char * read_buf;  /* Tag returned by the library; released with free() */
+  herr_t ret;
+
+  /* Create an opaque type and give it a UTF-8 tag */
+  type_id = H5Tcreate(H5T_OPAQUE, 4);
+  CHECK(type_id, FAIL, "H5Tcreate");
+  ret = H5Tset_tag(type_id, string);
+  CHECK(ret, FAIL, "H5Tset_tag");
+
+  /* Read the tag back; a NULL result is reported as a mismatch */
+  read_buf = H5Tget_tag(type_id);
+  ret = (read_buf != NULL) ? strcmp(read_buf, string) : -1;
+  VERIFY(ret, 0, "H5Tget_tag");
+  free(read_buf);
+
+  ret = H5Tclose(type_id);
+  CHECK(ret, FAIL, "H5Tclose");
+}
+
+/*********************/
+/* Utility functions */
+/*********************/
+
+/* mkstr
+ * Borrowed from dtypes.c.  Creates a new string datatype of len bytes with
+ * padding strpad.  Returns its ID, or -1 on failure with nothing left open. */
+static hid_t mkstr(size_t len, H5T_str_t strpad)
+{
+    hid_t       t = H5Tcopy(H5T_C_S1);
+    if (t < 0) return -1;
+    if (H5Tset_size(t, len) >= 0 && H5Tset_strpad(t, strpad) >= 0) return t;
+    (void)H5Tclose(t);  /* Release the half-configured copy instead of leaking it */
+    return -1;
+}
+
+/* write_char
+ * Encode Unicode code point c as UTF-8 starting at test_string[cur_pos].
+ * Return the position just past the bytes written; for c >= 0x200000
+ * nothing is written and cur_pos comes back unchanged. */
+unsigned int write_char(unsigned int c, char * test_string, unsigned int cur_pos)
+{
+  char * out = test_string + cur_pos;  /* First byte of this character */
+  unsigned int nbytes = 0;             /* Bytes emitted for c */
+
+  if (c < 0x80) {                      /* 0xxxxxxx */
+    out[0] = (char)c;
+    nbytes = 1;
+  } else if (c < 0x800) {              /* 110xxxxx 10xxxxxx */
+    out[0] = (char)(0xC0 | (c >> 6));
+    out[1] = (char)(0x80 | (c & 0x3F));
+    nbytes = 2;
+  } else if (c < 0x10000) {            /* 1110xxxx 10xxxxxx 10xxxxxx */
+    out[0] = (char)(0xE0 | (c >> 12));
+    out[1] = (char)(0x80 | ((c >> 6) & 0x3F));
+    out[2] = (char)(0x80 | (c & 0x3F));
+    nbytes = 3;
+  } else if (c < 0x200000) {           /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    out[0] = (char)(0xF0 | (c >> 18));
+    out[1] = (char)(0x80 | ((c >> 12) & 0x3F));
+    out[2] = (char)(0x80 | ((c >> 6) & 0x3F));
+    out[3] = (char)(0x80 | (c & 0x3F));
+    nbytes = 4;
+  }
+
+  return cur_pos + nbytes;
+}
+
+/* dump_string
+ * Print a string both as text (which will look like garbage) and as hex,
+ * one unpadded hex value per byte.  The text is emitted verbatim, so the
+ * terminal may still misrender it (e.g., on control characters). */
+void dump_string(const char * string)
+{
+  size_t length = strlen(string);  /* Byte count, not character count */
+  size_t x;
+
+  printf("The string was:\n");
+  /* Never use the data as the format: a '%' in it would be interpreted */
+  printf("%s", string);
+  printf("Or in hex:\n");
+
+  /* Mask to the low 8 bits so bytes >= 0x80 don't print sign-extended */
+  for(x=0; x<length; x++)
+    printf("%x ", (unsigned int)string[x] & 0xFFu);
+
+  printf("\n");
+}
+
+/* Main test.
+ * Create a string of random Unicode characters, then run each test with
+ * that string.
+ */
+void test_unicode(void)
+{
+  char test_string[MAX_STRING_LENGTH]; /* NOTE(review): needs 4*NUM_CHARS+1 bytes -- confirm */
+  unsigned int cur_pos=0;      /* Current position in test_string */
+  unsigned int unicode_point;  /* Unicode code point for a single character */
+  hid_t fid;                   /* ID of file */
+  herr_t ret;                  /* Generic return value */
+  int x;                       /* Temporary variable */
+
+  /* Output message about test being performed */
+  MESSAGE(5, ("Testing UTF-8 Encoding\n"));
+
+  /* Create a random string of at most NUM_CHARS characters */
+  HDsrandom((unsigned long)HDtime(NULL));
+
+  for(x=0; x<NUM_CHARS; x++) {
+    /* We need to avoid unprintable characters (codes 0-31) and the
+     * . and / characters, since they aren't allowed in path names.
+     * A rejected character is dropped, not redrawn.
+     */
+    unicode_point = HDrandom() % (MAX_CODE_POINT-32) + 32;
+    if(unicode_point != '.' && unicode_point != '/')
+      cur_pos = write_char(unicode_point, test_string, cur_pos);
+  }
+
+  /* Avoid unlikely case of the null string */
+  if(cur_pos == 0)
+    test_string[cur_pos++] = 'Q';
+  test_string[cur_pos]='\0';
+
+  /* Create file */
+  fid = H5Fcreate(FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  CHECK(fid, FAIL, "H5Fcreate");
+
+  test_fl_string(fid, test_string);
+  test_strpad(fid, "abcdefgh");
+  test_strpad(fid, test_string);
+  test_vl_string(fid, test_string);
+  test_objnames(fid, test_string);
+  test_attrname(fid, test_string);
+  test_compound(fid, test_string);
+  test_enum(fid, test_string);
+  test_opaque(fid, test_string);
+
+  /* Dumping the string can help in debugging if certain strings
+   * create errors. */
+#ifdef DEBUG
+  dump_string(test_string);
+#endif /* DEBUG */
+
+  /* Close the file so its ID is not left open */
+  ret = H5Fclose(fid);
+  CHECK(ret, FAIL, "H5Fclose");
+}
+
+/* cleanup_unicode
+ * Remove the HDF5 file that test_unicode created.
+ */
+void cleanup_unicode(void)
+{
+    (void)remove(FILENAME);  /* Best effort: the result is not checked */
+}
+
+
diff --git a/tools/h5ls/h5ls.c b/tools/h5ls/h5ls.c
index d10e0d5..8485821 100644
--- a/tools/h5ls/h5ls.c
+++ b/tools/h5ls/h5ls.c
@@ -904,7 +904,9 @@ display_string_type(hid_t type, int UNUSED ind)
     case H5T_CSET_ASCII:
  cset_s = "ASCII";
  break;
-    case H5T_CSET_RESERVED_1:
+    case H5T_CSET_UTF8:
+ cset_s = "UTF-8";
+ break;
     case H5T_CSET_RESERVED_2:
     case H5T_CSET_RESERVED_3:
     case H5T_CSET_RESERVED_4:
-- 
cgit v0.12