From 750b5c293076b6a446088fa3020e4e0787d489d7 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Fri, 15 Mar 2019 00:41:39 -0700 Subject: Adds _wopen support on Windows so that files with UTF-8 names can be opened. Fixes: HDFFV-2714, HDFFV-3914, HDFFV-3895, HDFFV-8237, HDFFV-10413, HDFFV-10691 --- release_docs/RELEASE.txt | 8 +++ src/H5system.c | 126 ++++++++++++++++++++++++++++++++++++++++ src/H5win32defs.h | 14 +++-- tools/test/perform/sio_engine.c | 6 ++ 4 files changed, 148 insertions(+), 6 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index aed539b..b5cc20c 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -135,6 +135,14 @@ New Features (DER - 2018/12/08, HDFFV-10252) + - Added the ability to open files with UTF-8 file names on Windows. + + The POSIX open(2) API call on Windows is limited to ASCII + file names. The library has been updated to convert incoming file + names to UTF-16 (via MultiByteToWideChar(CP_UTF8, ...) and use + _wopen() instead. + + (DER - 2019/03/15, HDFFV-2714, HDFFV-3914, HDFFV-3895, HDFFV-8237, HDFFV-10413, HDFFV-10691) Parallel Library: ----------------- diff --git a/src/H5system.c b/src/H5system.c index f47d057..2ddc29a 100644 --- a/src/H5system.c +++ b/src/H5system.c @@ -985,6 +985,132 @@ Wroundf(float arg) return (float)(arg < 0.0F ? HDceil(arg - 0.5F) : HDfloor(arg + 0.5F)); } +/*------------------------------------------------------------------------- +* Function: H5_get_utf16_str +* +* Purpose: Gets a UTF-16 string from an UTF-8 (or ASCII) string. +* +* Return: Success: A pointer to a UTF-16 string +* This must be freed by the caller using H5MM_xfree() +* Failure: NULL +* +* Programmer: Dana Robinson +* Spring 2019 +* +*------------------------------------------------------------------------- +*/ +const wchar_t * +H5_get_utf16_str(const char *s) +{ + int nwchars = -1; /* Length of the UTF-16 buffer */ + wchar_t *ret_s = NULL; /* UTF-16 version of the string */ + + /* Get the number of UTF-16 characters needed */ + if(0 == (nwchars = MultiByteToWideChar(CP_UTF8, 0, s, -1, NULL, 0))) + goto error; + + /* Allocate a buffer for the UTF-16 string */ + if(NULL == (ret_s = (wchar_t *)H5MM_calloc(sizeof(wchar_t) * (size_t)nwchars))) + goto error; + + /* Convert the input UTF-8 string to UTF-16 */ + if(0 == MultiByteToWideChar(CP_UTF8, 0, s, -1, ret_s, nwchars)) + goto error; + + return ret_s; + +error: + if(ret_s) + H5MM_xfree((void *)ret_s); + return NULL; +} /* end H5_get_utf16_str() */ + +/*------------------------------------------------------------------------- + * Function: Wopen_utf8 + * + * Purpose: UTF-8 equivalent of open(2) for use on Windows. + * Converts a UTF-8 input path to UTF-16 and then opens the + * file via _wopen() under the hood + * + * Return: Success: A POSIX file descriptor + * Failure: -1 + * + * Programmer: Dana Robinson + * Spring 2019 + * + *------------------------------------------------------------------------- + */ +int +Wopen_utf8(const char *path, int oflag, ...) +{ + int fd = -1; /* POSIX file descriptor to be returned */ + wchar_t *wpath = NULL; /* UTF-16 version of the path */ + int pmode = 0; /* mode (optionally set via variable args) */ + + /* Convert the input UTF-8 path to UTF-16 */ + if(NULL == (wpath = H5_get_utf16_str(path))) + goto done; + + /* _O_BINARY must be set in Windows to avoid CR-LF <-> LF EOL + * transformations when performing I/O. Note that this will + * produce Unix-style text files, though. + */ + oflag |= _O_BINARY; + + /* Get the mode, if O_CREAT was specified */ + if(oflag & O_CREAT) { + va_list vl; + + HDva_start(vl, oflag); + pmode = HDva_arg(vl, int); + HDva_end(vl); + } + + /* Open the file */ + fd = _wopen(wpath, oflag, pmode); + +done: + if(wpath) + H5MM_xfree((void *)wpath); + + return fd; +} /* end Wopen_utf8() */ + +/*------------------------------------------------------------------------- + * Function: Wremove_utf8 + * + * Purpose: UTF-8 equivalent of remove(3) for use on Windows. + * Converts a UTF-8 input path to UTF-16 and then opens the + * file via _wremove() under the hood + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Dana Robinson + * Spring 2019 + * + *------------------------------------------------------------------------- + */ +int +Wremove_utf8(const char *path) +{ + wchar_t *wpath = NULL; /* UTF-16 version of the path */ + int ret; + + /* Convert the input UTF-8 path to UTF-16 */ + if(NULL == (wpath = H5_get_utf16_str(path))) + goto done; + + /* Open the file */ + ret = _wremove(wpath); + +done: + if(wpath) + H5MM_xfree((void *)wpath); + + return ret; +} /* end Wremove_utf8() */ + #endif /* H5_HAVE_WIN32_API */ diff --git a/src/H5win32defs.h b/src/H5win32defs.h index 140afc3..2ae2575 100644 --- a/src/H5win32defs.h +++ b/src/H5win32defs.h @@ -34,6 +34,7 @@ typedef __int64 h5_stat_size_t; #define HDaccess(F,M) _access(F,M) #define HDchdir(S) _chdir(S) #define HDclose(F) _close(F) +#define HDcreat(S,M) Wopen_utf8(S,O_CREAT|O_TRUNC|O_RDWR,M) #define HDdup(F) _dup(F) #define HDfdopen(N,S) _fdopen(N,S) #define HDfileno(F) _fileno(F) @@ -47,15 +48,13 @@ typedef __int64 h5_stat_size_t; #define HDmkdir(S,M) _mkdir(S) #define HDnanosleep(N, O) Wnanosleep(N, O) #define HDoff_t __int64 -/* _O_BINARY must be set in Windows to avoid CR-LF <-> LF EOL - * transformations when performing I/O. Note that this will - * produce Unix-style text files, though. - * - * Also note that the variadic macro is using a VC++ extension + +/* Note that the variadic HDopen macro is using a VC++ extension * where the comma is dropped if nothing is passed to the ellipsis. */ -#define HDopen(S,F,...) _open(S, F | _O_BINARY, __VA_ARGS__) +#define HDopen(S,F,...) Wopen_utf8(S,F,__VA_ARGS__) #define HDread(F,M,Z) _read(F,M,Z) +#define HDremove(S) Wremove_utf8(S) #define HDrmdir(S) _rmdir(S) #define HDsetvbuf(F,S,M,Z) setvbuf(F,S,M,(Z>1?Z:2)) #define HDsleep(S) Sleep(S*1000) @@ -128,6 +127,9 @@ extern "C" { H5_DLL int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap); H5_DLL int Wnanosleep(const struct timespec *req, struct timespec *rem); H5_DLL herr_t H5_expand_windows_env_vars(char **env_var); + H5_DLL const wchar_t *H5_get_utf16_str(const char *s); + H5_DLL int Wopen_utf8(const char *path, int oflag, ...); + H5_DLL int Wremove_utf8(const char *path); /* Round functions only needed for VS2012 and earlier. * They are always built to ensure they don't go stale and diff --git a/tools/test/perform/sio_engine.c b/tools/test/perform/sio_engine.c index 4fead3f..11de229 100644 --- a/tools/test/perform/sio_engine.c +++ b/tools/test/perform/sio_engine.c @@ -54,6 +54,12 @@ } while(0) /* POSIX I/O macros */ +#ifdef H5_HAVE_WIN32_API +/* Can't link against the library, so this test will use the older, non-Unicode + * _open() call on Windows. + */ +#define HDopen(S,F,...) _open(S, F | _O_BINARY, __VA_ARGS__) +#endif /* H5_HAVE_WIN32_API */ #define POSIXCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600) #define POSIXOPEN(fn, F) HDopen(fn, F, 0600) #define POSIXCLOSE(F) HDclose(F) -- cgit v0.12 From 0c20c65e2f3abf390ad87c9167daca4cdff2de39 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Fri, 15 Mar 2019 08:45:38 -0700 Subject: Added the HDopen work-around on windows to pio_engine.c --- tools/test/perform/pio_engine.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/test/perform/pio_engine.c b/tools/test/perform/pio_engine.c index 1c0d621..43a0f64 100644 --- a/tools/test/perform/pio_engine.c +++ b/tools/test/perform/pio_engine.c @@ -82,6 +82,12 @@ /* POSIX I/O macros */ +#ifdef H5_HAVE_WIN32_API +/* Can't link against the library, so this test will use the older, non-Unicode + * _open() call on Windows. + */ +#define HDopen(S,F,...) _open(S, F | _O_BINARY, __VA_ARGS__) +#endif /* H5_HAVE_WIN32_API */ #define POSIXCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600) #define POSIXOPEN(fn, F) HDopen(fn, F, 0600) #define POSIXCLOSE(F) HDclose(F) -- cgit v0.12