From 177528494062f82a82280977e5ea3129515cf955 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 13 Sep 2018 20:30:20 +0000 Subject: Fix test-case registry-6.20, which started failing starting with the TIP #389 commit. --- generic/tclStubInit.c | 20 +++++++++++++++++--- win/tclWin32Dll.c | 18 ++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index bf3a8d1..36550ea 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -225,17 +225,31 @@ Tcl_WinUtfToTChar( int len, Tcl_DString *dsPtr) { - WCHAR *wp; + WCHAR *wp, *p; int size = MultiByteToWideChar(CP_UTF8, 0, string, len, 0, 0); Tcl_DStringInit(dsPtr); Tcl_DStringSetLength(dsPtr, 2*size+2); - wp = (WCHAR *)Tcl_DStringValue(dsPtr); + p = wp = (WCHAR *)Tcl_DStringValue(dsPtr); MultiByteToWideChar(CP_UTF8, 0, string, len, wp, size+1); if (len == -1) --size; /* account for 0-byte at string end */ + + /* It turns out that MultiByteToWideChar() cannot handle the 'modified' + * UTF-8 as used by Tcl. Every sequence of 0xC0 followed by 0x80 will + * be translated to two 0xfffd characters. This results in a test-failure + * of the registry-6.20 test-case. The simplest solution is to search for + * those two 0xfffd characters and replace them by a \u0000 character. */ + while (p < wp + size - 1) { + if (p[0] == 0xfffd && p[1] == 0xfffd) { + memmove(p+1, p+2, sizeof(WCHAR) * (p - wp + size - 2)); + p[0] = 0; + ++p; --size; + } + ++p; + } Tcl_DStringSetLength(dsPtr, 2*size); wp[size] = 0; - return (char *)wp; + return wp; } char * diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c index 74787c5..26da566 100644 --- a/win/tclWin32Dll.c +++ b/win/tclWin32Dll.c @@ -471,14 +471,28 @@ Tcl_WinUtfToTChar( Tcl_DString *dsPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - TCHAR *wp; + TCHAR *wp, *p; int size = MultiByteToWideChar(CP_UTF8, 0, string, len, 0, 0); Tcl_DStringInit(dsPtr); Tcl_DStringSetLength(dsPtr, 2*size+2); - wp = (TCHAR *)Tcl_DStringValue(dsPtr); + p = wp = (TCHAR *)Tcl_DStringValue(dsPtr); MultiByteToWideChar(CP_UTF8, 0, string, len, wp, size+1); if (len == -1) --size; /* account for 0-byte at string end */ + + /* It turns out that MultiByteToWideChar() cannot handle the 'modified' + * UTF-8 as used by Tcl. Every sequence of 0xC0 followed by 0x80 will + * be translated to two 0xfffd characters. This results in a test-failure + * of the registry-6.20 test-case. The simplest solution is to search for + * those two 0xfffd characters and replace them by a \u0000 character. */ + while (p < wp + size - 1) { + if (p[0] == 0xfffd && p[1] == 0xfffd) { + memmove(p+1, p+2, sizeof(TCHAR) * (p - wp + size - 2)); + p[0] = 0; + ++p; --size; + } + ++p; + } Tcl_DStringSetLength(dsPtr, 2*size); wp[size] = 0; return wp; -- cgit v0.12