diff options
-rw-r--r-- | generic/regcomp.c | 2 | ||||
-rw-r--r-- | generic/tclInt.h | 11 | ||||
-rw-r--r-- | generic/tclStubInit.c | 96 | ||||
-rw-r--r-- | generic/tclUtf.c | 204 | ||||
-rw-r--r-- | win/tclWin32Dll.c | 100 | ||||
-rw-r--r-- | win/tclWinFile.c | 12 | ||||
-rw-r--r-- | win/tclWinInit.c | 8 | ||||
-rw-r--r-- | win/tclWinPipe.c | 1 |
8 files changed, 232 insertions, 202 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c index 0855685..47f06c8 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -206,7 +206,7 @@ struct vars { int cflags; /* copy of compile flags */ int lasttype; /* type of previous token */ int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ + int nextvalue; /* value (if any) of next token */ int lexcon; /* lexical context type (see lex.c) */ int nsubexp; /* subexpression count */ struct subre **subs; /* subRE pointer vector */ diff --git a/generic/tclInt.h b/generic/tclInt.h index d999603..bb20792 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3170,6 +3170,17 @@ MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command); MODULE_SCOPE void TclRegisterCommandTypeName( Tcl_ObjCmdProc *implementationProc, const char *nameStr); +#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) +MODULE_SCOPE int TclUtfToWChar(const char *src, WCHAR *chPtr); +MODULE_SCOPE char * TclWCharToUtfDString(const WCHAR *uniStr, + int uniLength, Tcl_DString *dsPtr); +MODULE_SCOPE WCHAR * TclUtfToWCharDString(const char *src, + int length, Tcl_DString *dsPtr); +#else +# define TclUtfToWChar TclUtfToUniChar +# define TclWCharToUtfDString Tcl_UniCharToUtfDString +# define TclUtfToWCharDString Tcl_UtfToUniCharDString +#endif MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE size_t TclUtfCount(int ch); diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 37d0e80..e49fc02 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -118,68 +118,11 @@ Tcl_WinUtfToTChar( size_t len, Tcl_DString *dsPtr) { -#if TCL_UTF_MAX > 4 - Tcl_UniChar ch = 0; - wchar_t *w, *wString; - const char *p, *end; - int oldLength; -#endif - Tcl_DStringInit(dsPtr); if (!string) { return NULL; } -#if TCL_UTF_MAX > 4 - - if (len < 0) { - len = strlen(string); - } - - /* - * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in - * bytes. - */ - - oldLength = Tcl_DStringLength(dsPtr); - - Tcl_DStringSetLength(dsPtr, - oldLength + (int) ((len + 1) * sizeof(wchar_t))); - wString = (wchar_t *) (Tcl_DStringValue(dsPtr) + oldLength); - - w = wString; - p = string; - end = string + len - 4; - while (p < end) { - p += TclUtfToUniChar(p, &ch); - if (ch > 0xFFFF) { - *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10)); - *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF)); - } else { - *w++ = ch; - } - } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); - } else { - ch = UCHAR(*p++); - } - if (ch > 0xFFFF) { - *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10)); - *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF)); - } else { - *w++ = ch; - } - } - *w = '\0'; - Tcl_DStringSetLength(dsPtr, - oldLength + ((char *) w - (char *) wString)); - - return (char *)wString; -#else - return (char *)Tcl_UtfToUniCharDString(string, len, dsPtr); -#endif + return (char *)Tcl_UtfToWCharDString(string, len, dsPtr); } char * @@ -188,12 +131,6 @@ Tcl_WinTCharToUtf( size_t len, Tcl_DString *dsPtr) { -#if TCL_UTF_MAX > 4 - const wchar_t *w, *wEnd; - char *p, *result; - int oldLength, blen = 1; -#endif - Tcl_DStringInit(dsPtr); if (!string) { return NULL; @@ -203,36 +140,7 @@ Tcl_WinTCharToUtf( } else { len /= 2; } -#if TCL_UTF_MAX > 4 - oldLength = Tcl_DStringLength(dsPtr); - Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4); - result = Tcl_DStringValue(dsPtr) + oldLength; - - p = result; - wEnd = (wchar_t *)string + len; - for (w = (wchar_t *)string; w < wEnd; ) { - if (!blen && ((*w & 0xFC00) != 0xDC00)) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } - blen = Tcl_UniCharToUtf(*w, p); - p += blen; - if ((*w >= 0xD800) && (blen < 3)) { - /* Indication that high surrogate is handled */ - blen = 0; - } - w++; - } - if (!blen) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } - Tcl_DStringSetLength(dsPtr, oldLength + (p - result)); - - return result; -#else - return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr); -#endif + return TclWCharToUtfDString((Tcl_UniChar *)string, len, dsPtr); } #if defined(TCL_WIDE_INT_IS_LONG) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 8c0a5b7..59116c4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -267,6 +267,50 @@ Tcl_UniCharToUtfDString( return string; } +#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) +char * +TclWCharToUtfDString( + const WCHAR *uniStr, /* WCHAR string to convert to UTF-8. */ + int uniLength, /* Length of WCHAR string in Tcl_UniChars + * (must be >= 0). */ + Tcl_DString *dsPtr) /* UTF-8 representation of string is appended + * to this previously initialized DString. */ +{ + const WCHAR *w, *wEnd; + char *p, *string; + int oldLength, len = 1; + + /* + * UTF-8 string length in bytes will be <= Unicode string length * 4. + */ + + oldLength = Tcl_DStringLength(dsPtr); + Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4); + string = Tcl_DStringValue(dsPtr) + oldLength; + + p = string; + wEnd = uniStr + uniLength; + for (w = uniStr; w < wEnd; ) { + if (!len && ((*w & 0xFC00) != 0xDC00)) { + /* Special case for handling high surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } + len = Tcl_UniCharToUtf(*w, p); + p += len; + if ((*w >= 0xD800) && (len < 3)) { + len = 0; /* Indication that high surrogate was found */ + } + w++; + } + if (!len) { + /* Special case for handling high surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } + Tcl_DStringSetLength(dsPtr, oldLength + (p - string)); + + return string; +} +#endif /* *--------------------------------------------------------------------------- * @@ -418,7 +462,109 @@ Tcl_UtfToUniChar( *chPtr = byte; return 1; } - + +#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) +int +TclUtfToWChar( + const char *src, /* The UTF-8 string. */ + WCHAR *chPtr)/* Filled with the WCHAR represented by + * the UTF-8 string. */ +{ + WCHAR byte; + + /* + * Unroll 1 to 4 byte UTF-8 sequences. + */ + + byte = *((unsigned char *) src); + if (byte < 0xC0) { + /* + * Handles properly formed UTF-8 characters between 0x01 and 0x7F. + * Treats naked trail bytes 0x80 to 0x9F as valid characters from + * the cp1252 table. See: <https://en.wikipedia.org/wiki/UTF-8> + * Also treats \0 and other naked trail bytes 0xA0 to 0xBF as valid + * characters representing themselves. + */ + + /* If *chPtr contains a high surrogate (produced by a previous + * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation + * bytes, then we must produce a follow-up low surrogate. We only + * do that if the high surrogate matches the bits we encounter. + */ + if ((byte >= 0x80) + && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC)) + && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80)) + && ((src[2] & 0xC0) == 0x80)) { + *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00; + return 3; + } + if ((unsigned)(byte-0x80) < (unsigned)0x20) { + *chPtr = cp1252[byte-0x80]; + } else { + *chPtr = byte; + } + return 1; + } else if (byte < 0xE0) { + if ((src[1] & 0xC0) == 0x80) { + /* + * Two-byte-character lead-byte followed by a trail-byte. + */ + + *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F)); + if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) { + return 2; + } + } + + /* + * A two-byte-character lead-byte not followed by trail-byte + * represents itself. + */ + } else if (byte < 0xF0) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) { + /* + * Three-byte-character lead byte followed by two trail bytes. + */ + + *chPtr = (((byte & 0x0F) << 12) + | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); + if (*chPtr > 0x7FF) { + return 3; + } + } + + /* + * A three-byte-character lead-byte not followed by two trail-bytes + * represents itself. + */ + } + else if (byte < 0xF8) { + if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { + /* + * Four-byte-character lead byte followed by three trail bytes. + */ + WCHAR high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) + | ((src[2] & 0x3F) >> 4)) - 0x40; + if (high >= 0x400) { + /* out of range, < 0x10000 or > 0x10ffff */ + } else { + /* produce high surrogate, advance source pointer */ + *chPtr = 0xD800 + high; + return 1; + } + } + + /* + * A four-byte-character lead-byte not followed by three trail-bytes + * represents itself. + */ + } + + *chPtr = byte; + return 1; +} +#endif + /* *--------------------------------------------------------------------------- * @@ -489,7 +635,61 @@ Tcl_UtfToUniCharDString( return wString; } - + +#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) +WCHAR * +TclUtfToWCharDString( + const char *src, /* UTF-8 string to convert to Unicode. */ + int length, /* Length of UTF-8 string in bytes, or -1 for + * strlen(). */ + Tcl_DString *dsPtr) /* Unicode representation of string is + * appended to this previously initialized + * DString. */ +{ + WCHAR ch = 0, *w, *wString; + const char *p, *end; + int oldLength; + + if (length < 0) { + length = strlen(src); + } + + /* + * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in + * bytes. + */ + + oldLength = Tcl_DStringLength(dsPtr); + + Tcl_DStringSetLength(dsPtr, + oldLength + (int) ((length + 1) * sizeof(WCHAR))); + wString = (WCHAR *) (Tcl_DStringValue(dsPtr) + oldLength); + + w = wString; + p = src; + end = src + length - 4; + while (p < end) { + p += TclUtfToWChar(p, &ch); + *w++ = ch; + } + end += 4; + while (p < end) { + if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToWChar(p, &ch); + } else if (((UCHAR(*p)-0x80)) < 0x20) { + ch = cp1252[UCHAR(*p++)-0x80]; + } else { + ch = UCHAR(*p++); + } + *w++ = ch; + } + *w = '\0'; + Tcl_DStringSetLength(dsPtr, + oldLength + ((char *) w - (char *) wString)); + + return wString; +} +#endif /* *--------------------------------------------------------------------------- * diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c index c7e38cc..aee41c6 100644 --- a/win/tclWin32Dll.c +++ b/win/tclWin32Dll.c @@ -471,123 +471,31 @@ Tcl_WinUtfToTChar( Tcl_DString *dsPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { -#if TCL_UTF_MAX > 4 - Tcl_UniChar ch = 0; - TCHAR *w, *wString; - const char *p, *end; - int oldLength; -#endif - Tcl_DStringInit(dsPtr); if (!string) { return NULL; } -#if TCL_UTF_MAX > 4 - - if (len < 0) { - len = strlen(string); - } - - /* - * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in - * bytes. - */ - - oldLength = Tcl_DStringLength(dsPtr); - - Tcl_DStringSetLength(dsPtr, - oldLength + (int) ((len + 1) * sizeof(TCHAR))); - wString = (TCHAR *) (Tcl_DStringValue(dsPtr) + oldLength); - - w = wString; - p = string; - end = string + len - 4; - while (p < end) { - p += TclUtfToUniChar(p, &ch); - if (ch > 0xFFFF) { - *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10)); - *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF)); - } else { - *w++ = ch; - } - } - end += 4; - while (p < end) { - if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); - } else { - ch = UCHAR(*p++); - } - if (ch > 0xFFFF) { - *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10)); - *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF)); - } else { - *w++ = ch; - } - } - *w = '\0'; - Tcl_DStringSetLength(dsPtr, - oldLength + ((char *) w - (char *) wString)); - - return wString; -#else - return Tcl_UtfToUniCharDString(string, len, dsPtr); -#endif + return TclUtfToWCharDString(string, len, dsPtr); } char * Tcl_WinTCharToUtf( - const TCHAR *string, /* Source string in Unicode. */ + const WCHAR *string, /* Source string in Unicode. */ size_t len, /* Source string length in bytes, or -1 * for platform-specific string length. */ Tcl_DString *dsPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { -#if TCL_UTF_MAX > 4 - const TCHAR *w, *wEnd; - char *p, *result; - int oldLength, blen = 1; -#endif - Tcl_DStringInit(dsPtr); if (!string) { return NULL; } if (len == TCL_AUTO_LENGTH) { - len = wcslen((TCHAR *)string); + len = wcslen((WCHAR *)string); } else { len /= 2; } -#if TCL_UTF_MAX > 4 - oldLength = Tcl_DStringLength(dsPtr); - Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4); - result = Tcl_DStringValue(dsPtr) + oldLength; - - p = result; - wEnd = (TCHAR *)string + len; - for (w = (TCHAR *)string; w < wEnd; ) { - if (!blen && ((*w & 0xFC00) != 0xDC00)) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } - blen = Tcl_UniCharToUtf(*w, p); - p += blen; - if ((*w >= 0xD800) && (blen < 3)) { - /* Indication that high surrogate is handled */ - blen = 0; - } - w++; - } - if (!blen) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } - Tcl_DStringSetLength(dsPtr, oldLength + (p - result)); - - return result; -#else - return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr); -#endif + return TclWCharToUtfDString((unsigned short *)string, len, dsPtr); } /* diff --git a/win/tclWinFile.c b/win/tclWinFile.c index aba0f04..6a48c13 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -1444,13 +1444,15 @@ TclpGetUserHome( } Tcl_DStringFree(&ds); } else { - wName = Tcl_WinUtfToTChar(domain + 1, -1, &ds); + Tcl_DStringInit(&ds); + wName = TclUtfToWCharDString(domain + 1, -1, &ds); rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain); Tcl_DStringFree(&ds); nameLen = domain - name; } if (rc == 0) { - wName = Tcl_WinUtfToTChar(name, nameLen, &ds); + Tcl_DStringInit(&ds); + wName = TclUtfToWCharDString(name, nameLen, &ds); while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) { /* * user does not exists - if domain was not specified, @@ -1468,14 +1470,14 @@ TclpGetUserHome( wHomeDir = uiPtr->usri1_home_dir; if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) { size = lstrlenW(wHomeDir); - Tcl_WinTCharToUtf((TCHAR *) wHomeDir, size * sizeof(WCHAR), bufferPtr); + TclWCharToUtfDString(wHomeDir, size, bufferPtr); } else { /* * User exists but has no home dir. Return * "{GetProfilesDirectory}/<user>". */ GetProfilesDirectoryW(buf, &size); - Tcl_WinTCharToUtf(buf, (size-1) * sizeof(WCHAR), bufferPtr); + TclWCharToUtfDString(buf, size-1, bufferPtr); Tcl_DStringAppend(bufferPtr, "/", 1); Tcl_DStringAppend(bufferPtr, name, nameLen); } @@ -2895,7 +2897,7 @@ TclpNativeToNormalized( { Tcl_DString ds; Tcl_Obj *objPtr; - int len; + size_t len; char *copy, *p; Tcl_WinTCharToUtf((const TCHAR *) clientData, -1, &ds); diff --git a/win/tclWinInit.c b/win/tclWinInit.c index 80bb210..a9d6abc 100644 --- a/win/tclWinInit.c +++ b/win/tclWinInit.c @@ -188,6 +188,7 @@ TclpInitLibraryPath( Tcl_Obj *pathPtr; char installLib[LIBRARY_SIZE]; const char *bytes; + size_t length; pathPtr = Tcl_NewObj(); @@ -223,9 +224,10 @@ TclpInitLibraryPath( TclGetProcessGlobalValue(&sourceLibraryDir)); *encodingPtr = NULL; - bytes = TclGetStringFromObj(pathPtr, lengthPtr); - *valuePtr = Tcl_Alloc(*lengthPtr + 1); - memcpy(*valuePtr, bytes, *lengthPtr + 1); + bytes = TclGetStringFromObj(pathPtr, &length); + *lengthPtr = length++; + *valuePtr = Tcl_Alloc(length); + memcpy(*valuePtr, bytes, length); Tcl_DecrRefCount(pathPtr); } diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index f1542be..6f86b95 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -3120,7 +3120,6 @@ TclpOpenTemporaryFile( } namePtr += length * sizeof(TCHAR); if (basenameObj) { - size_t length; const char *string = TclGetStringFromObj(basenameObj, &length); Tcl_WinUtfToTChar(string, length, &buf); |