summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/regcomp.c2
-rw-r--r--generic/tclInt.h11
-rw-r--r--generic/tclStubInit.c96
-rw-r--r--generic/tclUtf.c204
-rw-r--r--win/tclWin32Dll.c100
-rw-r--r--win/tclWinFile.c12
-rw-r--r--win/tclWinInit.c8
-rw-r--r--win/tclWinPipe.c1
8 files changed, 232 insertions, 202 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 0855685..47f06c8 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -206,7 +206,7 @@ struct vars {
int cflags; /* copy of compile flags */
int lasttype; /* type of previous token */
int nexttype; /* type of next token */
- chr nextvalue; /* value (if any) of next token */
+ int nextvalue; /* value (if any) of next token */
int lexcon; /* lexical context type (see lex.c) */
int nsubexp; /* subexpression count */
struct subre **subs; /* subRE pointer vector */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index d999603..bb20792 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3170,6 +3170,17 @@ MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command);
MODULE_SCOPE void TclRegisterCommandTypeName(
Tcl_ObjCmdProc *implementationProc,
const char *nameStr);
+#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
+MODULE_SCOPE int TclUtfToWChar(const char *src, WCHAR *chPtr);
+MODULE_SCOPE char * TclWCharToUtfDString(const WCHAR *uniStr,
+ int uniLength, Tcl_DString *dsPtr);
+MODULE_SCOPE WCHAR * TclUtfToWCharDString(const char *src,
+ int length, Tcl_DString *dsPtr);
+#else
+# define TclUtfToWChar TclUtfToUniChar
+# define TclWCharToUtfDString Tcl_UniCharToUtfDString
+# define TclUtfToWCharDString Tcl_UtfToUniCharDString
+#endif
MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE size_t TclUtfCount(int ch);
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 37d0e80..e49fc02 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -118,68 +118,11 @@ Tcl_WinUtfToTChar(
size_t len,
Tcl_DString *dsPtr)
{
-#if TCL_UTF_MAX > 4
- Tcl_UniChar ch = 0;
- wchar_t *w, *wString;
- const char *p, *end;
- int oldLength;
-#endif
-
Tcl_DStringInit(dsPtr);
if (!string) {
return NULL;
}
-#if TCL_UTF_MAX > 4
-
- if (len < 0) {
- len = strlen(string);
- }
-
- /*
- * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
- * bytes.
- */
-
- oldLength = Tcl_DStringLength(dsPtr);
-
- Tcl_DStringSetLength(dsPtr,
- oldLength + (int) ((len + 1) * sizeof(wchar_t)));
- wString = (wchar_t *) (Tcl_DStringValue(dsPtr) + oldLength);
-
- w = wString;
- p = string;
- end = string + len - 4;
- while (p < end) {
- p += TclUtfToUniChar(p, &ch);
- if (ch > 0xFFFF) {
- *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
- *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
- } else {
- *w++ = ch;
- }
- }
- end += 4;
- while (p < end) {
- if (Tcl_UtfCharComplete(p, end-p)) {
- p += TclUtfToUniChar(p, &ch);
- } else {
- ch = UCHAR(*p++);
- }
- if (ch > 0xFFFF) {
- *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
- *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
- } else {
- *w++ = ch;
- }
- }
- *w = '\0';
- Tcl_DStringSetLength(dsPtr,
- oldLength + ((char *) w - (char *) wString));
-
- return (char *)wString;
-#else
- return (char *)Tcl_UtfToUniCharDString(string, len, dsPtr);
-#endif
+ return (char *)Tcl_UtfToWCharDString(string, len, dsPtr);
}
char *
@@ -188,12 +131,6 @@ Tcl_WinTCharToUtf(
size_t len,
Tcl_DString *dsPtr)
{
-#if TCL_UTF_MAX > 4
- const wchar_t *w, *wEnd;
- char *p, *result;
- int oldLength, blen = 1;
-#endif
-
Tcl_DStringInit(dsPtr);
if (!string) {
return NULL;
@@ -203,36 +140,7 @@ Tcl_WinTCharToUtf(
} else {
len /= 2;
}
-#if TCL_UTF_MAX > 4
- oldLength = Tcl_DStringLength(dsPtr);
- Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
- result = Tcl_DStringValue(dsPtr) + oldLength;
-
- p = result;
- wEnd = (wchar_t *)string + len;
- for (w = (wchar_t *)string; w < wEnd; ) {
- if (!blen && ((*w & 0xFC00) != 0xDC00)) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
- blen = Tcl_UniCharToUtf(*w, p);
- p += blen;
- if ((*w >= 0xD800) && (blen < 3)) {
- /* Indication that high surrogate is handled */
- blen = 0;
- }
- w++;
- }
- if (!blen) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
- Tcl_DStringSetLength(dsPtr, oldLength + (p - result));
-
- return result;
-#else
- return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
-#endif
+ return TclWCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
}
#if defined(TCL_WIDE_INT_IS_LONG)
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 8c0a5b7..59116c4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -267,6 +267,50 @@ Tcl_UniCharToUtfDString(
return string;
}
+#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
+char *
+TclWCharToUtfDString(
+ const WCHAR *uniStr, /* WCHAR string to convert to UTF-8. */
+ int uniLength, /* Length of WCHAR string in Tcl_UniChars
+ * (must be >= 0). */
+ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
+ * to this previously initialized DString. */
+{
+ const WCHAR *w, *wEnd;
+ char *p, *string;
+ int oldLength, len = 1;
+
+ /*
+ * UTF-8 string length in bytes will be <= Unicode string length * 4.
+ */
+
+ oldLength = Tcl_DStringLength(dsPtr);
+ Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
+ string = Tcl_DStringValue(dsPtr) + oldLength;
+
+ p = string;
+ wEnd = uniStr + uniLength;
+ for (w = uniStr; w < wEnd; ) {
+ if (!len && ((*w & 0xFC00) != 0xDC00)) {
+ /* Special case for handling high surrogates. */
+ p += Tcl_UniCharToUtf(-1, p);
+ }
+ len = Tcl_UniCharToUtf(*w, p);
+ p += len;
+ if ((*w >= 0xD800) && (len < 3)) {
+ len = 0; /* Indication that high surrogate was found */
+ }
+ w++;
+ }
+ if (!len) {
+ /* Special case for handling high surrogates. */
+ p += Tcl_UniCharToUtf(-1, p);
+ }
+ Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
+
+ return string;
+}
+#endif
/*
*---------------------------------------------------------------------------
*
@@ -418,7 +462,109 @@ Tcl_UtfToUniChar(
*chPtr = byte;
return 1;
}
-
+
+#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
+int
+TclUtfToWChar(
+ const char *src, /* The UTF-8 string. */
+ WCHAR *chPtr)/* Filled with the WCHAR represented by
+ * the UTF-8 string. */
+{
+ WCHAR byte;
+
+ /*
+ * Unroll 1 to 4 byte UTF-8 sequences.
+ */
+
+ byte = *((unsigned char *) src);
+ if (byte < 0xC0) {
+ /*
+ * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
+ * Treats naked trail bytes 0x80 to 0x9F as valid characters from
+ * the cp1252 table. See: <https://en.wikipedia.org/wiki/UTF-8>
+ * Also treats \0 and other naked trail bytes 0xA0 to 0xBF as valid
+ * characters representing themselves.
+ */
+
+ /* If *chPtr contains a high surrogate (produced by a previous
+ * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
+ * bytes, then we must produce a follow-up low surrogate. We only
+ * do that if the high surrogate matches the bits we encounter.
+ */
+ if ((byte >= 0x80)
+ && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
+ && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
+ && ((src[2] & 0xC0) == 0x80)) {
+ *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
+ return 3;
+ }
+ if ((unsigned)(byte-0x80) < (unsigned)0x20) {
+ *chPtr = cp1252[byte-0x80];
+ } else {
+ *chPtr = byte;
+ }
+ return 1;
+ } else if (byte < 0xE0) {
+ if ((src[1] & 0xC0) == 0x80) {
+ /*
+ * Two-byte-character lead-byte followed by a trail-byte.
+ */
+
+ *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
+ if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
+ return 2;
+ }
+ }
+
+ /*
+ * A two-byte-character lead-byte not followed by trail-byte
+ * represents itself.
+ */
+ } else if (byte < 0xF0) {
+ if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
+ /*
+ * Three-byte-character lead byte followed by two trail bytes.
+ */
+
+ *chPtr = (((byte & 0x0F) << 12)
+ | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
+ if (*chPtr > 0x7FF) {
+ return 3;
+ }
+ }
+
+ /*
+ * A three-byte-character lead-byte not followed by two trail-bytes
+ * represents itself.
+ */
+ }
+ else if (byte < 0xF8) {
+ if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
+ /*
+ * Four-byte-character lead byte followed by three trail bytes.
+ */
+ WCHAR high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
+ | ((src[2] & 0x3F) >> 4)) - 0x40;
+ if (high >= 0x400) {
+ /* out of range, < 0x10000 or > 0x10ffff */
+ } else {
+ /* produce high surrogate, advance source pointer */
+ *chPtr = 0xD800 + high;
+ return 1;
+ }
+ }
+
+ /*
+ * A four-byte-character lead-byte not followed by three trail-bytes
+ * represents itself.
+ */
+ }
+
+ *chPtr = byte;
+ return 1;
+}
+#endif
+
/*
*---------------------------------------------------------------------------
*
@@ -489,7 +635,61 @@ Tcl_UtfToUniCharDString(
return wString;
}
-
+
+#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
+WCHAR *
+TclUtfToWCharDString(
+ const char *src, /* UTF-8 string to convert to Unicode. */
+ int length, /* Length of UTF-8 string in bytes, or -1 for
+ * strlen(). */
+ Tcl_DString *dsPtr) /* Unicode representation of string is
+ * appended to this previously initialized
+ * DString. */
+{
+ WCHAR ch = 0, *w, *wString;
+ const char *p, *end;
+ int oldLength;
+
+ if (length < 0) {
+ length = strlen(src);
+ }
+
+ /*
+ * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
+ * bytes.
+ */
+
+ oldLength = Tcl_DStringLength(dsPtr);
+
+ Tcl_DStringSetLength(dsPtr,
+ oldLength + (int) ((length + 1) * sizeof(WCHAR)));
+ wString = (WCHAR *) (Tcl_DStringValue(dsPtr) + oldLength);
+
+ w = wString;
+ p = src;
+ end = src + length - 4;
+ while (p < end) {
+ p += TclUtfToWChar(p, &ch);
+ *w++ = ch;
+ }
+ end += 4;
+ while (p < end) {
+ if (Tcl_UtfCharComplete(p, end-p)) {
+ p += TclUtfToWChar(p, &ch);
+ } else if (((UCHAR(*p)-0x80)) < 0x20) {
+ ch = cp1252[UCHAR(*p++)-0x80];
+ } else {
+ ch = UCHAR(*p++);
+ }
+ *w++ = ch;
+ }
+ *w = '\0';
+ Tcl_DStringSetLength(dsPtr,
+ oldLength + ((char *) w - (char *) wString));
+
+ return wString;
+}
+#endif
/*
*---------------------------------------------------------------------------
*
diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c
index c7e38cc..aee41c6 100644
--- a/win/tclWin32Dll.c
+++ b/win/tclWin32Dll.c
@@ -471,123 +471,31 @@ Tcl_WinUtfToTChar(
Tcl_DString *dsPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
-#if TCL_UTF_MAX > 4
- Tcl_UniChar ch = 0;
- TCHAR *w, *wString;
- const char *p, *end;
- int oldLength;
-#endif
-
Tcl_DStringInit(dsPtr);
if (!string) {
return NULL;
}
-#if TCL_UTF_MAX > 4
-
- if (len < 0) {
- len = strlen(string);
- }
-
- /*
- * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
- * bytes.
- */
-
- oldLength = Tcl_DStringLength(dsPtr);
-
- Tcl_DStringSetLength(dsPtr,
- oldLength + (int) ((len + 1) * sizeof(TCHAR)));
- wString = (TCHAR *) (Tcl_DStringValue(dsPtr) + oldLength);
-
- w = wString;
- p = string;
- end = string + len - 4;
- while (p < end) {
- p += TclUtfToUniChar(p, &ch);
- if (ch > 0xFFFF) {
- *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
- *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
- } else {
- *w++ = ch;
- }
- }
- end += 4;
- while (p < end) {
- if (Tcl_UtfCharComplete(p, end-p)) {
- p += TclUtfToUniChar(p, &ch);
- } else {
- ch = UCHAR(*p++);
- }
- if (ch > 0xFFFF) {
- *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
- *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
- } else {
- *w++ = ch;
- }
- }
- *w = '\0';
- Tcl_DStringSetLength(dsPtr,
- oldLength + ((char *) w - (char *) wString));
-
- return wString;
-#else
- return Tcl_UtfToUniCharDString(string, len, dsPtr);
-#endif
+ return TclUtfToWCharDString(string, len, dsPtr);
}
char *
Tcl_WinTCharToUtf(
- const TCHAR *string, /* Source string in Unicode. */
+ const WCHAR *string, /* Source string in Unicode. */
size_t len, /* Source string length in bytes, or -1
* for platform-specific string length. */
Tcl_DString *dsPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
-#if TCL_UTF_MAX > 4
- const TCHAR *w, *wEnd;
- char *p, *result;
- int oldLength, blen = 1;
-#endif
-
Tcl_DStringInit(dsPtr);
if (!string) {
return NULL;
}
if (len == TCL_AUTO_LENGTH) {
- len = wcslen((TCHAR *)string);
+ len = wcslen((WCHAR *)string);
} else {
len /= 2;
}
-#if TCL_UTF_MAX > 4
- oldLength = Tcl_DStringLength(dsPtr);
- Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
- result = Tcl_DStringValue(dsPtr) + oldLength;
-
- p = result;
- wEnd = (TCHAR *)string + len;
- for (w = (TCHAR *)string; w < wEnd; ) {
- if (!blen && ((*w & 0xFC00) != 0xDC00)) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
- blen = Tcl_UniCharToUtf(*w, p);
- p += blen;
- if ((*w >= 0xD800) && (blen < 3)) {
- /* Indication that high surrogate is handled */
- blen = 0;
- }
- w++;
- }
- if (!blen) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
- Tcl_DStringSetLength(dsPtr, oldLength + (p - result));
-
- return result;
-#else
- return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
-#endif
+ return TclWCharToUtfDString((unsigned short *)string, len, dsPtr);
}
/*
diff --git a/win/tclWinFile.c b/win/tclWinFile.c
index aba0f04..6a48c13 100644
--- a/win/tclWinFile.c
+++ b/win/tclWinFile.c
@@ -1444,13 +1444,15 @@ TclpGetUserHome(
}
Tcl_DStringFree(&ds);
} else {
- wName = Tcl_WinUtfToTChar(domain + 1, -1, &ds);
+ Tcl_DStringInit(&ds);
+ wName = TclUtfToWCharDString(domain + 1, -1, &ds);
rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain);
Tcl_DStringFree(&ds);
nameLen = domain - name;
}
if (rc == 0) {
- wName = Tcl_WinUtfToTChar(name, nameLen, &ds);
+ Tcl_DStringInit(&ds);
+ wName = TclUtfToWCharDString(name, nameLen, &ds);
while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) {
/*
* user does not exists - if domain was not specified,
@@ -1468,14 +1470,14 @@ TclpGetUserHome(
wHomeDir = uiPtr->usri1_home_dir;
if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) {
size = lstrlenW(wHomeDir);
- Tcl_WinTCharToUtf((TCHAR *) wHomeDir, size * sizeof(WCHAR), bufferPtr);
+ TclWCharToUtfDString(wHomeDir, size, bufferPtr);
} else {
/*
* User exists but has no home dir. Return
* "{GetProfilesDirectory}/<user>".
*/
GetProfilesDirectoryW(buf, &size);
- Tcl_WinTCharToUtf(buf, (size-1) * sizeof(WCHAR), bufferPtr);
+ TclWCharToUtfDString(buf, size-1, bufferPtr);
Tcl_DStringAppend(bufferPtr, "/", 1);
Tcl_DStringAppend(bufferPtr, name, nameLen);
}
@@ -2895,7 +2897,7 @@ TclpNativeToNormalized(
{
Tcl_DString ds;
Tcl_Obj *objPtr;
- int len;
+ size_t len;
char *copy, *p;
Tcl_WinTCharToUtf((const TCHAR *) clientData, -1, &ds);
diff --git a/win/tclWinInit.c b/win/tclWinInit.c
index 80bb210..a9d6abc 100644
--- a/win/tclWinInit.c
+++ b/win/tclWinInit.c
@@ -188,6 +188,7 @@ TclpInitLibraryPath(
Tcl_Obj *pathPtr;
char installLib[LIBRARY_SIZE];
const char *bytes;
+ size_t length;
pathPtr = Tcl_NewObj();
@@ -223,9 +224,10 @@ TclpInitLibraryPath(
TclGetProcessGlobalValue(&sourceLibraryDir));
*encodingPtr = NULL;
- bytes = TclGetStringFromObj(pathPtr, lengthPtr);
- *valuePtr = Tcl_Alloc(*lengthPtr + 1);
- memcpy(*valuePtr, bytes, *lengthPtr + 1);
+ bytes = TclGetStringFromObj(pathPtr, &length);
+ *lengthPtr = length++;
+ *valuePtr = Tcl_Alloc(length);
+ memcpy(*valuePtr, bytes, length);
Tcl_DecrRefCount(pathPtr);
}
diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c
index f1542be..6f86b95 100644
--- a/win/tclWinPipe.c
+++ b/win/tclWinPipe.c
@@ -3120,7 +3120,6 @@ TclpOpenTemporaryFile(
}
namePtr += length * sizeof(TCHAR);
if (basenameObj) {
- size_t length;
const char *string = TclGetStringFromObj(basenameObj, &length);
Tcl_WinUtfToTChar(string, length, &buf);