summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-14 12:41:37 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-14 12:41:37 (GMT)
commit93022718af12833e135ad743bc6169bcfd443ddf (patch)
treeb16d8ef4b2c6dd1fc739b04a568e411969721611 /generic
parent5406b207723fa1acec5df7441387d1c9229a88ac (diff)
parent0ef77b52637aa508cfcf98f6fb583cbeca47b5a4 (diff)
downloadtcl-93022718af12833e135ad743bc6169bcfd443ddf.zip
tcl-93022718af12833e135ad743bc6169bcfd443ddf.tar.gz
tcl-93022718af12833e135ad743bc6169bcfd443ddf.tar.bz2
TIP #548 implementation: Support `wchar_t` conversion functions and deprecate `Tcl_WinUtfToTChar()` and `Tcl_WinTCharToUtf()`
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls19
-rw-r--r--generic/tcl.h4
-rw-r--r--generic/tclDecls.h72
-rw-r--r--generic/tclIOSock.c5
-rw-r--r--generic/tclInt.h18
-rw-r--r--generic/tclMain.c6
-rw-r--r--generic/tclPlatDecls.h14
-rw-r--r--generic/tclStubInit.c37
-rw-r--r--generic/tclUtf.c155
-rw-r--r--generic/tclUtil.c2
-rw-r--r--generic/tclZipfs.c3
11 files changed, 196 insertions, 139 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index a3647d7..910c29e 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -1198,7 +1198,7 @@ declare 335 {
int Tcl_UtfToTitle(char *src)
}
declare 336 {
- int Tcl_UtfToUniChar(const char *src, Tcl_UniChar *chPtr)
+ int Tcl_UtfToChar16(const char *src, unsigned short *chPtr)
}
declare 337 {
int Tcl_UtfToUpper(char *src)
@@ -1253,11 +1253,11 @@ declare 353 {
unsigned long numChars)
}
declare 354 {
- char *Tcl_UniCharToUtfDString(const Tcl_UniChar *uniStr,
+ char *Tcl_Char16ToUtfDString(const unsigned short *uniStr,
int uniLength, Tcl_DString *dsPtr)
}
declare 355 {
- Tcl_UniChar *Tcl_UtfToUniCharDString(const char *src,
+ unsigned short *Tcl_UtfToChar16DString(const char *src,
int length, Tcl_DString *dsPtr)
}
declare 356 {
@@ -2390,6 +2390,19 @@ declare 645 {
int endValue, int *indexPtr)
}
+# TIP #548
+declare 646 {
+ int Tcl_UtfToUniChar(const char *src, int *chPtr)
+}
+declare 647 {
+ char *Tcl_UniCharToUtfDString(const int *uniStr,
+ int uniLength, Tcl_DString *dsPtr)
+}
+declare 648 {
+ int *Tcl_UtfToUniCharDString(const char *src,
+ int length, Tcl_DString *dsPtr)
+}
+
# ----- BASELINE -- FOR -- 8.7.0 ----- #
##############################################################################
diff --git a/generic/tcl.h b/generic/tcl.h
index a5d5799..ff4cd8d 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2154,7 +2154,7 @@ typedef struct Tcl_EncodingType {
#if TCL_UTF_MAX > 4
/*
- * unsigned int isn't 100% accurate as it should be a strict 4-byte value
+ * int isn't 100% accurate as it should be a strict 4-byte value
* (perhaps wchar_t). 64-bit systems may have troubles. The size of this
* value must be reflected correctly in regcustom.h and
* in tclEncoding.c.
@@ -2162,7 +2162,7 @@ typedef struct Tcl_EncodingType {
* XXX: string rep that Tcl_UniChar represents. Changing the size
* XXX: of Tcl_UniChar is /not/ supported.
*/
-typedef unsigned int Tcl_UniChar;
+typedef int Tcl_UniChar;
#else
typedef unsigned short Tcl_UniChar;
#endif
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 3f39cd5..eddd385 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1027,7 +1027,8 @@ EXTERN int Tcl_UtfToLower(char *src);
/* 335 */
EXTERN int Tcl_UtfToTitle(char *src);
/* 336 */
-EXTERN int Tcl_UtfToUniChar(const char *src, Tcl_UniChar *chPtr);
+EXTERN int Tcl_UtfToChar16(const char *src,
+ unsigned short *chPtr);
/* 337 */
EXTERN int Tcl_UtfToUpper(char *src);
/* 338 */
@@ -1068,10 +1069,10 @@ EXTERN int Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
const Tcl_UniChar *uct,
unsigned long numChars);
/* 354 */
-EXTERN char * Tcl_UniCharToUtfDString(const Tcl_UniChar *uniStr,
+EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr,
int uniLength, Tcl_DString *dsPtr);
/* 355 */
-EXTERN Tcl_UniChar * Tcl_UtfToUniCharDString(const char *src, int length,
+EXTERN unsigned short * Tcl_UtfToChar16DString(const char *src, int length,
Tcl_DString *dsPtr);
/* 356 */
EXTERN Tcl_RegExp Tcl_GetRegExpFromObj(Tcl_Interp *interp,
@@ -1904,6 +1905,14 @@ EXTERN int Tcl_LinkArray(Tcl_Interp *interp,
/* 645 */
EXTERN int Tcl_GetIntForIndex(Tcl_Interp *interp,
Tcl_Obj *objPtr, int endValue, int *indexPtr);
+/* 646 */
+EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr);
+/* 647 */
+EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr,
+ int uniLength, Tcl_DString *dsPtr);
+/* 648 */
+EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length,
+ Tcl_DString *dsPtr);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2275,7 +2284,7 @@ typedef struct TclStubs {
char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */
int (*tcl_UtfToLower) (char *src); /* 334 */
int (*tcl_UtfToTitle) (char *src); /* 335 */
- int (*tcl_UtfToUniChar) (const char *src, Tcl_UniChar *chPtr); /* 336 */
+ int (*tcl_UtfToChar16) (const char *src, unsigned short *chPtr); /* 336 */
int (*tcl_UtfToUpper) (char *src); /* 337 */
int (*tcl_WriteChars) (Tcl_Channel chan, const char *src, int srcLen); /* 338 */
int (*tcl_WriteObj) (Tcl_Channel chan, Tcl_Obj *objPtr); /* 339 */
@@ -2293,8 +2302,8 @@ typedef struct TclStubs {
int (*tcl_UniCharIsWordChar) (int ch); /* 351 */
int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
- char * (*tcl_UniCharToUtfDString) (const Tcl_UniChar *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
- Tcl_UniChar * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
+ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
+ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */
TCL_DEPRECATED_API("Use Tcl_EvalTokensStandard") Tcl_Obj * (*tcl_EvalTokens) (Tcl_Interp *interp, Tcl_Token *tokenPtr, int count); /* 357 */
void (*tcl_FreeParse) (Tcl_Parse *parsePtr); /* 358 */
@@ -2585,6 +2594,9 @@ typedef struct TclStubs {
int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */
int (*tcl_LinkArray) (Tcl_Interp *interp, const char *varName, void *addr, int type, int size); /* 644 */
int (*tcl_GetIntForIndex) (Tcl_Interp *interp, Tcl_Obj *objPtr, int endValue, int *indexPtr); /* 645 */
+ int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */
+ char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */
+ int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3287,8 +3299,8 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UtfToLower) /* 334 */
#define Tcl_UtfToTitle \
(tclStubsPtr->tcl_UtfToTitle) /* 335 */
-#define Tcl_UtfToUniChar \
- (tclStubsPtr->tcl_UtfToUniChar) /* 336 */
+#define Tcl_UtfToChar16 \
+ (tclStubsPtr->tcl_UtfToChar16) /* 336 */
#define Tcl_UtfToUpper \
(tclStubsPtr->tcl_UtfToUpper) /* 337 */
#define Tcl_WriteChars \
@@ -3323,10 +3335,10 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UniCharLen) /* 352 */
#define Tcl_UniCharNcmp \
(tclStubsPtr->tcl_UniCharNcmp) /* 353 */
-#define Tcl_UniCharToUtfDString \
- (tclStubsPtr->tcl_UniCharToUtfDString) /* 354 */
-#define Tcl_UtfToUniCharDString \
- (tclStubsPtr->tcl_UtfToUniCharDString) /* 355 */
+#define Tcl_Char16ToUtfDString \
+ (tclStubsPtr->tcl_Char16ToUtfDString) /* 354 */
+#define Tcl_UtfToChar16DString \
+ (tclStubsPtr->tcl_UtfToChar16DString) /* 355 */
#define Tcl_GetRegExpFromObj \
(tclStubsPtr->tcl_GetRegExpFromObj) /* 356 */
#define Tcl_EvalTokens \
@@ -3907,6 +3919,12 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_LinkArray) /* 644 */
#define Tcl_GetIntForIndex \
(tclStubsPtr->tcl_GetIntForIndex) /* 645 */
+#define Tcl_UtfToUniChar \
+ (tclStubsPtr->tcl_UtfToUniChar) /* 646 */
+#define Tcl_UniCharToUtfDString \
+ (tclStubsPtr->tcl_UniCharToUtfDString) /* 647 */
+#define Tcl_UtfToUniCharDString \
+ (tclStubsPtr->tcl_UtfToUniCharDString) /* 648 */
#endif /* defined(USE_TCL_STUBS) */
@@ -4092,6 +4110,36 @@ extern const TclStubs *tclStubsPtr;
#undef Tcl_StringMatch
#define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0)
+#if TCL_UTF_MAX <= 4
+# undef Tcl_UniCharToUtfDString
+# define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString
+# undef Tcl_UtfToUniCharDString
+# define Tcl_UtfToUniCharDString Tcl_UtfToChar16DString
+# undef Tcl_UtfToUniChar
+# define Tcl_UtfToUniChar Tcl_UtfToChar16
+#endif
+#if defined(USE_TCL_STUBS)
+# define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \
+ ? (char *(*)(const wchar_t *, int, Tcl_DString *))tclStubsPtr->tcl_UniCharToUtfDString \
+ : (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString)
+# define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \
+ ? (wchar_t *(*)(const char *, int, Tcl_DString *))tclStubsPtr->tcl_UtfToUniCharDString \
+ : (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString)
+# define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \
+ ? (int (*)(const char *, wchar_t *))tclStubsPtr->tcl_UtfToChar16 \
+ : (int (*)(const char *, wchar_t *))Tcl_UtfToUniChar)
+#else
+# define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \
+ ? (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_UniCharToUtfDString \
+ : (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString)
+# define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \
+ ? (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToUniCharDString \
+ : (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString)
+# define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \
+ ? (int (*)(const char *, wchar_t *))Tcl_UtfToChar16 \
+ : (int (*)(const char *, wchar_t *))Tcl_UtfToUniChar)
+#endif
+
/*
* Deprecated Tcl procedures:
*/
diff --git a/generic/tclIOSock.c b/generic/tclIOSock.c
index 12e2900..adf729a 100644
--- a/generic/tclIOSock.c
+++ b/generic/tclIOSock.c
@@ -30,11 +30,12 @@ gai_strerror(
ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
if (tsdPtr->initialized) {
- Tcl_DStringFree(&tsdPtr->errorMsg);
+ Tcl_DStringSetLength(&tsdPtr->errorMsg, 0);
} else {
+ Tcl_DStringInit(&tsdPtr->errorMsg);
tsdPtr->initialized = 1;
}
- Tcl_WinTCharToUtf(gai_strerrorW(code), -1, &tsdPtr->errorMsg);
+ Tcl_WCharToUtfDString(gai_strerrorW(code), -1, &tsdPtr->errorMsg);
return Tcl_DStringValue(&tsdPtr->errorMsg);
}
#endif
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 3db3f24..caa9f7a 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3243,17 +3243,6 @@ MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command);
MODULE_SCOPE void TclRegisterCommandTypeName(
Tcl_ObjCmdProc *implementationProc,
const char *nameStr);
-#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
-MODULE_SCOPE int TclUtfToWChar(const char *src, WCHAR *chPtr);
-MODULE_SCOPE char * TclWCharToUtfDString(const WCHAR *uniStr,
- int uniLength, Tcl_DString *dsPtr);
-MODULE_SCOPE WCHAR * TclUtfToWCharDString(const char *src,
- int length, Tcl_DString *dsPtr);
-#else
-# define TclUtfToWChar TclUtfToUniChar
-# define TclWCharToUtfDString Tcl_UniCharToUtfDString
-# define TclUtfToWCharDString Tcl_UtfToUniCharDString
-#endif
MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
@@ -4631,10 +4620,17 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file,
*----------------------------------------------------------------
*/
+#if TCL_UTF_MAX > 4
#define TclUtfToUniChar(str, chPtr) \
((((unsigned char) *(str)) < 0x80) ? \
((*(chPtr) = (unsigned char) *(str)), 1) \
: Tcl_UtfToUniChar(str, chPtr))
+#else
+#define TclUtfToUniChar(str, chPtr) \
+ ((((unsigned char) *(str)) < 0x80) ? \
+ ((*(chPtr) = (unsigned char) *(str)), 1) \
+ : Tcl_UtfToChar16(str, chPtr))
+#endif
/*
*----------------------------------------------------------------
diff --git a/generic/tclMain.c b/generic/tclMain.c
index 4b8fa8c..4a66793 100644
--- a/generic/tclMain.c
+++ b/generic/tclMain.c
@@ -70,10 +70,8 @@ NewNativeObj(
Tcl_DString ds;
#ifdef UNICODE
- if (length > 0) {
- length *= sizeof(WCHAR);
- }
- Tcl_WinTCharToUtf(string, length, &ds);
+ Tcl_DStringInit(&ds);
+ Tcl_WCharToUtfDString(string, length, &ds);
#else
Tcl_ExternalToUtfDString(NULL, (char *) string, length, &ds);
#endif
diff --git a/generic/tclPlatDecls.h b/generic/tclPlatDecls.h
index abc8ee8..354d752 100644
--- a/generic/tclPlatDecls.h
+++ b/generic/tclPlatDecls.h
@@ -117,6 +117,16 @@ extern const TclPlatStubs *tclPlatStubsPtr;
#undef TCL_STORAGE_CLASS
#define TCL_STORAGE_CLASS DLLIMPORT
-#endif /* _TCLPLATDECLS */
-
+#if defined(USE_TCL_STUBS) && (defined(_WIN32) || defined(__CYGWIN__))\
+ && (defined(TCL_NO_DEPRECATED) || TCL_MAJOR_VERSION > 8)
+#undef Tcl_WinUtfToTChar
+#undef Tcl_WinTCharToUtf
+#ifdef _WIN32
+#define Tcl_WinUtfToTChar(string, len, dsPtr) (Tcl_DStringInit(dsPtr), \
+ (TCHAR *)Tcl_UtfToChar16DString((string), (len), (dsPtr)))
+#define Tcl_WinTCharToUtf(string, len, dsPtr) (Tcl_DStringInit(dsPtr), \
+ (char *)Tcl_Char16ToUtfDString((string), ((((len) + 2) >> 1) - 1), (dsPtr)))
+#endif
+#endif
+#endif /* _TCLPLATDECLS */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 5e918f5..1c3f094 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -60,6 +60,9 @@
#undef TclBNInitBignumFromLong
#undef Tcl_BackgroundError
#define TclStaticPackage Tcl_StaticPackage
+#undef Tcl_UniCharToUtfDString
+#undef Tcl_UtfToUniCharDString
+#undef Tcl_UtfToUniChar
#undef TclBN_mp_tc_and
#undef TclBN_mp_tc_or
@@ -245,6 +248,8 @@ TclpGetPid(Tcl_Pid pid)
return (int) (size_t) pid;
}
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
+#undef Tcl_WinUtfToTChar
char *
Tcl_WinUtfToTChar(
const char *string,
@@ -252,12 +257,9 @@ Tcl_WinUtfToTChar(
Tcl_DString *dsPtr)
{
Tcl_DStringInit(dsPtr);
- if (!string) {
- return NULL;
- }
- return (char *)TclUtfToWCharDString(string, len, dsPtr);
+ return (char *)Tcl_UtfToChar16DString(string, len, dsPtr);
}
-
+#undef Tcl_WinTCharToUtf
char *
Tcl_WinTCharToUtf(
const char *string,
@@ -265,16 +267,9 @@ Tcl_WinTCharToUtf(
Tcl_DString *dsPtr)
{
Tcl_DStringInit(dsPtr);
- if (!string) {
- return NULL;
- }
- if (len < 0) {
- len = wcslen((wchar_t *)string);
- } else {
- len /= 2;
- }
- return TclWCharToUtfDString((const WCHAR *)string, len, dsPtr);
+ return Tcl_Char16ToUtfDString((const unsigned short *)string, len >> 1, dsPtr);
}
+#endif /* !defined(TCL_NO_DEPRECATED) */
#if defined(TCL_WIDE_INT_IS_LONG)
/* On Cygwin64, long is 64-bit while on Win64 long is 32-bit. Therefore
@@ -479,6 +474,11 @@ tellOld(
}
#endif /* !TCL_NO_DEPRECATED */
+#if defined(TCL_NO_DEPRECATED) || TCL_MAJOR_VERSION > 8
+#define Tcl_WinUtfToTChar 0
+#define Tcl_WinTCharToUtf 0
+#endif
+
/*
* WARNING: The contents of this file is automatically generated by the
* tools/genStubs.tcl script. Any modifications to the function declarations
@@ -1332,7 +1332,7 @@ const TclStubs tclStubs = {
Tcl_UtfToExternalDString, /* 333 */
Tcl_UtfToLower, /* 334 */
Tcl_UtfToTitle, /* 335 */
- Tcl_UtfToUniChar, /* 336 */
+ Tcl_UtfToChar16, /* 336 */
Tcl_UtfToUpper, /* 337 */
Tcl_WriteChars, /* 338 */
Tcl_WriteObj, /* 339 */
@@ -1350,8 +1350,8 @@ const TclStubs tclStubs = {
Tcl_UniCharIsWordChar, /* 351 */
Tcl_UniCharLen, /* 352 */
Tcl_UniCharNcmp, /* 353 */
- Tcl_UniCharToUtfDString, /* 354 */
- Tcl_UtfToUniCharDString, /* 355 */
+ Tcl_Char16ToUtfDString, /* 354 */
+ Tcl_UtfToChar16DString, /* 355 */
Tcl_GetRegExpFromObj, /* 356 */
Tcl_EvalTokens, /* 357 */
Tcl_FreeParse, /* 358 */
@@ -1642,6 +1642,9 @@ const TclStubs tclStubs = {
Tcl_IsShared, /* 643 */
Tcl_LinkArray, /* 644 */
Tcl_GetIntForIndex, /* 645 */
+ Tcl_UtfToUniChar, /* 646 */
+ Tcl_UniCharToUtfDString, /* 647 */
+ Tcl_UtfToUniCharDString, /* 648 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6c39d1c..320d7aa 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -221,22 +221,33 @@ three:
*---------------------------------------------------------------------------
*/
+#undef Tcl_UniCharToUtfDString
char *
Tcl_UniCharToUtfDString(
- const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
- int uniLength, /* Length of Unicode string in Tcl_UniChars
- * (must be >= 0). */
+ const int *uniStr, /* Unicode string to convert to UTF-8. */
+ int uniLength, /* Length of Unicode string. */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
- const Tcl_UniChar *w, *wEnd;
+ const int *w, *wEnd;
char *p, *string;
- int oldLength, len = 1;
+ int oldLength;
/*
* UTF-8 string length in bytes will be <= Unicode string length * 4.
*/
+ if (uniStr == NULL) {
+ return NULL;
+ }
+ if (uniLength < 0) {
+ uniLength = 0;
+ w = uniStr;
+ while (*w != '\0') {
+ uniLength++;
+ w++;
+ }
+ }
oldLength = Tcl_DStringLength(dsPtr);
Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
string = Tcl_DStringValue(dsPtr) + oldLength;
@@ -244,45 +255,43 @@ Tcl_UniCharToUtfDString(
p = string;
wEnd = uniStr + uniLength;
for (w = uniStr; w < wEnd; ) {
- if (!len && ((*w & 0xFC00) != 0xDC00)) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
- len = Tcl_UniCharToUtf(*w, p);
- p += len;
- if ((*w >= 0xD800) && (len < 3)) {
- len = 0; /* Indication that high surrogate was found */
- }
+ p += Tcl_UniCharToUtf(*w, p);
w++;
}
- if (!len) {
- /* Special case for handling high surrogates. */
- p += Tcl_UniCharToUtf(-1, p);
- }
Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
return string;
}
-#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
char *
-TclWCharToUtfDString(
- const WCHAR *uniStr, /* WCHAR string to convert to UTF-8. */
- int uniLength, /* Length of WCHAR string in Tcl_UniChars
- * (must be >= 0). */
+Tcl_Char16ToUtfDString(
+ const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */
+ int uniLength, /* Length of Utf-16 string. */
Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
* to this previously initialized DString. */
{
- const WCHAR *w, *wEnd;
+ const unsigned short *w, *wEnd;
char *p, *string;
int oldLength, len = 1;
/*
- * UTF-8 string length in bytes will be <= Unicode string length * 4.
+ * UTF-8 string length in bytes will be <= Utf16 string length * 3.
*/
+ if (uniStr == NULL) {
+ return NULL;
+ }
+ if (uniLength < 0) {
+
+ uniLength = 0;
+ w = uniStr;
+ while (*w != '\0') {
+ uniLength++;
+ w++;
+ }
+ }
oldLength = Tcl_DStringLength(dsPtr);
- Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
+ Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 3);
string = Tcl_DStringValue(dsPtr) + oldLength;
p = string;
@@ -307,7 +316,6 @@ TclWCharToUtfDString(
return string;
}
-#endif
/*
*---------------------------------------------------------------------------
*
@@ -350,13 +358,14 @@ static const unsigned short cp1252[32] = {
0x2DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178
};
+#undef Tcl_UtfToUniChar
int
Tcl_UtfToUniChar(
const char *src, /* The UTF-8 string. */
- Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
+ int *chPtr)/* Filled with the unsigned int represented by
* the UTF-8 string. */
{
- Tcl_UniChar byte;
+ int byte;
/*
* Unroll 1 to 4 byte UTF-8 sequences.
@@ -372,20 +381,6 @@ Tcl_UtfToUniChar(
* characters representing themselves.
*/
-#if TCL_UTF_MAX <= 4
- /* If *chPtr contains a high surrogate (produced by a previous
- * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
- * bytes, then we must produce a follow-up low surrogate. We only
- * do that if the high surrogate matches the bits we encounter.
- */
- if ((byte >= 0x80)
- && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
- && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
- && ((src[2] & 0xC0) == 0x80)) {
- *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
- return 3;
- }
-#endif
if ((unsigned)(byte-0x80) < (unsigned)0x20) {
*chPtr = cp1252[byte-0x80];
} else {
@@ -431,23 +426,11 @@ Tcl_UtfToUniChar(
/*
* Four-byte-character lead byte followed by three trail bytes.
*/
-#if TCL_UTF_MAX <= 4
- Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
- | ((src[2] & 0x3F) >> 4)) - 0x40;
- if (high >= 0x400) {
- /* out of range, < 0x10000 or > 0x10ffff */
- } else {
- /* produce high surrogate, advance source pointer */
- *chPtr = 0xD800 + high;
- return 1;
- }
-#else
*chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
return 4;
}
-#endif
}
/*
@@ -460,14 +443,13 @@ Tcl_UtfToUniChar(
return 1;
}
-#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
int
-TclUtfToWChar(
+Tcl_UtfToChar16(
const char *src, /* The UTF-8 string. */
- WCHAR *chPtr)/* Filled with the WCHAR represented by
+ unsigned short *chPtr)/* Filled with the unsigned short represented by
* the UTF-8 string. */
{
- WCHAR byte;
+ unsigned short byte;
/*
* Unroll 1 to 4 byte UTF-8 sequences.
@@ -540,7 +522,7 @@ TclUtfToWChar(
/*
* Four-byte-character lead byte followed by three trail bytes.
*/
- WCHAR high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
+ unsigned short high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
| ((src[2] & 0x3F) >> 4)) - 0x40;
if (high >= 0x400) {
/* out of range, < 0x10000 or > 0x10ffff */
@@ -560,7 +542,6 @@ TclUtfToWChar(
*chPtr = byte;
return 1;
}
-#endif
/*
*---------------------------------------------------------------------------
@@ -580,7 +561,8 @@ TclUtfToWChar(
*---------------------------------------------------------------------------
*/
-Tcl_UniChar *
+#undef Tcl_UtfToUniCharDString
+int *
Tcl_UtfToUniCharDString(
const char *src, /* UTF-8 string to convert to Unicode. */
int length, /* Length of UTF-8 string in bytes, or -1 for
@@ -589,10 +571,13 @@ Tcl_UtfToUniCharDString(
* appended to this previously initialized
* DString. */
{
- Tcl_UniChar ch = 0, *w, *wString;
+ int ch = 0, *w, *wString;
const char *p, *end;
int oldLength;
+ if (src == NULL) {
+ return NULL;
+ }
if (length < 0) {
length = strlen(src);
}
@@ -605,20 +590,20 @@ Tcl_UtfToUniCharDString(
oldLength = Tcl_DStringLength(dsPtr);
Tcl_DStringSetLength(dsPtr,
- oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar)));
- wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
+ oldLength + ((length + 1) * sizeof(int)));
+ wString = (int *) (Tcl_DStringValue(dsPtr) + oldLength);
w = wString;
p = src;
end = src + length - 4;
while (p < end) {
- p += TclUtfToUniChar(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
*w++ = ch;
}
end += 4;
while (p < end) {
if (Tcl_UtfCharComplete(p, end-p)) {
- p += TclUtfToUniChar(p, &ch);
+ p += Tcl_UtfToUniChar(p, &ch);
} else {
ch = UCHAR(*p++);
}
@@ -631,9 +616,8 @@ Tcl_UtfToUniCharDString(
return wString;
}
-#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
-WCHAR *
-TclUtfToWCharDString(
+unsigned short *
+Tcl_UtfToChar16DString(
const char *src, /* UTF-8 string to convert to Unicode. */
int length, /* Length of UTF-8 string in bytes, or -1 for
* strlen(). */
@@ -641,10 +625,14 @@ TclUtfToWCharDString(
* appended to this previously initialized
* DString. */
{
- WCHAR ch = 0, *w, *wString;
+ unsigned short ch = 0;
+ unsigned short *w, *wString;
const char *p, *end;
int oldLength;
+ if (src == NULL) {
+ return NULL;
+ }
if (length < 0) {
length = strlen(src);
}
@@ -657,20 +645,20 @@ TclUtfToWCharDString(
oldLength = Tcl_DStringLength(dsPtr);
Tcl_DStringSetLength(dsPtr,
- oldLength + (int) ((length + 1) * sizeof(WCHAR)));
- wString = (WCHAR *) (Tcl_DStringValue(dsPtr) + oldLength);
+ oldLength + ((length + 1) * sizeof(unsigned short)));
+ wString = (unsigned short *) (Tcl_DStringValue(dsPtr) + oldLength);
w = wString;
p = src;
end = src + length - 4;
while (p < end) {
- p += TclUtfToWChar(p, &ch);
+ p += Tcl_UtfToChar16(p, &ch);
*w++ = ch;
}
end += 4;
while (p < end) {
if (Tcl_UtfCharComplete(p, end-p)) {
- p += TclUtfToWChar(p, &ch);
+ p += Tcl_UtfToChar16(p, &ch);
} else {
ch = UCHAR(*p++);
}
@@ -682,7 +670,6 @@ TclUtfToWCharDString(
return wString;
}
-#endif
/*
*---------------------------------------------------------------------------
*
@@ -2151,7 +2138,7 @@ Tcl_UniCharCaseMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while (*uniStr && (p != *uniStr)
- && (p != (Tcl_UniChar)Tcl_UniCharToLower(*uniStr))) {
+ && (p != Tcl_UniCharToLower(*uniStr))) {
uniStr++;
}
} else {
@@ -2191,13 +2178,13 @@ Tcl_UniCharCaseMatch(
Tcl_UniChar startChar, endChar;
uniPattern++;
- ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniStr) : *uniStr);
+ ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr);
uniStr++;
while (1) {
if ((*uniPattern == ']') || (*uniPattern == 0)) {
return 0;
}
- startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern)
+ startChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
: *uniPattern);
uniPattern++;
if (*uniPattern == '-') {
@@ -2205,7 +2192,7 @@ Tcl_UniCharCaseMatch(
if (*uniPattern == 0) {
return 0;
}
- endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern)
+ endChar = (nocase ? Tcl_UniCharToLower(*uniPattern)
: *uniPattern);
uniPattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
@@ -2343,7 +2330,7 @@ TclUniCharMatch(
if ((p != '[') && (p != '?') && (p != '\\')) {
if (nocase) {
while ((string < stringEnd) && (p != *string)
- && (p != (Tcl_UniChar)Tcl_UniCharToLower(*string))) {
+ && (p != Tcl_UniCharToLower(*string))) {
string++;
}
} else {
@@ -2384,20 +2371,20 @@ TclUniCharMatch(
Tcl_UniChar ch1, startChar, endChar;
pattern++;
- ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*string) : *string);
+ ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
string++;
while (1) {
if ((*pattern == ']') || (pattern == patternEnd)) {
return 0;
}
- startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern) : *pattern);
+ startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
pattern++;
if (*pattern == '-') {
pattern++;
if (pattern == patternEnd) {
return 0;
}
- endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern)
+ endChar = (nocase ? Tcl_UniCharToLower(*pattern)
: *pattern);
pattern++;
if (((startChar <= ch1) && (ch1 <= endChar))
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index c5c816f..0e4bb18 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2304,7 +2304,7 @@ Tcl_StringCaseMatch(
if (nocase) {
while (*str) {
charLen = TclUtfToUniChar(str, &ch1);
- if (ch2==ch1 || ch2==(Tcl_UniChar)Tcl_UniCharToLower(ch1)) {
+ if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) {
break;
}
str += charLen;
diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c
index d842289..d59d893 100644
--- a/generic/tclZipfs.c
+++ b/generic/tclZipfs.c
@@ -4927,7 +4927,8 @@ TclZipfs_AppHook(
#ifdef _WIN32
Tcl_DString ds;
- archive = Tcl_WinTCharToUtf((*argvPtr)[1], -1, &ds);
+ Tcl_DStringInit(&ds);
+ archive = Tcl_WCharToUtfDString((*argvPtr)[1], -1, &ds);
#else /* !_WIN32 */
archive = (*argvPtr)[1];
#endif /* _WIN32 */