diff options
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.decls | 19 | ||||
-rw-r--r-- | generic/tcl.h | 4 | ||||
-rw-r--r-- | generic/tclDecls.h | 72 | ||||
-rw-r--r-- | generic/tclIOSock.c | 5 | ||||
-rw-r--r-- | generic/tclInt.h | 18 | ||||
-rw-r--r-- | generic/tclMain.c | 6 | ||||
-rw-r--r-- | generic/tclPlatDecls.h | 14 | ||||
-rw-r--r-- | generic/tclStubInit.c | 37 | ||||
-rw-r--r-- | generic/tclUtf.c | 155 | ||||
-rw-r--r-- | generic/tclUtil.c | 2 | ||||
-rw-r--r-- | generic/tclZipfs.c | 3 |
11 files changed, 196 insertions, 139 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index a3647d7..910c29e 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1198,7 +1198,7 @@ declare 335 { int Tcl_UtfToTitle(char *src) } declare 336 { - int Tcl_UtfToUniChar(const char *src, Tcl_UniChar *chPtr) + int Tcl_UtfToChar16(const char *src, unsigned short *chPtr) } declare 337 { int Tcl_UtfToUpper(char *src) @@ -1253,11 +1253,11 @@ declare 353 { unsigned long numChars) } declare 354 { - char *Tcl_UniCharToUtfDString(const Tcl_UniChar *uniStr, + char *Tcl_Char16ToUtfDString(const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr) } declare 355 { - Tcl_UniChar *Tcl_UtfToUniCharDString(const char *src, + unsigned short *Tcl_UtfToChar16DString(const char *src, int length, Tcl_DString *dsPtr) } declare 356 { @@ -2390,6 +2390,19 @@ declare 645 { int endValue, int *indexPtr) } +# TIP #548 +declare 646 { + int Tcl_UtfToUniChar(const char *src, int *chPtr) +} +declare 647 { + char *Tcl_UniCharToUtfDString(const int *uniStr, + int uniLength, Tcl_DString *dsPtr) +} +declare 648 { + int *Tcl_UtfToUniCharDString(const char *src, + int length, Tcl_DString *dsPtr) +} + # ----- BASELINE -- FOR -- 8.7.0 ----- # ############################################################################## diff --git a/generic/tcl.h b/generic/tcl.h index a5d5799..ff4cd8d 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2154,7 +2154,7 @@ typedef struct Tcl_EncodingType { #if TCL_UTF_MAX > 4 /* - * unsigned int isn't 100% accurate as it should be a strict 4-byte value + * int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this * value must be reflected correctly in regcustom.h and * in tclEncoding.c. @@ -2162,7 +2162,7 @@ typedef struct Tcl_EncodingType { * XXX: string rep that Tcl_UniChar represents. Changing the size * XXX: of Tcl_UniChar is /not/ supported. */ -typedef unsigned int Tcl_UniChar; +typedef int Tcl_UniChar; #else typedef unsigned short Tcl_UniChar; #endif diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 3f39cd5..eddd385 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1027,7 +1027,8 @@ EXTERN int Tcl_UtfToLower(char *src); /* 335 */ EXTERN int Tcl_UtfToTitle(char *src); /* 336 */ -EXTERN int Tcl_UtfToUniChar(const char *src, Tcl_UniChar *chPtr); +EXTERN int Tcl_UtfToChar16(const char *src, + unsigned short *chPtr); /* 337 */ EXTERN int Tcl_UtfToUpper(char *src); /* 338 */ @@ -1068,10 +1069,10 @@ EXTERN int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 354 */ -EXTERN char * Tcl_UniCharToUtfDString(const Tcl_UniChar *uniStr, +EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 355 */ -EXTERN Tcl_UniChar * Tcl_UtfToUniCharDString(const char *src, int length, +EXTERN unsigned short * Tcl_UtfToChar16DString(const char *src, int length, Tcl_DString *dsPtr); /* 356 */ EXTERN Tcl_RegExp Tcl_GetRegExpFromObj(Tcl_Interp *interp, @@ -1904,6 +1905,14 @@ EXTERN int Tcl_LinkArray(Tcl_Interp *interp, /* 645 */ EXTERN int Tcl_GetIntForIndex(Tcl_Interp *interp, Tcl_Obj *objPtr, int endValue, int *indexPtr); +/* 646 */ +EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr); +/* 647 */ +EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr, + int uniLength, Tcl_DString *dsPtr); +/* 648 */ +EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length, + Tcl_DString *dsPtr); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2275,7 +2284,7 @@ typedef struct TclStubs { char * (*tcl_UtfToExternalDString) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr); /* 333 */ int (*tcl_UtfToLower) (char *src); /* 334 */ int (*tcl_UtfToTitle) (char *src); /* 335 */ - int (*tcl_UtfToUniChar) (const char *src, Tcl_UniChar *chPtr); /* 336 */ + int (*tcl_UtfToChar16) (const char *src, unsigned short *chPtr); /* 336 */ int (*tcl_UtfToUpper) (char *src); /* 337 */ int (*tcl_WriteChars) (Tcl_Channel chan, const char *src, int srcLen); /* 338 */ int (*tcl_WriteObj) (Tcl_Channel chan, Tcl_Obj *objPtr); /* 339 */ @@ -2293,8 +2302,8 @@ typedef struct TclStubs { int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */ int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ - char * (*tcl_UniCharToUtfDString) (const Tcl_UniChar *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ - Tcl_UniChar * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ + char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ + unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ TCL_DEPRECATED_API("Use Tcl_EvalTokensStandard") Tcl_Obj * (*tcl_EvalTokens) (Tcl_Interp *interp, Tcl_Token *tokenPtr, int count); /* 357 */ void (*tcl_FreeParse) (Tcl_Parse *parsePtr); /* 358 */ @@ -2585,6 +2594,9 @@ typedef struct TclStubs { int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */ int (*tcl_LinkArray) (Tcl_Interp *interp, const char *varName, void *addr, int type, int size); /* 644 */ int (*tcl_GetIntForIndex) (Tcl_Interp *interp, Tcl_Obj *objPtr, int endValue, int *indexPtr); /* 645 */ + int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */ + char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */ + int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -3287,8 +3299,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UtfToLower) /* 334 */ #define Tcl_UtfToTitle \ (tclStubsPtr->tcl_UtfToTitle) /* 335 */ -#define Tcl_UtfToUniChar \ - (tclStubsPtr->tcl_UtfToUniChar) /* 336 */ +#define Tcl_UtfToChar16 \ + (tclStubsPtr->tcl_UtfToChar16) /* 336 */ #define Tcl_UtfToUpper \ (tclStubsPtr->tcl_UtfToUpper) /* 337 */ #define Tcl_WriteChars \ @@ -3323,10 +3335,10 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UniCharLen) /* 352 */ #define Tcl_UniCharNcmp \ (tclStubsPtr->tcl_UniCharNcmp) /* 353 */ -#define Tcl_UniCharToUtfDString \ - (tclStubsPtr->tcl_UniCharToUtfDString) /* 354 */ -#define Tcl_UtfToUniCharDString \ - (tclStubsPtr->tcl_UtfToUniCharDString) /* 355 */ +#define Tcl_Char16ToUtfDString \ + (tclStubsPtr->tcl_Char16ToUtfDString) /* 354 */ +#define Tcl_UtfToChar16DString \ + (tclStubsPtr->tcl_UtfToChar16DString) /* 355 */ #define Tcl_GetRegExpFromObj \ (tclStubsPtr->tcl_GetRegExpFromObj) /* 356 */ #define Tcl_EvalTokens \ @@ -3907,6 +3919,12 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_LinkArray) /* 644 */ #define Tcl_GetIntForIndex \ (tclStubsPtr->tcl_GetIntForIndex) /* 645 */ +#define Tcl_UtfToUniChar \ + (tclStubsPtr->tcl_UtfToUniChar) /* 646 */ +#define Tcl_UniCharToUtfDString \ + (tclStubsPtr->tcl_UniCharToUtfDString) /* 647 */ +#define Tcl_UtfToUniCharDString \ + (tclStubsPtr->tcl_UtfToUniCharDString) /* 648 */ #endif /* defined(USE_TCL_STUBS) */ @@ -4092,6 +4110,36 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_StringMatch #define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0) +#if TCL_UTF_MAX <= 4 +# undef Tcl_UniCharToUtfDString +# define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString +# undef Tcl_UtfToUniCharDString +# define Tcl_UtfToUniCharDString Tcl_UtfToChar16DString +# undef Tcl_UtfToUniChar +# define Tcl_UtfToUniChar Tcl_UtfToChar16 +#endif +#if defined(USE_TCL_STUBS) +# define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \ + ? (char *(*)(const wchar_t *, int, Tcl_DString *))tclStubsPtr->tcl_UniCharToUtfDString \ + : (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString) +# define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \ + ? (wchar_t *(*)(const char *, int, Tcl_DString *))tclStubsPtr->tcl_UtfToUniCharDString \ + : (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString) +# define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \ + ? (int (*)(const char *, wchar_t *))tclStubsPtr->tcl_UtfToChar16 \ + : (int (*)(const char *, wchar_t *))Tcl_UtfToUniChar) +#else +# define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \ + ? (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_UniCharToUtfDString \ + : (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString) +# define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \ + ? (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToUniCharDString \ + : (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString) +# define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \ + ? (int (*)(const char *, wchar_t *))Tcl_UtfToChar16 \ + : (int (*)(const char *, wchar_t *))Tcl_UtfToUniChar) +#endif + /* * Deprecated Tcl procedures: */ diff --git a/generic/tclIOSock.c b/generic/tclIOSock.c index 12e2900..adf729a 100644 --- a/generic/tclIOSock.c +++ b/generic/tclIOSock.c @@ -30,11 +30,12 @@ gai_strerror( ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); if (tsdPtr->initialized) { - Tcl_DStringFree(&tsdPtr->errorMsg); + Tcl_DStringSetLength(&tsdPtr->errorMsg, 0); } else { + Tcl_DStringInit(&tsdPtr->errorMsg); tsdPtr->initialized = 1; } - Tcl_WinTCharToUtf(gai_strerrorW(code), -1, &tsdPtr->errorMsg); + Tcl_WCharToUtfDString(gai_strerrorW(code), -1, &tsdPtr->errorMsg); return Tcl_DStringValue(&tsdPtr->errorMsg); } #endif diff --git a/generic/tclInt.h b/generic/tclInt.h index 3db3f24..caa9f7a 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3243,17 +3243,6 @@ MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command); MODULE_SCOPE void TclRegisterCommandTypeName( Tcl_ObjCmdProc *implementationProc, const char *nameStr); -#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) -MODULE_SCOPE int TclUtfToWChar(const char *src, WCHAR *chPtr); -MODULE_SCOPE char * TclWCharToUtfDString(const WCHAR *uniStr, - int uniLength, Tcl_DString *dsPtr); -MODULE_SCOPE WCHAR * TclUtfToWCharDString(const char *src, - int length, Tcl_DString *dsPtr); -#else -# define TclUtfToWChar TclUtfToUniChar -# define TclWCharToUtfDString Tcl_UniCharToUtfDString -# define TclUtfToWCharDString Tcl_UtfToUniCharDString -#endif MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); @@ -4631,10 +4620,17 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, *---------------------------------------------------------------- */ +#if TCL_UTF_MAX > 4 #define TclUtfToUniChar(str, chPtr) \ ((((unsigned char) *(str)) < 0x80) ? \ ((*(chPtr) = (unsigned char) *(str)), 1) \ : Tcl_UtfToUniChar(str, chPtr)) +#else +#define TclUtfToUniChar(str, chPtr) \ + ((((unsigned char) *(str)) < 0x80) ? \ + ((*(chPtr) = (unsigned char) *(str)), 1) \ + : Tcl_UtfToChar16(str, chPtr)) +#endif /* *---------------------------------------------------------------- diff --git a/generic/tclMain.c b/generic/tclMain.c index 4b8fa8c..4a66793 100644 --- a/generic/tclMain.c +++ b/generic/tclMain.c @@ -70,10 +70,8 @@ NewNativeObj( Tcl_DString ds; #ifdef UNICODE - if (length > 0) { - length *= sizeof(WCHAR); - } - Tcl_WinTCharToUtf(string, length, &ds); + Tcl_DStringInit(&ds); + Tcl_WCharToUtfDString(string, length, &ds); #else Tcl_ExternalToUtfDString(NULL, (char *) string, length, &ds); #endif diff --git a/generic/tclPlatDecls.h b/generic/tclPlatDecls.h index abc8ee8..354d752 100644 --- a/generic/tclPlatDecls.h +++ b/generic/tclPlatDecls.h @@ -117,6 +117,16 @@ extern const TclPlatStubs *tclPlatStubsPtr; #undef TCL_STORAGE_CLASS #define TCL_STORAGE_CLASS DLLIMPORT -#endif /* _TCLPLATDECLS */ - +#if defined(USE_TCL_STUBS) && (defined(_WIN32) || defined(__CYGWIN__))\ + && (defined(TCL_NO_DEPRECATED) || TCL_MAJOR_VERSION > 8) +#undef Tcl_WinUtfToTChar +#undef Tcl_WinTCharToUtf +#ifdef _WIN32 +#define Tcl_WinUtfToTChar(string, len, dsPtr) (Tcl_DStringInit(dsPtr), \ + (TCHAR *)Tcl_UtfToChar16DString((string), (len), (dsPtr))) +#define Tcl_WinTCharToUtf(string, len, dsPtr) (Tcl_DStringInit(dsPtr), \ + (char *)Tcl_Char16ToUtfDString((string), ((((len) + 2) >> 1) - 1), (dsPtr))) +#endif +#endif +#endif /* _TCLPLATDECLS */ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 5e918f5..1c3f094 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -60,6 +60,9 @@ #undef TclBNInitBignumFromLong #undef Tcl_BackgroundError #define TclStaticPackage Tcl_StaticPackage +#undef Tcl_UniCharToUtfDString +#undef Tcl_UtfToUniCharDString +#undef Tcl_UtfToUniChar #undef TclBN_mp_tc_and #undef TclBN_mp_tc_or @@ -245,6 +248,8 @@ TclpGetPid(Tcl_Pid pid) return (int) (size_t) pid; } +#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 +#undef Tcl_WinUtfToTChar char * Tcl_WinUtfToTChar( const char *string, @@ -252,12 +257,9 @@ Tcl_WinUtfToTChar( Tcl_DString *dsPtr) { Tcl_DStringInit(dsPtr); - if (!string) { - return NULL; - } - return (char *)TclUtfToWCharDString(string, len, dsPtr); + return (char *)Tcl_UtfToChar16DString(string, len, dsPtr); } - +#undef Tcl_WinTCharToUtf char * Tcl_WinTCharToUtf( const char *string, @@ -265,16 +267,9 @@ Tcl_WinTCharToUtf( Tcl_DString *dsPtr) { Tcl_DStringInit(dsPtr); - if (!string) { - return NULL; - } - if (len < 0) { - len = wcslen((wchar_t *)string); - } else { - len /= 2; - } - return TclWCharToUtfDString((const WCHAR *)string, len, dsPtr); + return Tcl_Char16ToUtfDString((const unsigned short *)string, len >> 1, dsPtr); } +#endif /* !defined(TCL_NO_DEPRECATED) */ #if defined(TCL_WIDE_INT_IS_LONG) /* On Cygwin64, long is 64-bit while on Win64 long is 32-bit. Therefore @@ -479,6 +474,11 @@ tellOld( } #endif /* !TCL_NO_DEPRECATED */ +#if defined(TCL_NO_DEPRECATED) || TCL_MAJOR_VERSION > 8 +#define Tcl_WinUtfToTChar 0 +#define Tcl_WinTCharToUtf 0 +#endif + /* * WARNING: The contents of this file is automatically generated by the * tools/genStubs.tcl script. Any modifications to the function declarations @@ -1332,7 +1332,7 @@ const TclStubs tclStubs = { Tcl_UtfToExternalDString, /* 333 */ Tcl_UtfToLower, /* 334 */ Tcl_UtfToTitle, /* 335 */ - Tcl_UtfToUniChar, /* 336 */ + Tcl_UtfToChar16, /* 336 */ Tcl_UtfToUpper, /* 337 */ Tcl_WriteChars, /* 338 */ Tcl_WriteObj, /* 339 */ @@ -1350,8 +1350,8 @@ const TclStubs tclStubs = { Tcl_UniCharIsWordChar, /* 351 */ Tcl_UniCharLen, /* 352 */ Tcl_UniCharNcmp, /* 353 */ - Tcl_UniCharToUtfDString, /* 354 */ - Tcl_UtfToUniCharDString, /* 355 */ + Tcl_Char16ToUtfDString, /* 354 */ + Tcl_UtfToChar16DString, /* 355 */ Tcl_GetRegExpFromObj, /* 356 */ Tcl_EvalTokens, /* 357 */ Tcl_FreeParse, /* 358 */ @@ -1642,6 +1642,9 @@ const TclStubs tclStubs = { Tcl_IsShared, /* 643 */ Tcl_LinkArray, /* 644 */ Tcl_GetIntForIndex, /* 645 */ + Tcl_UtfToUniChar, /* 646 */ + Tcl_UniCharToUtfDString, /* 647 */ + Tcl_UtfToUniCharDString, /* 648 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6c39d1c..320d7aa 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -221,22 +221,33 @@ three: *--------------------------------------------------------------------------- */ +#undef Tcl_UniCharToUtfDString char * Tcl_UniCharToUtfDString( - const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */ - int uniLength, /* Length of Unicode string in Tcl_UniChars - * (must be >= 0). */ + const int *uniStr, /* Unicode string to convert to UTF-8. */ + int uniLength, /* Length of Unicode string. */ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended * to this previously initialized DString. */ { - const Tcl_UniChar *w, *wEnd; + const int *w, *wEnd; char *p, *string; - int oldLength, len = 1; + int oldLength; /* * UTF-8 string length in bytes will be <= Unicode string length * 4. */ + if (uniStr == NULL) { + return NULL; + } + if (uniLength < 0) { + uniLength = 0; + w = uniStr; + while (*w != '\0') { + uniLength++; + w++; + } + } oldLength = Tcl_DStringLength(dsPtr); Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4); string = Tcl_DStringValue(dsPtr) + oldLength; @@ -244,45 +255,43 @@ Tcl_UniCharToUtfDString( p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { - if (!len && ((*w & 0xFC00) != 0xDC00)) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } - len = Tcl_UniCharToUtf(*w, p); - p += len; - if ((*w >= 0xD800) && (len < 3)) { - len = 0; /* Indication that high surrogate was found */ - } + p += Tcl_UniCharToUtf(*w, p); w++; } - if (!len) { - /* Special case for handling high surrogates. */ - p += Tcl_UniCharToUtf(-1, p); - } Tcl_DStringSetLength(dsPtr, oldLength + (p - string)); return string; } -#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) char * -TclWCharToUtfDString( - const WCHAR *uniStr, /* WCHAR string to convert to UTF-8. */ - int uniLength, /* Length of WCHAR string in Tcl_UniChars - * (must be >= 0). */ +Tcl_Char16ToUtfDString( + const unsigned short *uniStr,/* Utf-16 string to convert to UTF-8. */ + int uniLength, /* Length of Utf-16 string. */ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended * to this previously initialized DString. */ { - const WCHAR *w, *wEnd; + const unsigned short *w, *wEnd; char *p, *string; int oldLength, len = 1; /* - * UTF-8 string length in bytes will be <= Unicode string length * 4. + * UTF-8 string length in bytes will be <= Utf16 string length * 3. */ + if (uniStr == NULL) { + return NULL; + } + if (uniLength < 0) { + + uniLength = 0; + w = uniStr; + while (*w != '\0') { + uniLength++; + w++; + } + } oldLength = Tcl_DStringLength(dsPtr); - Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4); + Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 3); string = Tcl_DStringValue(dsPtr) + oldLength; p = string; @@ -307,7 +316,6 @@ TclWCharToUtfDString( return string; } -#endif /* *--------------------------------------------------------------------------- * @@ -350,13 +358,14 @@ static const unsigned short cp1252[32] = { 0x2DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178 }; +#undef Tcl_UtfToUniChar int Tcl_UtfToUniChar( const char *src, /* The UTF-8 string. */ - Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by + int *chPtr)/* Filled with the unsigned int represented by * the UTF-8 string. */ { - Tcl_UniChar byte; + int byte; /* * Unroll 1 to 4 byte UTF-8 sequences. @@ -372,20 +381,6 @@ Tcl_UtfToUniChar( * characters representing themselves. */ -#if TCL_UTF_MAX <= 4 - /* If *chPtr contains a high surrogate (produced by a previous - * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation - * bytes, then we must produce a follow-up low surrogate. We only - * do that if the high surrogate matches the bits we encounter. - */ - if ((byte >= 0x80) - && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC)) - && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80)) - && ((src[2] & 0xC0) == 0x80)) { - *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00; - return 3; - } -#endif if ((unsigned)(byte-0x80) < (unsigned)0x20) { *chPtr = cp1252[byte-0x80]; } else { @@ -431,23 +426,11 @@ Tcl_UtfToUniChar( /* * Four-byte-character lead byte followed by three trail bytes. */ -#if TCL_UTF_MAX <= 4 - Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) - | ((src[2] & 0x3F) >> 4)) - 0x40; - if (high >= 0x400) { - /* out of range, < 0x10000 or > 0x10ffff */ - } else { - /* produce high surrogate, advance source pointer */ - *chPtr = 0xD800 + high; - return 1; - } -#else *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } -#endif } /* @@ -460,14 +443,13 @@ Tcl_UtfToUniChar( return 1; } -#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) int -TclUtfToWChar( +Tcl_UtfToChar16( const char *src, /* The UTF-8 string. */ - WCHAR *chPtr)/* Filled with the WCHAR represented by + unsigned short *chPtr)/* Filled with the unsigned short represented by * the UTF-8 string. */ { - WCHAR byte; + unsigned short byte; /* * Unroll 1 to 4 byte UTF-8 sequences. @@ -540,7 +522,7 @@ TclUtfToWChar( /* * Four-byte-character lead byte followed by three trail bytes. */ - WCHAR high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) + unsigned short high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; if (high >= 0x400) { /* out of range, < 0x10000 or > 0x10ffff */ @@ -560,7 +542,6 @@ TclUtfToWChar( *chPtr = byte; return 1; } -#endif /* *--------------------------------------------------------------------------- @@ -580,7 +561,8 @@ TclUtfToWChar( *--------------------------------------------------------------------------- */ -Tcl_UniChar * +#undef Tcl_UtfToUniCharDString +int * Tcl_UtfToUniCharDString( const char *src, /* UTF-8 string to convert to Unicode. */ int length, /* Length of UTF-8 string in bytes, or -1 for @@ -589,10 +571,13 @@ Tcl_UtfToUniCharDString( * appended to this previously initialized * DString. */ { - Tcl_UniChar ch = 0, *w, *wString; + int ch = 0, *w, *wString; const char *p, *end; int oldLength; + if (src == NULL) { + return NULL; + } if (length < 0) { length = strlen(src); } @@ -605,20 +590,20 @@ Tcl_UtfToUniCharDString( oldLength = Tcl_DStringLength(dsPtr); Tcl_DStringSetLength(dsPtr, - oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar))); - wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength); + oldLength + ((length + 1) * sizeof(int))); + wString = (int *) (Tcl_DStringValue(dsPtr) + oldLength); w = wString; p = src; end = src + length - 4; while (p < end) { - p += TclUtfToUniChar(p, &ch); + p += Tcl_UtfToUniChar(p, &ch); *w++ = ch; } end += 4; while (p < end) { if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToUniChar(p, &ch); + p += Tcl_UtfToUniChar(p, &ch); } else { ch = UCHAR(*p++); } @@ -631,9 +616,8 @@ Tcl_UtfToUniCharDString( return wString; } -#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32)) -WCHAR * -TclUtfToWCharDString( +unsigned short * +Tcl_UtfToChar16DString( const char *src, /* UTF-8 string to convert to Unicode. */ int length, /* Length of UTF-8 string in bytes, or -1 for * strlen(). */ @@ -641,10 +625,14 @@ TclUtfToWCharDString( * appended to this previously initialized * DString. */ { - WCHAR ch = 0, *w, *wString; + unsigned short ch = 0; + unsigned short *w, *wString; const char *p, *end; int oldLength; + if (src == NULL) { + return NULL; + } if (length < 0) { length = strlen(src); } @@ -657,20 +645,20 @@ TclUtfToWCharDString( oldLength = Tcl_DStringLength(dsPtr); Tcl_DStringSetLength(dsPtr, - oldLength + (int) ((length + 1) * sizeof(WCHAR))); - wString = (WCHAR *) (Tcl_DStringValue(dsPtr) + oldLength); + oldLength + ((length + 1) * sizeof(unsigned short))); + wString = (unsigned short *) (Tcl_DStringValue(dsPtr) + oldLength); w = wString; p = src; end = src + length - 4; while (p < end) { - p += TclUtfToWChar(p, &ch); + p += Tcl_UtfToChar16(p, &ch); *w++ = ch; } end += 4; while (p < end) { if (Tcl_UtfCharComplete(p, end-p)) { - p += TclUtfToWChar(p, &ch); + p += Tcl_UtfToChar16(p, &ch); } else { ch = UCHAR(*p++); } @@ -682,7 +670,6 @@ TclUtfToWCharDString( return wString; } -#endif /* *--------------------------------------------------------------------------- * @@ -2151,7 +2138,7 @@ Tcl_UniCharCaseMatch( if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*uniStr && (p != *uniStr) - && (p != (Tcl_UniChar)Tcl_UniCharToLower(*uniStr))) { + && (p != Tcl_UniCharToLower(*uniStr))) { uniStr++; } } else { @@ -2191,13 +2178,13 @@ Tcl_UniCharCaseMatch( Tcl_UniChar startChar, endChar; uniPattern++; - ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniStr) : *uniStr); + ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); uniStr++; while (1) { if ((*uniPattern == ']') || (*uniPattern == 0)) { return 0; } - startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern) + startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) : *uniPattern); uniPattern++; if (*uniPattern == '-') { @@ -2205,7 +2192,7 @@ Tcl_UniCharCaseMatch( if (*uniPattern == 0) { return 0; } - endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*uniPattern) + endChar = (nocase ? Tcl_UniCharToLower(*uniPattern) : *uniPattern); uniPattern++; if (((startChar <= ch1) && (ch1 <= endChar)) @@ -2343,7 +2330,7 @@ TclUniCharMatch( if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while ((string < stringEnd) && (p != *string) - && (p != (Tcl_UniChar)Tcl_UniCharToLower(*string))) { + && (p != Tcl_UniCharToLower(*string))) { string++; } } else { @@ -2384,20 +2371,20 @@ TclUniCharMatch( Tcl_UniChar ch1, startChar, endChar; pattern++; - ch1 = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*string) : *string); + ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string); string++; while (1) { if ((*pattern == ']') || (pattern == patternEnd)) { return 0; } - startChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern) : *pattern); + startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (*pattern == '-') { pattern++; if (pattern == patternEnd) { return 0; } - endChar = (nocase ? (Tcl_UniChar)Tcl_UniCharToLower(*pattern) + endChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (((startChar <= ch1) && (ch1 <= endChar)) diff --git a/generic/tclUtil.c b/generic/tclUtil.c index c5c816f..0e4bb18 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2304,7 +2304,7 @@ Tcl_StringCaseMatch( if (nocase) { while (*str) { charLen = TclUtfToUniChar(str, &ch1); - if (ch2==ch1 || ch2==(Tcl_UniChar)Tcl_UniCharToLower(ch1)) { + if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { break; } str += charLen; diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index d842289..d59d893 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -4927,7 +4927,8 @@ TclZipfs_AppHook( #ifdef _WIN32 Tcl_DString ds; - archive = Tcl_WinTCharToUtf((*argvPtr)[1], -1, &ds); + Tcl_DStringInit(&ds); + archive = Tcl_WCharToUtfDString((*argvPtr)[1], -1, &ds); #else /* !_WIN32 */ archive = (*argvPtr)[1]; #endif /* _WIN32 */ |