diff options
Diffstat (limited to 'generic/tclEncoding.c')
| -rw-r--r-- | generic/tclEncoding.c | 150 |
1 files changed, 24 insertions, 126 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7569ce4..c682128 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -220,14 +220,7 @@ static size_t unilen(const char *src); static Tcl_EncodingConvertProc Utf16ToUtfProc; static Tcl_EncodingConvertProc UtfToUtf16Proc; static Tcl_EncodingConvertProc UtfToUcs2Proc; -static int UtfToUtfProc(ClientData clientData, - const char *src, int srcLen, int flags, - Tcl_EncodingState *statePtr, char *dst, - int dstLen, int *srcReadPtr, - int *dstWrotePtr, int *dstCharsPtr, - int pureNullMode); -static Tcl_EncodingConvertProc UtfIntToUtfExtProc; -static Tcl_EncodingConvertProc UtfExtToUtfIntProc; +static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; @@ -517,6 +510,10 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ +/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ +#define TCL_ENCODING_LE 0x40 /* Little-endian encoding */ +#define TCL_ENCODING_EXTERNAL 0x80 /* Converting from internal to external variant */ + void TclInitEncodingSubsystem(void) { @@ -525,7 +522,7 @@ TclInitEncodingSubsystem(void) unsigned size; unsigned short i; union { - char c; + unsigned char c; short s; } isLe; @@ -533,7 +530,7 @@ TclInitEncodingSubsystem(void) return; } - isLe.s = 1; + isLe.s = TCL_ENCODING_LE; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); @@ -553,8 +550,8 @@ TclInitEncodingSubsystem(void) tclIdentityEncoding = Tcl_CreateEncoding(&type); type.encodingName = "utf-8"; - type.toUtfProc = UtfExtToUtfIntProc; - type.fromUtfProc = UtfIntToUtfExtProc; + type.toUtfProc = UtfToUtfProc; + type.fromUtfProc = UtfToUtfProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; @@ -565,7 +562,7 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; - type.clientData = INT2PTR(1); + type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; type.clientData = INT2PTR(0); @@ -579,7 +576,7 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; - type.clientData = INT2PTR(1); + type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; type.clientData = INT2PTR(0); @@ -1331,7 +1328,7 @@ Tcl_UtfToExternalDString( flags = TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, + srcLen, flags | TCL_ENCODING_EXTERNAL, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); @@ -1433,7 +1430,7 @@ Tcl_UtfToExternal( dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, - flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, + flags | TCL_ENCODING_EXTERNAL, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (encodingPtr->nullSize == 2) { dst[*dstWrotePtr + 1] = '\0'; @@ -2156,104 +2153,6 @@ BinaryProc( /* *------------------------------------------------------------------------- * - * UtfIntToUtfExtProc -- - * - * Convert from UTF-8 to UTF-8. While converting null-bytes from the - * Tcl's internal representation (0xC0, 0x80) to the official - * representation (0x00). See UtfToUtfProc for details. - * - * Results: - * Returns TCL_OK if conversion was successful. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------- - */ - -static int -UtfIntToUtfExtProc( - ClientData clientData, - const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes. */ - int flags, /* Conversion control flags. */ - Tcl_EncodingState *statePtr,/* Place for conversion routine to store state - * information used during a piecewise - * conversion. Contents of statePtr are - * initialized and/or reset by conversion - * routine under control of flags argument. */ - char *dst, /* Output buffer in which converted string - * is stored. */ - int dstLen, /* The maximum length of output buffer in - * bytes. */ - int *srcReadPtr, /* Filled with the number of bytes from the - * source string that were converted. This may - * be less than the original source length if - * there was a problem converting some source - * characters. */ - int *dstWrotePtr, /* Filled with the number of bytes that were - * stored in the output buffer as a result of - * the conversion. */ - int *dstCharsPtr) /* Filled with the number of characters that - * correspond to the bytes stored in the - * output buffer. */ -{ - return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, 1); -} - -/* - *------------------------------------------------------------------------- - * - * UtfExtToUtfIntProc -- - * - * Convert from UTF-8 to UTF-8 while converting null-bytes from the - * official representation (0x00) to Tcl's internal representation (0xC0, - * 0x80). See UtfToUtfProc for details. - * - * Results: - * Returns TCL_OK if conversion was successful. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------- - */ - -static int -UtfExtToUtfIntProc( - ClientData clientData, - const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes. */ - int flags, /* Conversion control flags. */ - Tcl_EncodingState *statePtr,/* Place for conversion routine to store state - * information used during a piecewise - * conversion. Contents of statePtr are - * initialized and/or reset by conversion - * routine under control of flags argument. */ - char *dst, /* Output buffer in which converted string is - * stored. */ - int dstLen, /* The maximum length of output buffer in - * bytes. */ - int *srcReadPtr, /* Filled with the number of bytes from the - * source string that were converted. This may - * be less than the original source length if - * there was a problem converting some source - * characters. */ - int *dstWrotePtr, /* Filled with the number of bytes that were - * stored in the output buffer as a result of - * the conversion. */ - int *dstCharsPtr) /* Filled with the number of characters that - * correspond to the bytes stored in the - * output buffer. */ -{ - return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, 0); -} - -/* - *------------------------------------------------------------------------- - * * UtfToUtfProc -- * * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation @@ -2288,12 +2187,9 @@ UtfToUtfProc( int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr, /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ - int pureNullMode) /* Convert embedded nulls from internal - * representation to real null-bytes or vice - * versa. Also combine or separate surrogate pairs */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; @@ -2329,15 +2225,15 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) { + if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. */ *dst++ = *src++; - } else if (pureNullMode == 1 && UCHAR(*src) == 0xC0 && - (src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) { + } else if (UCHAR(*src) == 0xC0 && (src + 1 < srcEnd) + && UCHAR(src[1]) == 0x80 && (flags & TCL_ENCODING_EXTERNAL)) { /* * Convert 0xC080 to real nulls when we are in output mode. */ @@ -2398,7 +2294,7 @@ UtfToUtfProc( static int Utf16ToUtfProc( - ClientData clientData, /* != NULL means LE, == NUL means BE */ + ClientData clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2424,6 +2320,7 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch; + flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -2435,7 +2332,7 @@ Utf16ToUtfProc( srcLen--; } /* If last code point is a high surrogate, we cannot handle that yet */ - if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { + if ((srcLen >= 2) && ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; } @@ -2452,7 +2349,7 @@ Utf16ToUtfProc( break; } - if (clientData) { + if (flags & TCL_ENCODING_LE) { ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); @@ -2590,7 +2487,7 @@ UtfToUtf16Proc( static int UtfToUcs2Proc( - ClientData clientData, /* != NULL means LE, == NUL means BE */ + ClientData clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2618,6 +2515,7 @@ UtfToUcs2Proc( #endif Tcl_UniChar ch = 0; + flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2661,7 +2559,7 @@ UtfToUcs2Proc( * casting dst to a Tcl_UniChar. [Bug 1122671] */ - if (clientData) { + if (flags & TCL_ENCODING_LE) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { |
