diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-05-10 16:35:53 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-05-10 16:35:53 (GMT) |
commit | 8ffde10c063dd49dd207d2c8cf8b09e4487edf18 (patch) | |
tree | 103b667a0137ede85b2de0abd509bd7a12e87cd9 /generic/tclEncoding.c | |
parent | d50da922b1c1a3043e6ee9f24282a638ee143b48 (diff) | |
parent | b1139d3d2099aad8ad1981deaa0f689e1b4c322a (diff) | |
download | tcl-8ffde10c063dd49dd207d2c8cf8b09e4487edf18.zip tcl-8ffde10c063dd49dd207d2c8cf8b09e4487edf18.tar.gz tcl-8ffde10c063dd49dd207d2c8cf8b09e4487edf18.tar.bz2 |
Merge 8.7
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 85 |
1 files changed, 46 insertions, 39 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ea89c74..590b0de 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -234,12 +234,12 @@ static int TableToUtfProc(ClientData clientData, const char *src, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); -static int UnicodeToUtfProc(ClientData clientData, +static int UniCharToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); -static int UtfToUnicodeProc(ClientData clientData, +static int UtfToUniCharProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, @@ -596,8 +596,8 @@ TclInitEncodingSubsystem(void) Tcl_CreateEncoding(&type); type.encodingName = "unicode"; - type.toUtfProc = UnicodeToUtfProc; - type.fromUtfProc = UtfToUnicodeProc; + type.toUtfProc = UniCharToUtfProc; + type.fromUtfProc = UtfToUniCharProc; type.freeProc = NULL; type.nullSize = 2; type.clientData = NULL; @@ -2073,9 +2073,9 @@ LoadEscapeEncoding( + Tcl_DStringLength(&escapeData); dataPtr = ckalloc(size); dataPtr->initLen = strlen(init); - memcpy(dataPtr->init, init, (unsigned) dataPtr->initLen + 1); + memcpy(dataPtr->init, init, dataPtr->initLen + 1); dataPtr->finalLen = strlen(final); - memcpy(dataPtr->final, final, (unsigned) dataPtr->finalLen + 1); + memcpy(dataPtr->final, final, dataPtr->finalLen + 1); dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData), @@ -2167,7 +2167,7 @@ BinaryProc( *srcReadPtr = srcLen; *dstWrotePtr = srcLen; *dstCharsPtr = srcLen; - memcpy(dst, src, (size_t) srcLen); + memcpy(dst, src, srcLen); return result; } @@ -2384,8 +2384,8 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(*chPtr, dst); #if TCL_UTF_MAX <= 4 - if (!len) { - src += TclUtfToUniChar(src, chPtr); + if ((*chPtr >= 0xD800) && (len < 3)) { + src += TclUtfToUniChar(src + len, chPtr); dst += Tcl_UniCharToUtf(*chPtr, dst); } #endif @@ -2401,7 +2401,7 @@ UtfToUtfProc( /* *------------------------------------------------------------------------- * - * UnicodeToUtfProc -- + * UniCharToUtfProc -- * * Convert from Unicode to UTF-8. * @@ -2415,7 +2415,7 @@ UtfToUtfProc( */ static int -UnicodeToUtfProc( +UniCharToUtfProc( ClientData clientData, /* Not used. */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ @@ -2444,19 +2444,16 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + unsigned short ch; - if (flags & TCL_ENCODING_START) { - *statePtr = 0; - } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + if ((srcLen % sizeof(unsigned short)) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen /= sizeof(unsigned short); + srcLen *= sizeof(unsigned short); } srcStart = src; @@ -2473,16 +2470,16 @@ UnicodeToUtfProc( /* * Special case for 1-byte utf chars for speed. Make sure we work with - * Tcl_UniChar-size data. + * unsigned short-size data. */ - *chPtr = *(Tcl_UniChar *)src; - if (*chPtr && *chPtr < 0x80) { - *dst++ = (*chPtr & 0xFF); + ch = *(unsigned short *)src; + if (ch && ch < 0x80) { + *dst++ = (ch & 0xFF); } else { - dst += Tcl_UniCharToUtf(*chPtr, dst); + dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(Tcl_UniChar); + src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; @@ -2494,7 +2491,7 @@ UnicodeToUtfProc( /* *------------------------------------------------------------------------- * - * UtfToUnicodeProc -- + * UtfToUniCharProc -- * * Convert from UTF-8 to Unicode. * @@ -2508,7 +2505,7 @@ UnicodeToUtfProc( */ static int -UtfToUnicodeProc( +UtfToUniCharProc( ClientData clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ @@ -2576,20 +2573,30 @@ UtfToUnicodeProc( #ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 4 - *dst++ = (*chPtr >> 24); - *dst++ = ((*chPtr >> 16) & 0xFF); - *dst++ = ((*chPtr >> 8) & 0xFF); - *dst++ = (*chPtr & 0xFF); + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); + } else { + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + *dst++ = (*chPtr & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + } #else *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); #endif #else #if TCL_UTF_MAX > 4 - *dst++ = (*chPtr & 0xFF); - *dst++ = ((*chPtr >> 8) & 0xFF); - *dst++ = ((*chPtr >> 16) & 0xFF); - *dst++ = (*chPtr >> 24); + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); + } else { + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (*chPtr & 0xFF); + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + } #else *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); @@ -3006,7 +3013,7 @@ Iso88591FromUtfProc( if (ch > 0xff #if TCL_UTF_MAX <= 4 - || !len + || ((ch >= 0xD800) && (len < 3)) #endif ) { if (flags & TCL_ENCODING_STOPONERROR) { @@ -3014,7 +3021,7 @@ Iso88591FromUtfProc( break; } #if TCL_UTF_MAX <= 4 - if (!len) len = 4; + if ((ch >= 0xD800) && (len < 3)) len = 4; #endif /* * Plunge on, using '?' as a fallback character. @@ -3364,7 +3371,7 @@ EscapeFromUtfProc( *dstWrotePtr = 0; return TCL_CONVERT_NOSPACE; } - memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen); + memcpy(dst, dataPtr->init, dataPtr->initLen); dst += dataPtr->initLen; } else { state = PTR2INT(*statePtr); @@ -3443,7 +3450,7 @@ EscapeFromUtfProc( break; } memcpy(dst, subTablePtr->sequence, - (size_t) subTablePtr->sequenceLen); + subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } } @@ -3486,7 +3493,7 @@ EscapeFromUtfProc( memcpy(dst, dataPtr->subTables[0].sequence, len); dst += len; } - memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen); + memcpy(dst, dataPtr->final, dataPtr->finalLen); dst += dataPtr->finalLen; state &= ~TCL_ENCODING_END; } |