summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2021-04-01 10:23:28 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2021-04-01 10:23:28 (GMT)
commit 1befe1f9d741f7f036a26e0ecfef303983d58f46 (patch)
tree b382267d95572c13e9fdc7b6d0147636c8ad55e9
parent 7bb6d9ce572c47747256f1e1edcb4a29cd7ac279 (diff)
parent d1bd0f28efd01c9832cbc0b0c5b87ee82badd8b7 (diff)
download tcl-1befe1f9d741f7f036a26e0ecfef303983d58f46.zip
tcl-1befe1f9d741f7f036a26e0ecfef303983d58f46.tar.gz
tcl-1befe1f9d741f7f036a26e0ecfef303983d58f46.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclEncoding.c 51
1 files changed, 33 insertions, 18 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 054504f..1c03fec 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -510,9 +510,11 @@ FillEncodingFileMap(void)
*---------------------------------------------------------------------------
*/
-/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
+/* These flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */
-#define TCL_ENCODING_LE 0x80 /* Little-endian encoding, for ucs-2/utf-16 only */
+/* Since TCL_ENCODING_MODIFIED is only used for utf-8 and
+ * TCL_ENCODING_LE is only used for utf-16/ucs-2, re-use the same value */
+#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */
void
TclInitEncodingSubsystem(void)
@@ -522,7 +524,7 @@ TclInitEncodingSubsystem(void)
unsigned size;
unsigned short i;
union {
- unsigned char c;
+ char c;
short s;
} isLe;
@@ -1074,20 +1076,22 @@ Tcl_ExternalToUtfDString(
srcLen = encodingPtr->lengthProc(src);
}
- flags = TCL_ENCODING_START | TCL_ENCODING_END | TCL_ENCODING_MODIFIED;
+ flags = TCL_ENCODING_START | TCL_ENCODING_END;
+ if (encodingPtr->toUtfProc == UtfToUtfProc) {
+ flags |= TCL_ENCODING_MODIFIED;
+ }
while (1) {
result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+ src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
Tcl_DStringSetLength(dstPtr, soFar);
return Tcl_DStringValue(dstPtr);
}
-
flags &= ~TCL_ENCODING_START;
- src += srcRead;
srcLen -= srcRead;
if (Tcl_DStringLength(dstPtr) == 0) {
Tcl_DStringSetLength(dstPtr, dstLen);
@@ -1190,12 +1194,14 @@ Tcl_ExternalToUtf(
dstLen--;
}
- flags |= TCL_ENCODING_MODIFIED;
+ if (encodingPtr->toUtfProc == UtfToUtfProc) {
+ flags |= TCL_ENCODING_MODIFIED;
+ }
do {
Tcl_EncodingState savedState = *statePtr;
result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
- flags , statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
+ flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
dstCharsPtr);
if (*dstCharsPtr <= maxChars) {
break;
@@ -2107,7 +2113,7 @@ BinaryProc(
static int
UtfToUtfProc(
- TCL_UNUSED(ClientData),
+ ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2146,6 +2152,7 @@ UtfToUtfProc(
}
dstStart = dst;
+ flags |= PTR2INT(clientData);
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
@@ -2185,16 +2192,23 @@ UtfToUtfProc(
* unless the user has explicitly asked to be told.
*/
- if (flags & TCL_ENCODING_STOPONERROR) {
- result = TCL_CONVERT_MULTIBYTE;
- break;
+ if (flags & TCL_ENCODING_MODIFIED) {
+ if (flags & TCL_ENCODING_STOPONERROR) {
+ result = TCL_CONVERT_MULTIBYTE;
+ break;
+ }
+ ch = UCHAR(*src++);
+ } else {
+ char chbuf[2];
+ chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+ TclUtfToUCS4(chbuf, &ch);
}
- ch = UCHAR(*src);
- src += 1;
dst += Tcl_UniCharToUtf(ch, dst);
} else {
+ int low;
size_t len = TclUtfToUCS4(src, &ch);
- if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) {
+ if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)
+ && (flags & TCL_ENCODING_MODIFIED)) {
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -2204,7 +2218,7 @@ UtfToUtfProc(
* A surrogate character is detected, handle especially.
*/
- int low = ch;
+ low = ch;
len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
@@ -2351,7 +2365,7 @@ Utf16ToUtfProc(
static int
UtfToUtf16Proc(
- ClientData clientData, /* != NULL means LE, == NUL means BE */
+ ClientData clientData, /* additional flags, e.g. TCL_ENCODING_LE */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2385,6 +2399,7 @@ UtfToUtf16Proc(
dstStart = dst;
dstEnd = dst + dstLen - sizeof(Tcl_UniChar);
+ flags |= PTR2INT(clientData);
result = TCL_OK;
for (numChars = 0; src < srcEnd; numChars++) {
@@ -2402,7 +2417,7 @@ UtfToUtf16Proc(
break;
}
src += TclUtfToUCS4(src, &ch);
- if (clientData) {
+ if (flags & TCL_ENCODING_LE) {
if (ch <= 0xFFFF) {
*dst++ = (ch & 0xFF);
*dst++ = (ch >> 8);