diff options
author | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-20 15:41:15 (GMT) |
---|---|---|
committer | apnadkarni <apnmbx-wits@yahoo.com> | 2023-02-20 15:41:15 (GMT) |
commit | 17c94ddce302cc681bfaecc5c4720c5147222754 (patch) | |
tree | e51955e397f7c2b542efb61cd8b4972ad4e3f289 | |
parent | 3441ef00fc1dc6cde12633023dcdc720a17b924d (diff) | |
download | tcl-17c94ddce302cc681bfaecc5c4720c5147222754.zip tcl-17c94ddce302cc681bfaecc5c4720c5147222754.tar.gz tcl-17c94ddce302cc681bfaecc5c4720c5147222754.tar.bz2 |
Fix replace profile handling of truncated surrogates
-rw-r--r-- | generic/tclCmdAH.c | 9 | ||||
-rw-r--r-- | generic/tclEncoding.c | 42 |
2 files changed, 41 insertions, 10 deletions
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 692c75b..4dfb541 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -695,7 +695,8 @@ EncodingConvertfromObjCmd( } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); - if (result != TCL_INDEX_NONE) { + if (result != TCL_INDEX_NONE && + TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; @@ -776,7 +777,8 @@ EncodingConverttoObjCmd( stringPtr = TclGetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); - if (result != TCL_INDEX_NONE) { + if (result != TCL_INDEX_NONE && + TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { /* I hope, wide int will cover size_t data type */ if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { @@ -795,8 +797,7 @@ EncodingConverttoObjCmd( Tcl_DStringFree(&ds); return TCL_ERROR; } - } - else if (failVarObj != NULL) { + } else if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7e5ec22..024570a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2594,7 +2594,7 @@ Utf32ToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars, charLimit = INT_MAX; + int result, extra, numChars, charLimit = INT_MAX; int ch = 0; flags |= PTR2INT(clientData); @@ -2606,8 +2606,9 @@ Utf32ToUtfProc( /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ - - if ((srcLen % 4) != 0) { + extra = srcLen % 4; + if (extra != 0) { + /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen &= -4; } @@ -2669,13 +2670,27 @@ Utf32ToUtfProc( } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(unsigned int); + src += 4; } if ((ch & ~0x3FF) == 0xD800) { /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is "replace", stick FFFD in its place. + */ + if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) { + src += extra; /* Go past truncated code unit */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + result = TCL_OK; + } + } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2822,7 +2837,7 @@ Utf16ToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars, charLimit = INT_MAX; + int result, extra, numChars, charLimit = INT_MAX; unsigned short ch = 0; flags |= PTR2INT(clientData); @@ -2835,7 +2850,8 @@ Utf16ToUtfProc( * Check alignment with utf-16 (2 == sizeof(UTF-16)) */ - if ((srcLen % 2) != 0) { + extra = srcLen % 2; + if (extra != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } @@ -2891,6 +2907,20 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is "replace", stick FFFD in its place. + */ + if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) { + ++src;/* Go past the truncated code unit */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + result = TCL_OK; + } + } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; |