summary refs log tree commit diff stats
diff options
context:
space:
mode:
author apnadkarni <apnmbx-wits@yahoo.com> 2023-02-20 15:41:15 (GMT)
committer apnadkarni <apnmbx-wits@yahoo.com> 2023-02-20 15:41:15 (GMT)
commit 17c94ddce302cc681bfaecc5c4720c5147222754 (patch)
tree e51955e397f7c2b542efb61cd8b4972ad4e3f289
parent 3441ef00fc1dc6cde12633023dcdc720a17b924d (diff)
download tcl-17c94ddce302cc681bfaecc5c4720c5147222754.zip
tcl-17c94ddce302cc681bfaecc5c4720c5147222754.tar.gz
tcl-17c94ddce302cc681bfaecc5c4720c5147222754.tar.bz2
Fix replace profile handling of truncated surrogates
-rw-r--r-- generic/tclCmdAH.c 9
-rw-r--r-- generic/tclEncoding.c 42
2 files changed, 41 insertions, 10 deletions
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 692c75b..4dfb541 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -695,7 +695,8 @@ EncodingConvertfromObjCmd(
}
result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length,
flags, &ds);
- if (result != TCL_INDEX_NONE) {
+ if (result != TCL_INDEX_NONE &&
+ TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) {
if (failVarObj != NULL) {
if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) {
return TCL_ERROR;
@@ -776,7 +777,8 @@ EncodingConverttoObjCmd(
stringPtr = TclGetStringFromObj(data, &length);
result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length,
flags, &ds);
- if (result != TCL_INDEX_NONE) {
+ if (result != TCL_INDEX_NONE &&
+ TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) {
if (failVarObj != NULL) {
/* I hope, wide int will cover size_t data type */
if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) {
@@ -795,8 +797,7 @@ EncodingConverttoObjCmd(
Tcl_DStringFree(&ds);
return TCL_ERROR;
}
- }
- else if (failVarObj != NULL) {
+ } else if (failVarObj != NULL) {
if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) {
return TCL_ERROR;
}
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 7e5ec22..024570a 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2594,7 +2594,7 @@ Utf32ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars, charLimit = INT_MAX;
+ int result, extra, numChars, charLimit = INT_MAX;
int ch = 0;
flags |= PTR2INT(clientData);
@@ -2606,8 +2606,9 @@ Utf32ToUtfProc(
/*
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
-
- if ((srcLen % 4) != 0) {
+ extra = srcLen % 4;
+ if (extra != 0) {
+ /* We have a truncated code unit */
result = TCL_CONVERT_MULTIBYTE;
srcLen &= -4;
}
@@ -2669,13 +2670,27 @@ Utf32ToUtfProc(
} else {
dst += Tcl_UniCharToUtf(ch, dst);
}
- src += sizeof(unsigned int);
+ src += 4;
}
if ((ch & ~0x3FF) == 0xD800) {
/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
dst += Tcl_UniCharToUtf(-1, dst);
}
+ /*
+ * If we had a truncated code unit at the end AND this is the last
+ * fragment AND profile is "replace", stick FFFD in its place.
+ */
+ if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) {
+ src += extra; /* Go past truncated code unit */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ result = TCL_OK;
+ }
+ }
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2822,7 +2837,7 @@ Utf16ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars, charLimit = INT_MAX;
+ int result, extra, numChars, charLimit = INT_MAX;
unsigned short ch = 0;
flags |= PTR2INT(clientData);
@@ -2835,7 +2850,8 @@ Utf16ToUtfProc(
* Check alignment with utf-16 (2 == sizeof(UTF-16))
*/
- if ((srcLen % 2) != 0) {
+ extra = srcLen % 2;
+ if (extra != 0) {
result = TCL_CONVERT_MULTIBYTE;
srcLen--;
}
@@ -2891,6 +2907,20 @@ Utf16ToUtfProc(
/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
dst += Tcl_UniCharToUtf(-1, dst);
}
+ /*
+ * If we had a truncated code unit at the end AND this is the last
+ * fragment AND profile is "replace", stick FFFD in its place.
+ */
+ if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) {
+ ++src;/* Go past the truncated code unit */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ result = TCL_OK;
+ }
+ }
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;