summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 20:27:34 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2023-02-22 20:27:34 (GMT)
commit: 7c6f71019a9ff3158e1f4f54f0166aaf9eacf7e5 (patch)
tree: 983cccadd07ba4bd2cd7c452ab8be1967181982a
parent: e10652549884dc52a643e650c96ae56f7e6b19ee (diff)
parent: 8f37d5c24ef176d5df911feafb5f8159098a5cc2 (diff)
download: tcl-7c6f71019a9ff3158e1f4f54f0166aaf9eacf7e5.zip
          tcl-7c6f71019a9ff3158e1f4f54f0166aaf9eacf7e5.tar.gz
          tcl-7c6f71019a9ff3158e1f4f54f0166aaf9eacf7e5.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclEncoding.c 50
-rw-r--r-- tests/encoding.test 15
2 files changed, 51 insertions, 14 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index e79cefd..8d4cd3e 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2368,10 +2368,10 @@ UtfToUtfProc(
if (flags & ENCODING_INPUT) {
if ((len < 2) && (ch != 0)
&& (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
- result = TCL_CONVERT_SYNTAX;
- break;
+ goto utf8Syntax;
} else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)
&& (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
+ utf8Syntax:
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -2483,7 +2483,7 @@ Utf32ToUtfProc(
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
int result, numChars, charLimit = INT_MAX;
- int ch;
+ int ch, bytesLeft = srcLen % 4;
flags |= PTR2INT(clientData);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
@@ -2495,9 +2495,9 @@ Utf32ToUtfProc(
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
- if ((srcLen % 4) != 0) {
+ if (bytesLeft != 0) {
result = TCL_CONVERT_MULTIBYTE;
- srcLen &= -4;
+ srcLen -= bytesLeft;
}
srcStart = src;
@@ -2544,6 +2544,22 @@ Utf32ToUtfProc(
src += sizeof(unsigned int);
}
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+ /* We have a single byte left-over at the end */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ /* destination is not full, so we really are at the end now */
+ if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(0xFFFD, dst);
+ numChars++;
+ src += bytesLeft;
+ }
+ }
+ }
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2756,6 +2772,22 @@ Utf16ToUtfProc(
/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
dst += Tcl_UniCharToUtf(-1, dst);
}
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
+ /* We have a single byte left-over at the end */
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ } else {
+ /* destination is not full, so we really are at the end now */
+ if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(0xFFFD, dst);
+ numChars++;
+ src++;
+ }
+ }
+ }
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2940,8 +2972,8 @@ UtfToUcs2Proc(
len = TclUtfToUniChar(src, &ch);
if ((ch >= 0xD800) && (len < 3)) {
if (STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- break;
+ result = TCL_CONVERT_UNKNOWN;
+ break;
}
src += len;
src += TclUtfToUniChar(src, &ch);
@@ -2951,8 +2983,8 @@ UtfToUcs2Proc(
len = TclUtfToUniChar(src, &ch);
if (ch > 0xFFFF) {
if (STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- break;
+ result = TCL_CONVERT_UNKNOWN;
+ break;
}
ch = 0xFFFD;
}
diff --git a/tests/encoding.test b/tests/encoding.test
index f59c6d8..e0e1598 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -535,7 +535,7 @@ test encoding-16.17 {Utf32ToUtfProc} -body {
list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx]
} -result {A 4}
-test encoding-16.9 {
+test encoding-16.18 {
Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
} -body {
apply [list {} {
@@ -554,10 +554,15 @@ test encoding-16.9 {
return done
} [namespace current]]
} -result done
-
-
-
-
+test encoding-16.19 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-16 "\x41\x41\x41"
+} -result \u4141\uFFFD
+test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body {
+ encoding convertfrom utf-16 "\xD8\xD8"
+} -result \uD8D8
+test encoding-16.21 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41"
+} -result \x00\uFFFD
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"