summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: apnadkarni <apnmbx-wits@yahoo.com> 2023-02-23 11:22:22 (GMT)
committer: apnadkarni <apnmbx-wits@yahoo.com> 2023-02-23 11:22:22 (GMT)
commit: e469cdf1901f3bbd96dd0dc2f72cf443a1fe4833 (patch)
tree: 427fc351f8fc984f078632fb85f33e51ade56cf0
parent: d1920b380d4a987240715b3ce72f7d68dfca2b09 (diff)
parent: 1d76ffb03b359c7f557943523fd9b0c49a312554 (diff)
download: tcl-e469cdf1901f3bbd96dd0dc2f72cf443a1fe4833.zip
tcl-e469cdf1901f3bbd96dd0dc2f72cf443a1fe4833.tar.gz
tcl-e469cdf1901f3bbd96dd0dc2f72cf443a1fe4833.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclEncoding.c 68
-rw-r--r-- tests/cmdAH.test 14
-rw-r--r-- tests/encoding.test 19
3 files changed, 61 insertions, 40 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index a877468..daab3a9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -264,8 +264,13 @@ static Tcl_EncodingConvertProc Iso88591ToUtfProc;
*/
static const Tcl_ObjType encodingType = {
- "encoding", FreeEncodingInternalRep, DupEncodingInternalRep, NULL, NULL
+ "encoding",
+ FreeEncodingInternalRep,
+ DupEncodingInternalRep,
+ NULL,
+ NULL
};
+
#define EncodingSetInternalRep(objPtr, encoding) \
do { \
Tcl_ObjInternalRep ir; \
@@ -488,7 +493,7 @@ FillEncodingFileMap(void)
map = Tcl_NewDictObj();
Tcl_IncrRefCount(map);
- for (i = numDirs-1; i >= 0; i--) {
+ for (i = numDirs-1; i != TCL_INDEX_NONE; i--) {
/*
* Iterate backwards through the search path so as we overwrite
* entries found, we favor files earlier on the search path.
@@ -1209,7 +1214,7 @@ Tcl_ExternalToUtfDString(
* Tcl_ExternalToUtfDStringEx --
*
* Convert a source buffer from the specified encoding into UTF-8.
-* The parameter flags controls the behavior, if any of the bytes in
+ * The parameter flags controls the behavior, if any of the bytes in
* the source buffer are invalid or cannot be represented in utf-8.
* Possible flags values:
* target encoding. It should be composed by OR-ing the following:
@@ -1482,8 +1487,9 @@ Tcl_UtfToExternalDStringEx(
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ int result, soFar, srcRead, dstWrote, dstChars;
const char *srcStart = src;
+ int dstLen;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -2594,8 +2600,8 @@ Utf32ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, extra, numChars, charLimit = INT_MAX;
- int ch = 0;
+ int result, numChars, charLimit = INT_MAX;
+ int ch = 0, bytesLeft = srcLen % 4;
flags |= PTR2INT(clientData);
if (flags & TCL_ENCODING_CHAR_LIMIT) {
@@ -2606,11 +2612,10 @@ Utf32ToUtfProc(
/*
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
- extra = srcLen % 4;
- if (extra != 0) {
- /* We have a truncated code unit */
+ if (bytesLeft != 0) {
+ /* We have a truncated code unit */
result = TCL_CONVERT_MULTIBYTE;
- srcLen &= -4;
+ srcLen -= bytesLeft;
}
/*
@@ -2648,7 +2653,7 @@ Utf32ToUtfProc(
/* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */
dst += Tcl_UniCharToUtf(-1, dst);
}
-
+
if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) {
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_SYNTAX;
@@ -2679,16 +2684,22 @@ Utf32ToUtfProc(
}
/*
* If we had a truncated code unit at the end AND this is the last
- * fragment AND profile is "replace", stick FFFD in its place.
+ * fragment AND profile is not "strict", stick FFFD in its place.
*/
- if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) {
- src += extra; /* Go past truncated code unit */
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
} else {
- dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
- result = TCL_OK;
- }
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ /* PROFILE_REPLACE or PROFILE_TCL8 */
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ numChars++;
+ src += bytesLeft; /* Go past truncated code unit */
+ }
+ }
}
*srcReadPtr = src - srcStart;
@@ -2837,7 +2848,7 @@ Utf16ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, extra, numChars, charLimit = INT_MAX;
+ int result, numChars, charLimit = INT_MAX;
unsigned short ch = 0;
flags |= PTR2INT(clientData);
@@ -2850,8 +2861,7 @@ Utf16ToUtfProc(
* Check alignment with utf-16 (2 == sizeof(UTF-16))
*/
- extra = srcLen % 2;
- if (extra != 0) {
+ if ((srcLen % 2) != 0) {
result = TCL_CONVERT_MULTIBYTE;
srcLen--;
}
@@ -2909,16 +2919,22 @@ Utf16ToUtfProc(
}
/*
* If we had a truncated code unit at the end AND this is the last
- * fragment AND profile is "replace", stick FFFD in its place.
+ * fragment AND profile is not "strict", stick FFFD in its place.
*/
- if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) {
- ++src;/* Go past the truncated code unit */
+ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
} else {
- dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
- result = TCL_OK;
- }
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ /* PROFILE_REPLACE or PROFILE_TCL8 */
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ numChars++;
+ src++; /* Go past truncated code unit */
+ }
+ }
}
*srcReadPtr = src - srcStart;
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index d76607c..f8eba4e 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -703,7 +703,7 @@ lappend encInvalidBytes {*}{
# happen when the sequence is at the end (including by itself) Thus {solo tail}
# in some cases.
lappend encInvalidBytes {*}{
- utf-16le 41 tcl8 {} -1 {solo tail} {Truncated}
+ utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated}
utf-16le 41 strict {} 0 {solo tail} {Truncated}
utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate}
@@ -719,13 +719,13 @@ lappend encInvalidBytes {*}{
# happen when the sequence is at the end (including by itself) Thus {solo tail}
# in some cases.
lappend encInvalidBytes {*}{
- utf-32le 41 tcl8 {} -1 {solo tail} {Truncated}
+ utf-32le 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
utf-32le 41 replace \uFFFD -1 {solo} {Truncated}
utf-32le 41 strict {} 0 {solo tail} {Truncated}
- utf-32le 4100 tcl8 {} -1 {solo tail} {Truncated}
+ utf-32le 4100 tcl8 \uFFFD -1 {solo tail} {Truncated}
utf-32le 4100 replace \uFFFD -1 {solo} {Truncated}
utf-32le 4100 strict {} 0 {solo tail} {Truncated}
- utf-32le 410000 tcl8 {} -1 {solo tail} {Truncated}
+ utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated}
utf-32le 410000 replace \uFFFD -1 {solo} {Truncated}
utf-32le 410000 strict {} 0 {solo tail} {Truncated}
utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate}
@@ -744,9 +744,9 @@ lappend encInvalidBytes {*}{
utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range}
utf-32le FFFFFFFF strict {} 0 {} {Out of range}
- utf-32be 41 tcl8 {} -1 {solo tail} {Truncated}
- utf-32be 0041 tcl8 {} -1 {solo tail} {Truncated}
- utf-32be 000041 tcl8 {} -1 {solo tail} {Truncated}
+ utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated}
+ utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated}
+ utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated}
utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate}
utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate}
utf-32be 0000D800 strict {} 0 {} {High-surrogate}
diff --git a/tests/encoding.test b/tests/encoding.test
index 0abd193..87da880 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -534,7 +534,7 @@ test encoding-16.17 {Utf32ToUtfProc} -body {
list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx]
} -result {A 4}
-test encoding-16.9 {
+test encoding-16.18 {
Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16
} -body {
apply [list {} {
@@ -553,10 +553,15 @@ test encoding-16.9 {
return done
} [namespace current]]
} -result done
-
-
-
-
+test encoding-16.19 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-16 "\x41\x41\x41"
+} -result \u4141\uFFFD
+test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body {
+ encoding convertfrom utf-16 "\xD8\xD8"
+} -result \uD8D8
+test encoding-16.21 {UnicodeToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41"
+} -result \x00\uFFFD
test encoding-17.1 {UtfToUtf16Proc} -body {
encoding convertto utf-16 "\U460DC"
@@ -783,10 +788,10 @@ test encoding-24.19 {Parse valid or invalid utf-8} -constraints deprecated -body
} -result ZX\xED\xA0\x80
test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body {
encoding convertfrom -profile tcl8 "\x20"
-} -result {wrong # args: should be "::tcl::encoding::convertfrom ??-profile profile? ?-failindex var? ?encoding?? data"} -returnCodes error
+} -result {wrong # args: should be "::tcl::encoding::convertfrom ? ?-profile profile? ?-failindex var? encoding ? data"} -returnCodes error
test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body {
string length [encoding convertto -profile tcl8 "\x20"]
-} -result {wrong # args: should be "::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data"} -returnCodes error
+} -result {wrong # args: should be "::tcl::encoding::convertto ? ?-profile profile? ?-failindex var? encoding ? data"} -returnCodes error
test encoding-24.22 {Syntax error, two encodings} -body {
encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error