summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-11-15 09:41:03 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-11-15 09:41:03 (GMT)
commite00ef49560bb5bd4349d017887c7cd5a2d0ba38e (patch)
treeac3011b78c83bee36277f74ac8ce73b47cb1d606
parent4a0d55e4c90514af7ad345fdc8a3e5c54100197c (diff)
downloadtcl-e00ef49560bb5bd4349d017887c7cd5a2d0ba38e.zip
tcl-e00ef49560bb5bd4349d017887c7cd5a2d0ba38e.tar.gz
tcl-e00ef49560bb5bd4349d017887c7cd5a2d0ba38e.tar.bz2
Protect additional Tcl_UtfToUniChar() call, for the case when not enough bytes are available in the buffer any more.
Add additional test-cases for those situations (upper surrogate followed by something other than lower surrogate)
-rw-r--r--generic/tclEncoding.c2
-rw-r--r--tests/encoding.test24
2 files changed, 22 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 00b97f5..9e1d262 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2365,7 +2365,7 @@ UtfToUtfProc(
if ((*chPtr & 0xFC00) == 0xD800) {
/* A high surrogate character is detected, handle especially */
Tcl_UniChar low = *chPtr;
- size_t len = Tcl_UtfToUniChar(src, &low);
+ size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0;
if ((low & 0xFC00) != 0xDC00) {
*dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
diff --git a/tests/encoding.test b/tests/encoding.test
index cf27190..36fcff6 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -339,11 +339,29 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
list [string length $x] [string length $y] $y
} "4 2 \uD83D\uDE02"
test encoding-15.6 {UtfToUtfProc emoji character output} {
- set x \uD83D\uDE02
- set y [encoding convertto utf-8 \uD83D\uDE02]
+ set x \uDE02\uD83D\uDE02\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
-} {2 4 f09f9882}
+} {4 10 edb882f09f9882eda0bd}
+test encoding-15.7 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 9 edb882eda0bdeda0bd}
+test encoding-15.8 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\xE9
+ set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 8 edb882eda0bdc3a9}
+test encoding-15.9 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83DX
+ set y [encoding convertto utf-8 \uDE02\uD83DX]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 7 edb882eda0bd58}
test encoding-16.1 {UnicodeToUtfProc} {
set val [encoding convertfrom unicode NN]