From e00ef49560bb5bd4349d017887c7cd5a2d0ba38e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 15 Nov 2019 09:41:03 +0000 Subject: Protect additional Tcl_UtfToUniChar() call, for the case when not enough bytes are available in the buffer any more. Add additional test-cases for those situations (upper surrogate followed by somthing other than lower surrogate) --- generic/tclEncoding.c | 2 +- tests/encoding.test | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 00b97f5..9e1d262 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2365,7 +2365,7 @@ UtfToUtfProc( if ((*chPtr & 0xFC00) == 0xD800) { /* A high surrogate character is detected, handle especially */ Tcl_UniChar low = *chPtr; - size_t len = Tcl_UtfToUniChar(src, &low); + size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; if ((low & 0xFC00) != 0xDC00) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); diff --git a/tests/encoding.test b/tests/encoding.test index cf27190..36fcff6 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -339,11 +339,29 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { list [string length $x] [string length $y] $y } "4 2 \uD83D\uDE02" test encoding-15.6 {UtfToUtfProc emoji character output} { - set x \uD83D\uDE02 - set y [encoding convertto utf-8 \uD83D\uDE02] + set x \uDE02\uD83D\uDE02\uD83D + set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z -} {2 4 f09f9882} +} {4 10 edb882f09f9882eda0bd} +test encoding-15.7 {UtfToUtfProc emoji character output} { + set x \uDE02\uD83D\uD83D + set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {3 9 edb882eda0bdeda0bd} +test encoding-15.8 {UtfToUtfProc emoji character output} { + set x \uDE02\uD83D\xE9 + set y [encoding convertto utf-8 \uDE02\uD83D\xE9] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {3 8 edb882eda0bdc3a9} +test encoding-15.9 {UtfToUtfProc emoji character output} { + set x \uDE02\uD83DX + set y [encoding convertto utf-8 \uDE02\uD83DX] + binary scan $y H* z + list [string length $x] [string length $y] $z +} {3 7 edb882eda0bd58} test encoding-16.1 {UnicodeToUtfProc} { set val [encoding convertfrom unicode NN] -- cgit v0.12