diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-01-10 08:25:13 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2018-01-10 08:25:13 (GMT) |
| commit | 498f069f0e0b2f665d04a2a3b1b691f53297fe2c (patch) | |
| tree | 356e27dc3e74178c5a3fd616a6554f0f802575c2 | |
| parent | 6d9902632c27a3a3a46f9ea9506555027acac8ac (diff) | |
| parent | 48456596fff560a8a2e3bb709c7683d07874d306 (diff) | |
| download | tcl-498f069f0e0b2f665d04a2a3b1b691f53297fe2c.zip tcl-498f069f0e0b2f665d04a2a3b1b691f53297fe2c.tar.gz tcl-498f069f0e0b2f665d04a2a3b1b691f53297fe2c.tar.bz2 | |
Fix [00a27923ee](https://core.tcl.tk/tk/info/00a27923ee26437611e1ed83f96e15b6caabcd8b): (Tcl part; the remainder is in Tk) text/entry dysfunctional when pasting an emoji on MacOSX.
This changes the handling of incoming valid 4-byte UTF-8 sequences: those are no longer split into 4 separate characters (as was done for invalid byte sequences) but are replaced by a single 'replacement character' (U+FFFD).
| -rw-r--r-- | generic/tclUtf.c | 24 |
1 files changed, 14 insertions, 10 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 43636b4..2d8750d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -68,11 +68,7 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -#if TCL_UTF_MAX > 3 4,4,4,4,4,4,4,4, -#else - 1,1,1,1,1,1,1,1, -#endif 1,1,1,1,1,1,1,1 }; @@ -328,13 +324,22 @@ Tcl_UtfToUniChar( * represents itself. */ } -#if TCL_UTF_MAX > 3 else if (byte < 0xF8) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. */ -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX == 3 + byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; + if (byte & 0x100000) { + /* out of range, < 0x10000 or > 0x10ffff */ + } else { + /* produce replacement character, and advance source pointer */ + *chPtr = (Tcl_UniChar) 0xFFFD; + return 4; + } +#elif TCL_UTF_MAX == 4 Tcl_UniChar surrogate; byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) @@ -365,7 +370,6 @@ Tcl_UtfToUniChar( * represents itself. */ } -#endif *chPtr = (Tcl_UniChar) byte; return 1; @@ -499,13 +503,13 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + register const char *endPtr = src + length - 4; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); i++; } - endPtr += TCL_UTF_MAX; + endPtr += 4; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { src += TclUtfToUniChar(src, &ch); i++; @@ -677,7 +681,7 @@ Tcl_UtfPrev( int i, byte; look = --src; - for (i = 0; i < TCL_UTF_MAX; i++) { + for (i = 0; i < 4; i++) { if (look < start) { if (src < start) { src = start; |
