Merge 8.7

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-02 16:53:42 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-02 16:53:42 (GMT)
commit: 8c315fd31ff823b217374dd32577e04c42674249 (patch)
tree: 85b723f08e36160bc0b6f437d9cd6bced8d061dd
parent: c0c278ccb909abc9b83305b8873e3171f5d9ab02 (diff)
parent: d0eefe67c87f69a16ae393d0ab5eb0847292c340 (diff)
download: tcl-8c315fd31ff823b217374dd32577e04c42674249.zip
tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.gz
tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.bz2
2 files changed, 18 insertions, 5 deletions
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 3529951..775e42a 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -881,8 +881,8 @@ Tcl_ScanObjCmd(
 
 	    offset = TclUtfToUniChar(string, &sch);
 	    i = (int)sch;
-#if TCL_UTF_MAX == 4
-	    if (((sch & 0xFC00) == 0xD800) && (offset < 3)) {
+#if TCL_UTF_MAX <= 4
+	    if ((sch >= 0xD800) && (offset < 3)) {
 		offset += TclUtfToUniChar(string+offset, &sch);
 		i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
 	    }
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e9e4432..4d9edf1 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -112,6 +112,19 @@ TclUtfCount(
  *	Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
  *	provided buffer. Equivalent to Plan 9 runetochar().
  *
+ *	Surrogate pairs are handled as follows:
+ *	When this function is called for ch being a high surrogate,
+ *	the first byte of the 4-byte UTF-8 sequence is produced and
+ *	the function returns 1. When the function is called again with
+ *	a low surrogate, the remaining 3 bytes of the 4-byte UTF-8
+ *	sequence are produced, and the function returns 3. The buffer
+ *	is used to remember the high surrogate between the two calls.
+ *
+ *	If no low surrogate follows the high surrogate (which is actually
+ *	illegal), this can be handled reasonably by calling Tcl_UniCharToUtf
+ *	again with ch = -1. This will produce a 3-byte UTF-8 sequence
+ *	representing the high surrogate.
+ *
  * Results:
  *	The return values is the number of bytes in the buffer that were
  *	consumed.
@@ -270,11 +283,11 @@ Tcl_UniCharToUtfDString(
  *	Tcl_UtfCharComplete() before calling this routine to ensure that
  *	enough bytes remain in the string.
  *
- *	If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
+ *	Surrogate pairs are handled as follows:
  *	For any UTF-8 string containing a character outside of the BMP, the
  *	first call to this function will fill *chPtr with the high surrogate
- *	and generate a return value of 0. Calling Tcl_UtfToUniChar again
- *	will produce the low surrogate and a return value of 4. Because *chPtr
+ *	and generate a return value of 1. Calling Tcl_UtfToUniChar again
+ *	will produce the low surrogate and a return value of 3. Because *chPtr
  *	is used to remember whether the high surrogate is already produced, it
  *	is recommended to initialize the variable it points to as 0 before
  *	the first call to Tcl_UtfToUniChar is done.
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-03-02 16:53:42 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-03-02 16:53:42 (GMT)
commit	8c315fd31ff823b217374dd32577e04c42674249 (patch)
tree	85b723f08e36160bc0b6f437d9cd6bced8d061dd
parent	c0c278ccb909abc9b83305b8873e3171f5d9ab02 (diff)
parent	d0eefe67c87f69a16ae393d0ab5eb0847292c340 (diff)
download	tcl-8c315fd31ff823b217374dd32577e04c42674249.zip tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.gz tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.bz2