summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-03-02 16:53:42 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-03-02 16:53:42 (GMT)
commit8c315fd31ff823b217374dd32577e04c42674249 (patch)
tree85b723f08e36160bc0b6f437d9cd6bced8d061dd /generic/tclUtf.c
parentc0c278ccb909abc9b83305b8873e3171f5d9ab02 (diff)
parentd0eefe67c87f69a16ae393d0ab5eb0847292c340 (diff)
downloadtcl-8c315fd31ff823b217374dd32577e04c42674249.zip
tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.gz
tcl-8c315fd31ff823b217374dd32577e04c42674249.tar.bz2
Merge 8.7
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c19
1 files changed, 16 insertions, 3 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e9e4432..4d9edf1 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -112,6 +112,19 @@ TclUtfCount(
* Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
* provided buffer. Equivalent to Plan 9 runetochar().
*
+ * Surrogate pairs are handled as follows:
+ * When this function is called with ch being a high surrogate,
+ * the first byte of the 4-byte UTF-8 sequence is produced and
+ * the function returns 1. When the function is called again with a
+ * low surrogate, the remaining 3 bytes of the 4-byte UTF-8
+ * sequence are produced, and the function returns 3. The buffer
+ * is used to remember the high surrogate between the two calls.
+ *
+ * If no low surrogate follows the high surrogate (which is actually
+ * illegal), this can be handled reasonably by calling Tcl_UniCharToUtf
+ * again with ch = -1. This will produce a 3-byte UTF-8 sequence
+ * representing the high surrogate.
+ *
* Results:
* The return value is the number of bytes in the buffer that were
* consumed.
@@ -270,11 +283,11 @@ Tcl_UniCharToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
- * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
+ * Surrogate pairs are handled as follows:
* For any UTF-8 string containing a character outside of the BMP, the
* first call to this function will fill *chPtr with the high surrogate
- * and generate a return value of 0. Calling Tcl_UtfToUniChar again
- * will produce the low surrogate and a return value of 4. Because *chPtr
+ * and generate a return value of 1. Calling Tcl_UtfToUniChar again
+ * will produce the low surrogate and a return value of 3. Because *chPtr
* is used to remember whether the high surrogate has already been produced, it
* is recommended to initialize the variable it points to as 0 before
* the first call to Tcl_UtfToUniChar is done.