summary | refs | log | tree | commit | diff | stats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2023-03-22 19:30:02 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2023-03-22 19:30:02 (GMT)
commit f4a64c89a2d0d854544f1a2bba43bca04c8268ea (patch)
tree dd3118abb7e27b7db5448e5c2f59ffb646836583 /generic/tclUtf.c
parent 6fec8f2b6ceb11f6c1cfe52126e45005b4376d98 (diff)
download tcl-f4a64c89a2d0d854544f1a2bba43bca04c8268ea.zip
tcl-f4a64c89a2d0d854544f1a2bba43bca04c8268ea.tar.gz
tcl-f4a64c89a2d0d854544f1a2bba43bca04c8268ea.tar.bz2
Forgot one line in previous commit, and indenting
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- generic/tclUtf.c 19
1 files changed, 13 insertions, 6 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b153dc9..1fb8847 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -192,7 +192,7 @@ Invalid(
* UTF-8 sequence are produced.
*
* If no low surrogate follows the high surrogate (which is actually illegal),
- * calling Tcl_UniCharToUtf again with ch = -1 produces a 3-byte UTF-8
+ * calling Tcl_UniCharToUtf again with ch being -1 produces a 3-byte UTF-8
* sequence representing the high surrogate.
*
* Results:
@@ -207,11 +207,13 @@ Invalid(
#undef Tcl_UniCharToUtf
size_t
Tcl_UniCharToUtf(
- int ch, /* The Tcl_UniChar to be stored in the
- * buffer. Can be or'ed with flag TCL_COMBINE */
- char *buf) /* Buffer in which the UTF-8 representation of
- * the ch is stored. Must be large enough to hold the UTF-8
- * character (at most 4 bytes). */
+ int ch, /* The Tcl_UniChar to be stored in the
+ * buffer. Can be or'ed with flag TCL_COMBINE
+ */
+ char *buf) /* Buffer in which the UTF-8 representation of
+ * ch is stored. Must be large enough to hold the UTF-8
+ * character (at most 4 bytes).
+ */
{
#if TCL_UTF_MAX > 3
int flags = ch;
@@ -248,7 +250,12 @@ Tcl_UniCharToUtf(
/* Previous Tcl_UniChar was not a high surrogate, so just output */
} else {
/* High surrogate */
+
+ /* Add 0x10000 to the raw number encoded in the surrogate
+ * pair in order to get the code point.
+ */
ch += 0x40;
+
/* Fill buffer with specific 3-byte (invalid) byte combination,
so following low surrogate can recognize it and combine */
buf[2] = (char) ((ch << 4) & 0x30);