diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b153dc9..1fb8847 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -192,7 +192,7 @@ Invalid(
  * UTF-8 sequence are produced.
  *
  * If no low surrogate follows the high surrogate (which is actually illegal),
- * calling Tcl_UniCharToUtf again with ch = -1 produces a 3-byte UTF-8
+ * calling Tcl_UniCharToUtf again with ch being -1 produces a 3-byte UTF-8
  * sequence representing the high surrogate.
  *
  * Results:
@@ -207,11 +207,13 @@ Invalid(
 #undef Tcl_UniCharToUtf
 size_t
 Tcl_UniCharToUtf(
-    int ch,                     /* The Tcl_UniChar to be stored in the
-                                 * buffer. Can be or'ed with flag TCL_COMBINE */
-    char *buf)                  /* Buffer in which the UTF-8 representation of
-                                 * the ch is stored. Must be large enough to hold the UTF-8
-                                 * character (at most 4 bytes). */
+    int ch,     /* The Tcl_UniChar to be stored in the
+                 * buffer. Can be or'ed with flag TCL_COMBINE
+                 */
+    char *buf)  /* Buffer in which the UTF-8 representation of
+                 * ch is stored. Must be large enough to hold the UTF-8
+                 * character (at most 4 bytes).
+                 */
 {
 #if TCL_UTF_MAX > 3
     int flags = ch;
@@ -248,7 +250,12 @@ Tcl_UniCharToUtf(
                 /* Previous Tcl_UniChar was not a high surrogate, so just output */
             } else {
                 /* High surrogate */
+
+                /* Add 0x10000 to the raw number encoded in the surrogate
+                 * pair in order to get the code point.
+                 */
                 ch += 0x40;
+
                 /* Fill buffer with specific 3-byte (invalid) byte combination,
                    so following low surrogate can recognize it and combine */
                 buf[2] = (char) ((ch << 4) & 0x30);
|