1 files changed, 23 insertions, 27 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index b0c7f64..5f75a3e 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -15,16 +15,16 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar
 .sp
 typedef ... \fBTcl_UniChar\fR;
 .sp
-int
+Tcl_Size
 \fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
 .sp
-int
+Tcl_Size
 \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
 .sp
-int
+Tcl_Size
 \fBTcl_UtfToChar16\fR(\fIsrc, uPtr\fR)
 .sp
-int
+Tcl_Size
 \fBTcl_UtfToWChar\fR(\fIsrc, wPtr\fR)
 .sp
 char *
@@ -55,19 +55,19 @@ int
 \fBTcl_UniCharLen\fR(\fIuniStr\fR)
 .sp
 int
-\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
+\fBTcl_UniCharNcmp\fR(\fIucs, uct, uniLength\fR)
 .sp
 int
-\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
+\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, uniLength\fR)
 .sp
 int
 \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
 .sp
 int
-\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
+\fBTcl_UtfNcmp\fR(\fIcs, ct, length\fR)
 .sp
 int
-\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
+\fBTcl_UtfNcasecmp\fR(\fIcs, ct, length\fR)
 .sp
 int
 \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
@@ -93,7 +93,7 @@ int
 const char *
 \fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
 .sp
-int
+Tcl_Size
 \fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
 .SH ARGUMENTS
 .AS "const Tcl_UniChar" *uniPattern in/out
@@ -132,18 +132,16 @@ A null-terminated utf-16 string.
 A null-terminated utf-16 string.
 .AP "const unsigned short" *utf16Pattern in
 A null-terminated utf-16 string.
-.AP int length in
+.AP Tcl_Size length in
 The length of the UTF-8 string in bytes (not UTF-8 characters).  If
 negative, all bytes up to the first null byte are used.
-.AP int uniLength in
+.AP Tcl_Size uniLength in
 The length of the Unicode string in characters.
 .AP "Tcl_DString" *dsPtr in/out
 A pointer to a previously initialized \fBTcl_DString\fR.
-.AP "unsigned long" numChars in
-The number of characters to compare.
 .AP "const char" *start in
 Pointer to the beginning of a UTF-8 string.
-.AP int index in
+.AP Tcl_Size index in
 The index of a character (not byte) in the UTF-8 string.
 .AP int *readPtr out
 If non-NULL, filled with the number of bytes in the backslash sequence,
@@ -168,11 +166,12 @@ can consume in a single call.
 .PP
 \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
 in starting at \fIbuf\fR.  The return value is the number of bytes stored
-in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
-the return value will be 1 and a single byte in the range 0xF0 - 0xF4
-will be stored. If you still want to produce UTF-8 output for it (even
-though knowing it's an illegal code-point on its own), just call
-\fBTcl_UniCharToUtf\fR again specifying ch = -1.
+in \fIbuf\fR. The character \fIch\fR can be OR'ed with the value \fBTCL_COMBINE\fR
+to enable special behavior, compatible with Tcl 8.x. Then, if \fIch\fR is a high
+surrogate (range U+D800 - U+DBFF), the return value will be 1 and a single
+byte in the range 0xF0 - 0xF4 will be stored. If \fIch\fR is a low surrogate
+(range U+DC00 - U+DFFF), an attempt is made to combine the result with
+the previously produced bytes, resulting in a 4-byte UTF-8 sequence.
 .PP
 \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
 and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
@@ -219,7 +218,7 @@ the number of Unicode characters (not bytes) in that string.
 \fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to
 \fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters.
 They accept two null-terminated Unicode strings and the number of characters
-to compare.  Both strings are assumed to be at least \fInumChars\fR characters
+to compare.  Both strings are assumed to be at least \fIuniLength\fR characters
 long. \fBTcl_UniCharNcmp\fR  compares the two strings character-by-character
 according to the Unicode character ordering.  It returns an integer greater
 than, equal to, or less than 0 if the first string is greater than, equal
@@ -233,7 +232,7 @@ be case sensitive and returns whether the string matches the pattern.
 .PP
 \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
 accepts two null-terminated UTF-8 strings and the number of characters
-to compare.  (Both strings are assumed to be at least \fInumChars\fR
+to compare; here \fIlength\fR counts characters, not bytes.  (Both strings are assumed to be at least \fIlength\fR
 characters long.)  \fBTcl_UtfNcmp\fR compares the two strings
 character-by-character according to the Unicode character ordering.
 It returns an integer greater than, equal to, or less than 0 if the
@@ -300,17 +299,14 @@ byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte
 Pascal Ord() function.  It returns the Unicode character represented at the
 specified character (not byte) \fIindex\fR in the UTF-8 string
 \fIsrc\fR.  The source string must contain at least \fIindex\fR
-characters.  If a negative \fIindex\fR is given or \fIindex\fR points
+characters.  If \fIindex\fR is negative or \fIindex\fR points
 to the second half of a surrogate pair, it returns -1.
 .PP
 \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not
 byte) \fIindex\fR in the UTF-8 string \fIsrc\fR.  The source string must
 contain at least \fIindex\fR characters.  This is equivalent to calling
-\fBTcl_UtfToUniChar\fR \fIindex\fR times, except if that would return
-a pointer to the second byte of a valid 4-byte UTF-8 sequence, in which
-case, \fBTcl_UtfToUniChar\fR will be called once more to find the end
-of the sequence. If a negative \fIindex\fR is given, the returned pointer
-points to the first character in the source string.
+\fBTcl_UtfToUniChar\fR \fIindex\fR times.  If \fIindex\fR is negative,
+the returned pointer points to the first character in the source string.
 .PP
 \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl
 commands.  It parses a backslash sequence and stores the properly formed