summaryrefslogtreecommitdiffstats
path: root/doc/Utf.3
diff options
context:
space:
mode:
Diffstat (limited to 'doc/Utf.3')
-rw-r--r--doc/Utf.350
1 files changed, 23 insertions, 27 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index b0c7f64..5f75a3e 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -15,16 +15,16 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar
.sp
typedef ... \fBTcl_UniChar\fR;
.sp
-int
+Tcl_Size
\fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
.sp
-int
+Tcl_Size
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp
-int
+Tcl_Size
\fBTcl_UtfToChar16\fR(\fIsrc, uPtr\fR)
.sp
-int
+Tcl_Size
\fBTcl_UtfToWChar\fR(\fIsrc, wPtr\fR)
.sp
char *
@@ -55,19 +55,19 @@ int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
-\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
+\fBTcl_UniCharNcmp\fR(\fIucs, uct, uniLength\fR)
.sp
int
-\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
+\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, uniLength\fR)
.sp
int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
.sp
int
-\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
+\fBTcl_UtfNcmp\fR(\fIcs, ct, length\fR)
.sp
int
-\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
+\fBTcl_UtfNcasecmp\fR(\fIcs, ct, length\fR)
.sp
int
\fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
@@ -93,7 +93,7 @@ int
const char *
\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
.sp
-int
+Tcl_Size
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
.SH ARGUMENTS
.AS "const Tcl_UniChar" *uniPattern in/out
@@ -132,18 +132,16 @@ A null-terminated utf-16 string.
A null-terminated utf-16 string.
.AP "const unsigned short" *utf16Pattern in
A null-terminated utf-16 string.
-.AP int length in
+.AP Tcl_Size length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
-.AP int uniLength in
+.AP Tcl_Size uniLength in
The length of the Unicode string in characters.
.AP "Tcl_DString" *dsPtr in/out
A pointer to a previously initialized \fBTcl_DString\fR.
-.AP "unsigned long" numChars in
-The number of characters to compare.
.AP "const char" *start in
Pointer to the beginning of a UTF-8 string.
-.AP int index in
+.AP Tcl_Size index in
The index of a character (not byte) in the UTF-8 string.
.AP int *readPtr out
If non-NULL, filled with the number of bytes in the backslash sequence,
@@ -168,11 +166,12 @@ can consume in a single call.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in starting at \fIbuf\fR. The return value is the number of bytes stored
-in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
-the return value will be 1 and a single byte in the range 0xF0 - 0xF4
-will be stored. If you still want to produce UTF-8 output for it (even
-though knowing it's an illegal code-point on its own), just call
-\fBTcl_UniCharToUtf\fR again specifying ch = -1.
+in \fIbuf\fR. The character \fIch\fR can be or'ed with the value TCL_COMBINE
+to enable special behavior, compatible with Tcl 8.x. Then, if ch is a high
+surrogate (range U+D800 - U+DBFF), the return value will be 1 and a single
+byte in the range 0xF0 - 0xF4 will be stored. If \fIch\fR is a low surrogate
+(range U+DC00 - U+DFFF), an attempt is made to combine the result with
+the earlier produced bytes, resulting in a 4-byte UTF-8 byte sequence.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the
@@ -219,7 +218,7 @@ the number of Unicode characters (not bytes) in that string.
\fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to
\fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters.
They accept two null-terminated Unicode strings and the number of characters
-to compare. Both strings are assumed to be at least \fInumChars\fR characters
+to compare. Both strings are assumed to be at least \fIuniLength\fR characters
long. \fBTcl_UniCharNcmp\fR compares the two strings character-by-character
according to the Unicode character ordering. It returns an integer greater
than, equal to, or less than 0 if the first string is greater than, equal
@@ -233,7 +232,7 @@ be case sensitive and returns whether the string matches the pattern.
.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
-to compare. (Both strings are assumed to be at least \fInumChars\fR
+to compare. (Both strings are assumed to be at least \fIlength\fR
characters long.) \fBTcl_UtfNcmp\fR compares the two strings
character-by-character according to the Unicode character ordering.
It returns an integer greater than, equal to, or less than 0 if the
@@ -300,17 +299,14 @@ byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte
Pascal Ord() function. It returns the Unicode character represented at the
specified character (not byte) \fIindex\fR in the UTF-8 string
\fIsrc\fR. The source string must contain at least \fIindex\fR
-characters. If a negative \fIindex\fR is given or \fIindex\fR points
+characters. If \fIindex\fR is negative or \fIindex\fR points
to the second half of a surrogate pair, it returns -1.
.PP
\fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not
byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must
contain at least \fIindex\fR characters. This is equivalent to calling
-\fBTcl_UtfToUniChar\fR \fIindex\fR times, except if that would return
-a pointer to the second byte of a valid 4-byte UTF-8 sequence, in which
-case, \fBTcl_UtfToUniChar\fR will be called once more to find the end
-of the sequence. If a negative \fIindex\fR is given, the returned pointer
-points to the first character in the source string.
+\fBTcl_UtfToUniChar\fR \fIindex\fR times. If \fIindex\fR is negative,
+the return pointer points to the first character in the source string.
.PP
\fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl
commands. It parses a backslash sequence and stores the properly formed