summaryrefslogtreecommitdiffstats
path: root/doc/Utf.3
diff options
context:
space:
mode:
Diffstat (limited to 'doc/Utf.3')
-rw-r--r--doc/Utf.398
1 files changed, 48 insertions, 50 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index af472ae..8e39fef 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings.
+Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -20,91 +20,91 @@ int
.sp
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
-.VS 8.4
.sp
char *
-\fBTcl_UniCharToUtfDString\fR(\fIuniStr, numChars, dstPtr\fR)
+\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
Tcl_UniChar *
-\fBTcl_UtfToUniCharDString\fR(\fIsrc, len, dstPtr\fR)
-.VE 8.4
+\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
-\fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR)
-.VS 8.4
+\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
-\fBTcl_UniCharNcasecmp\fR(\fIuniStr, uniStr, num\fR)
+\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
-.VE 8.4
.sp
int
-\fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR)
+\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
-\fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR)
+\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
.sp
int
-\fBTcl_UtfCharComplete\fR(\fIsrc, len\fR)
+\fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
.sp
int
-\fBTcl_NumUtfChars\fR(\fIsrc, len\fR)
-.VS 8.4
+\fBTcl_NumUtfChars\fR(\fIsrc, length\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfFindFirst\fR(\fIsrc, ch\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfFindLast\fR(\fIsrc, ch\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfNext\fR(\fIsrc\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfPrev\fR(\fIsrc, start\fR)
-.VE 8.4
.sp
Tcl_UniChar
\fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR)
-.VS 8.4
.sp
-CONST char *
+const char *
\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
-.VE 8.4
.sp
int
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
.SH ARGUMENTS
-.AS "CONST Tcl_UniChar" numChars in/out
+.AS "const Tcl_UniChar" *uniPattern in/out
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
-TCL_UTF_MAX bytes are stored in the buffer.
+\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The Tcl_UniChar to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
-.AP "CONST char" *src in
+.AP "const char" *src in
Pointer to a UTF-8 string.
-.AP "CONST Tcl_UniChar" *uniStr in
+.AP "const char" *cs in
+Pointer to a UTF-8 string.
+.AP "const char" *ct in
+Pointer to a UTF-8 string.
+.AP "const Tcl_UniChar" *uniStr in
+A null-terminated Unicode string.
+.AP "const Tcl_UniChar" *ucs in
+A null-terminated Unicode string.
+.AP "const Tcl_UniChar" *uct in
A null-terminated Unicode string.
-.AP "CONST Tcl_UniChar" *uniPattern in
+.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
-.AP int len in
+.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
-.AP int numChars in
+.AP int uniLength in
The length of the Unicode string in characters. Must be greater than or
equal to 0.
-.AP "Tcl_DString" *dstPtr in/out
-A pointer to a previously-initialized \fBTcl_DString\fR.
-.AP "unsigned long" num in
+.AP "Tcl_DString" *dsPtr in/out
+A pointer to a previously initialized \fBTcl_DString\fR.
+.AP "unsigned long" numChars in
The number of characters to compare.
-.AP "CONST char" *start in
+.AP "const char" *start in
Pointer to the beginning of a UTF-8 string.
.AP int index in
The index of a character (not byte) in the UTF-8 string.
@@ -113,12 +113,10 @@ If non-NULL, filled with the number of bytes in the backslash sequence,
including the backslash character.
.AP char *dst out
Buffer in which the bytes represented by the backslash sequence are stored.
-At most TCL_UTF_MAX bytes are stored in the buffer.
-.VS 8.4
+At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int nocase in
Specifies whether the match should be done case-sensitive (0) or
case-insensitive (1).
-.VE 8.4
.BE
.SH DESCRIPTION
@@ -126,7 +124,7 @@ case-insensitive (1).
These routines convert between UTF-8 strings and Tcl_UniChars. A
Tcl_UniChar is a Unicode character represented as an unsigned, fixed-size
quantity. A UTF-8 character is a Unicode character represented as
-a varying-length sequence of up to TCL_UTF_MAX bytes. A multibyte UTF-8
+a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
@@ -138,7 +136,7 @@ in \fIbuf\fR.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the
-number of bytes read from \fIsrc\fR.. The caller must ensure that the
+number of bytes read from \fIsrc\fR. The caller must ensure that the
source buffer is long enough such that this routine does not run off the
end and dereference non-existent or random memory; if the source buffer
is known to be null-terminated, this will not happen. If the input is
@@ -147,15 +145,17 @@ byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and
0x00ff and return 1.
.PP
\fBTcl_UniCharToUtfDString\fR converts the given Unicode string
-to UTF-8, storing the result in a previously-initialized \fBTcl_DString\fR.
-You must specify the length of the given Unicode string.
+to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
+You must specify \fIuniLength\fR, the length of the given Unicode string.
The return value is a pointer to the UTF-8 representation of the
Unicode string. Storage for the return value is appended to the
end of the \fBTcl_DString\fR.
.PP
\fBTcl_UtfToUniCharDString\fR converts the given UTF-8 string to Unicode,
-storing the result in the previously-initialized \fBTcl_DString\fR.
-you may either specify the length of the given UTF-8 string or "-1",
+storing the result in the previously initialized \fBTcl_DString\fR.
+In the argument \fIlength\fR, you may either specify the length of
+the given UTF-8 string in bytes or
+.QW \-1 ,
in which case \fBTcl_UtfToUniCharDString\fR uses \fBstrlen\fR to
calculate the length. The return value is a pointer to the Unicode
representation of the UTF-8 string. Storage for the return value
@@ -168,24 +168,22 @@ the number of Unicode characters (not bytes) in that string.
.PP
\fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to
\fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters.
-They accepts two null-terminated Unicode strings and the number of characters
-to compare. Both strings are assumed to be at least \fIlen\fR characters
+They accept two null-terminated Unicode strings and the number of characters
+to compare. Both strings are assumed to be at least \fInumChars\fR characters
long. \fBTcl_UniCharNcmp\fR compares the two strings character-by-character
according to the Unicode character ordering. It returns an integer greater
than, equal to, or less than 0 if the first string is greater than, equal
to, or less than the second string respectively. \fBTcl_UniCharNcasecmp\fR
is the Unicode case insensitive version.
.PP
-.VS 8.4
\fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to
\fBTcl_StringCaseMatch\fR. It accepts a null-terminated Unicode string,
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
-.VE 8.4
.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
-to compare. (Both strings are assumed to be at least \fIlen\fR
+to compare. (Both strings are assumed to be at least \fInumChars\fR
characters long.) \fBTcl_UtfNcmp\fR compares the two strings
character-by-character according to the Unicode character ordering.
It returns an integer greater than, equal to, or less than 0 if the
@@ -198,7 +196,7 @@ differences in case when comparing upper, lower or title case
characters.
.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
-of length \fIlen\fR bytes is long enough to be decoded by
+of \fIlength\fR bytes is long enough to be decoded by
\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
that the UTF-8 string is properly formed. This routine is used by
procedures that are operating on a byte at a time and need to know if a
@@ -206,7 +204,7 @@ full Tcl_UniChar has been seen.
.PP
\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It
returns the number of Tcl_UniChars that are represented by the UTF-8 string
-\fIsrc\fR. The length of the source string is \fIlen\fR bytes. If the
+\fIsrc\fR. The length of the source string is \fIlength\fR bytes. If the
length is negative, all bytes up to the first null byte are used.
.PP
\fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It
@@ -248,7 +246,7 @@ the return pointer points to the first character in the source string.
\fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl
commands. It parses a backslash sequence and stores the properly formed
UTF-8 character represented by the backslash sequence in the output
-buffer \fIdst\fR. At most TCL_UTF_MAX bytes are stored in the buffer.
+buffer \fIdst\fR. At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer.
\fBTcl_UtfBackslash\fR modifies \fI*readPtr\fR to contain the number
of bytes in the backslash sequence, including the backslash character.
The return value is the number of bytes stored in the output buffer.