diff options
Diffstat (limited to 'doc/Utf.3')
-rw-r--r-- | doc/Utf.3 | 98 |
1 file changed, 48 insertions, 50 deletions
@@ -8,7 +8,7 @@ .TH Utf 3 "8.1" Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings. +Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -20,91 +20,91 @@ int .sp int \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR) -.VS 8.4 .sp char * -\fBTcl_UniCharToUtfDString\fR(\fIuniStr, numChars, dstPtr\fR) +\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR) .sp Tcl_UniChar * -\fBTcl_UtfToUniCharDString\fR(\fIsrc, len, dstPtr\fR) -.VE 8.4 +\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR) .sp int \fBTcl_UniCharLen\fR(\fIuniStr\fR) .sp int -\fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR) -.VS 8.4 +\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR) .sp int -\fBTcl_UniCharNcasecmp\fR(\fIuniStr, uniStr, num\fR) +\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR) .sp int \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR) -.VE 8.4 .sp int -\fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR) +\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR) .sp int -\fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR) +\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR) .sp int -\fBTcl_UtfCharComplete\fR(\fIsrc, len\fR) +\fBTcl_UtfCharComplete\fR(\fIsrc, length\fR) .sp int -\fBTcl_NumUtfChars\fR(\fIsrc, len\fR) -.VS 8.4 +\fBTcl_NumUtfChars\fR(\fIsrc, length\fR) .sp -CONST char * +const char * 
\fBTcl_UtfFindFirst\fR(\fIsrc, ch\fR) .sp -CONST char * +const char * \fBTcl_UtfFindLast\fR(\fIsrc, ch\fR) .sp -CONST char * +const char * \fBTcl_UtfNext\fR(\fIsrc\fR) .sp -CONST char * +const char * \fBTcl_UtfPrev\fR(\fIsrc, start\fR) -.VE 8.4 .sp Tcl_UniChar \fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR) -.VS 8.4 .sp -CONST char * +const char * \fBTcl_UtfAtIndex\fR(\fIsrc, index\fR) -.VE 8.4 .sp int \fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR) .SH ARGUMENTS -.AS "CONST Tcl_UniChar" numChars in/out +.AS "const Tcl_UniChar" *uniPattern in/out .AP char *buf out Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most -TCL_UTF_MAX bytes are stored in the buffer. +\fBTCL_UTF_MAX\fR bytes are stored in the buffer. .AP int ch in The Tcl_UniChar to be converted or examined. .AP Tcl_UniChar *chPtr out Filled with the Tcl_UniChar represented by the head of the UTF-8 string. -.AP "CONST char" *src in +.AP "const char" *src in Pointer to a UTF-8 string. -.AP "CONST Tcl_UniChar" *uniStr in +.AP "const char" *cs in +Pointer to a UTF-8 string. +.AP "const char" *ct in +Pointer to a UTF-8 string. +.AP "const Tcl_UniChar" *uniStr in +A null-terminated Unicode string. +.AP "const Tcl_UniChar" *ucs in +A null-terminated Unicode string. +.AP "const Tcl_UniChar" *uct in A null-terminated Unicode string. -.AP "CONST Tcl_UniChar" *uniPattern in +.AP "const Tcl_UniChar" *uniPattern in A null-terminated Unicode string. -.AP int len in +.AP int length in The length of the UTF-8 string in bytes (not UTF-8 characters). If negative, all bytes up to the first null byte are used. -.AP int numChars in +.AP int uniLength in The length of the Unicode string in characters. Must be greater than or equal to 0. -.AP "Tcl_DString" *dstPtr in/out -A pointer to a previously-initialized \fBTcl_DString\fR. -.AP "unsigned long" num in +.AP "Tcl_DString" *dsPtr in/out +A pointer to a previously initialized \fBTcl_DString\fR. 
+.AP "unsigned long" numChars in The number of characters to compare. -.AP "CONST char" *start in +.AP "const char" *start in Pointer to the beginning of a UTF-8 string. .AP int index in The index of a character (not byte) in the UTF-8 string. @@ -113,12 +113,10 @@ If non-NULL, filled with the number of bytes in the backslash sequence, including the backslash character. .AP char *dst out Buffer in which the bytes represented by the backslash sequence are stored. -At most TCL_UTF_MAX bytes are stored in the buffer. -.VS 8.4 +At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer. .AP int nocase in Specifies whether the match should be done case-sensitive (0) or case-insensitive (1). -.VE 8.4 .BE .SH DESCRIPTION @@ -126,7 +124,7 @@ case-insensitive (1). These routines convert between UTF-8 strings and Tcl_UniChars. A Tcl_UniChar is a Unicode character represented as an unsigned, fixed-size quantity. A UTF-8 character is a Unicode character represented as -a varying-length sequence of up to TCL_UTF_MAX bytes. A multibyte UTF-8 +a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence consists of a lead byte followed by some number of trail bytes. .PP \fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to @@ -138,7 +136,7 @@ in \fIbuf\fR. .PP \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the -number of bytes read from \fIsrc\fR.. The caller must ensure that the +number of bytes read from \fIsrc\fR. The caller must ensure that the source buffer is long enough such that this routine does not run off the end and dereference non-existent or random memory; if the source buffer is known to be null-terminated, this will not happen. If the input is @@ -147,15 +145,17 @@ byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and 0x00ff and return 1. 
.PP \fBTcl_UniCharToUtfDString\fR converts the given Unicode string -to UTF-8, storing the result in a previously-initialized \fBTcl_DString\fR. -You must specify the length of the given Unicode string. +to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR. +You must specify \fIuniLength\fR, the length of the given Unicode string. The return value is a pointer to the UTF-8 representation of the Unicode string. Storage for the return value is appended to the end of the \fBTcl_DString\fR. .PP \fBTcl_UtfToUniCharDString\fR converts the given UTF-8 string to Unicode, -storing the result in the previously-initialized \fBTcl_DString\fR. -you may either specify the length of the given UTF-8 string or "-1", +storing the result in the previously initialized \fBTcl_DString\fR. +In the argument \fIlength\fR, you may either specify the length of +the given UTF-8 string in bytes or +.QW \-1 , in which case \fBTcl_UtfToUniCharDString\fR uses \fBstrlen\fR to calculate the length. The return value is a pointer to the Unicode representation of the UTF-8 string. Storage for the return value @@ -168,24 +168,22 @@ the number of Unicode characters (not bytes) in that string. .PP \fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to \fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters. -They accepts two null-terminated Unicode strings and the number of characters -to compare. Both strings are assumed to be at least \fIlen\fR characters +They accept two null-terminated Unicode strings and the number of characters +to compare. Both strings are assumed to be at least \fInumChars\fR characters long. \fBTcl_UniCharNcmp\fR compares the two strings character-by-character according to the Unicode character ordering. It returns an integer greater than, equal to, or less than 0 if the first string is greater than, equal to, or less than the second string respectively. \fBTcl_UniCharNcasecmp\fR is the Unicode case insensitive version. 
.PP -.VS 8.4 \fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to \fBTcl_StringCaseMatch\fR. It accepts a null-terminated Unicode string, a Unicode pattern, and a boolean value specifying whether the match should be case sensitive and returns whether the string matches the pattern. -.VE 8.4 .PP \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It accepts two null-terminated UTF-8 strings and the number of characters -to compare. (Both strings are assumed to be at least \fIlen\fR +to compare. (Both strings are assumed to be at least \fInumChars\fR characters long.) \fBTcl_UtfNcmp\fR compares the two strings character-by-character according to the Unicode character ordering. It returns an integer greater than, equal to, or less than 0 if the @@ -198,7 +196,7 @@ differences in case when comparing upper, lower or title case characters. .PP \fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR -of length \fIlen\fR bytes is long enough to be decoded by +of \fIlength\fR bytes is long enough to be decoded by \fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee that the UTF-8 string is properly formed. This routine is used by procedures that are operating on a byte at a time and need to know if a @@ -206,7 +204,7 @@ full Tcl_UniChar has been seen. .PP \fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It returns the number of Tcl_UniChars that are represented by the UTF-8 string -\fIsrc\fR. The length of the source string is \fIlen\fR bytes. If the +\fIsrc\fR. The length of the source string is \fIlength\fR bytes. If the length is negative, all bytes up to the first null byte are used. .PP \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It @@ -248,7 +246,7 @@ the return pointer points to the first character in the source string. \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl commands. 
It parses a backslash sequence and stores the properly formed UTF-8 character represented by the backslash sequence in the output -buffer \fIdst\fR. At most TCL_UTF_MAX bytes are stored in the buffer. +buffer \fIdst\fR. At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer. \fBTcl_UtfBackslash\fR modifies \fI*readPtr\fR to contain the number of bytes in the backslash sequence, including the backslash character. The return value is the number of bytes stored in the output buffer.