diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-18 20:07:15 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-18 20:07:15 (GMT) |
commit | 301062d3fba2d66db58bb0d3df8a8abc83bf1dce (patch) | |
tree | e2721cf766f08971cbca314b903ee7ec99122481 /doc/Utf.3 | |
parent | 6d0e92b89ee6d83f8255334bf2f2a7c23e19c009 (diff) | |
parent | c92353bea97dda2c6b840f308c549492629f9698 (diff) | |
download | tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.zip tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.gz tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.bz2 |
Add 4 new encodings, and add documentation.
Diffstat (limited to 'doc/Utf.3')
-rw-r--r-- | doc/Utf.3 | 49 |
1 files changed, 43 insertions, 6 deletions
@@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings +Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -21,25 +21,46 @@ int int \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR) .sp +int +\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR) +.sp char * \fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR) .sp +char * +\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR) +.sp Tcl_UniChar * \fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR) .sp +unsigned short * +\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR) +.sp int \fBTcl_UniCharLen\fR(\fIuniStr\fR) .sp int +\fBTcl_Utf16Len\fR(\fIutf16Str\fR) +.sp +int \fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR) .sp int \fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR) .sp int +\fBTcl_Utf16Ncmp\fR(\fIutf16s, utf16t, numChars\fR) +.sp +int +\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR) +.sp +int \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR) .sp int +\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR) +.sp +int \fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR) .sp int @@ -80,6 +101,8 @@ Buffer 
in which the UTF-8 representation of the Tcl_UniChar is stored. At most The Unicode character to be converted or examined. .AP Tcl_UniChar *chPtr out Filled with the Tcl_UniChar represented by the head of the UTF-8 string. +.AP unsigned short *utf16Ptr out +Filled with the UTF-16 represented by the head of the UTF-8 string. .AP "const char" *src in Pointer to a UTF-8 string. .AP "const char" *cs in @@ -94,6 +117,14 @@ A null-terminated Unicode string. A null-terminated Unicode string. .AP "const Tcl_UniChar" *uniPattern in A null-terminated Unicode string. +.AP "const unsigned short" *utf16Str in +A null-terminated UTF-16 string. +.AP "const unsigned short" *utf16s in +A null-terminated UTF-16 string. +.AP "const unsigned short" *utf16t in +A null-terminated UTF-16 string. +.AP "const unsigned short" *utf16Pattern in +A null-terminated UTF-16 string. .AP int length in The length of the UTF-8 string in bytes (not UTF-8 characters). If negative, all bytes up to the first null byte are used. @@ -121,8 +152,8 @@ case-insensitive (1). .SH DESCRIPTION .PP -These routines convert between UTF-8 strings and Unicode characters. An -Unicode character represented as an unsigned, fixed-size +These routines convert between UTF-8 strings and Unicode/UTF-16 characters. +A Unicode character is represented as an unsigned, fixed-size quantity. A UTF-8 character is a Unicode character represented as a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence consists of a lead byte followed by some number of trail bytes. @@ -133,9 +164,10 @@ represent one Unicode character in the UTF-8 representation. \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string in starting at \fIbuf\fR. The return value is the number of bytes stored in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then -the return value will be 0 and nothing will be stored. 
If you still -want to produce UTF-8 output for it (even though knowing it's an illegal -code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1. +the return value will be 1 and a single byte in the range 0xF0 - 0xF4 +will be stored. If you still want to produce UTF-8 output for it (even +though it is an illegal code point on its own), just call +\fBTcl_UniCharToUtf\fR again specifying ch = -1. .PP \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the @@ -187,6 +219,11 @@ is the Unicode case insensitive version. a Unicode pattern, and a boolean value specifying whether the match should be case sensitive and returns whether the string matches the pattern. .PP +\fBTcl_Utf16CaseMatch\fR is the UTF-16 equivalent to +\fBTcl_StringCaseMatch\fR. It accepts a null-terminated UTF-16 string, +a UTF-16 pattern, and a boolean value specifying whether the match should +be case sensitive and returns whether the string matches the pattern. +.PP \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It accepts two null-terminated UTF-8 strings and the number of characters to compare. (Both strings are assumed to be at least \fInumChars\fR |