summaryrefslogtreecommitdiffstats
path: root/doc/Utf.3
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-03-18 20:07:15 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-03-18 20:07:15 (GMT)
commit301062d3fba2d66db58bb0d3df8a8abc83bf1dce (patch)
treee2721cf766f08971cbca314b903ee7ec99122481 /doc/Utf.3
parent6d0e92b89ee6d83f8255334bf2f2a7c23e19c009 (diff)
parentc92353bea97dda2c6b840f308c549492629f9698 (diff)
downloadtcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.zip
tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.gz
tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.bz2
Add 4 new encodings, and add documentation.
Diffstat (limited to 'doc/Utf.3')
-rw-r--r--doc/Utf.349
1 files changed, 43 insertions, 6 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index afcff79..f638f84 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
+Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -21,25 +21,46 @@ int
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp
+int
+\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR)
+.sp
char *
\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
+char *
+\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR)
+.sp
Tcl_UniChar *
\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
+unsigned short *
+\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR)
+.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
+\fBTcl_Utf16Len\fR(\fIutf16Str\fR)
+.sp
+int
\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp
int
+\fBTcl_Utf16Ncmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
+\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
.sp
int
+\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR)
+.sp
+int
\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
@@ -80,6 +101,8 @@ Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
+.AP unsigned short *utf16Ptr out
+Filled with the UTF-16 code unit represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
@@ -94,6 +117,14 @@ A null-terminated Unicode string.
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
+.AP "const unsigned short" *utf16Str in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16s in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16t in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16Pattern in
+A null-terminated UTF-16 string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
@@ -121,8 +152,8 @@ case-insensitive (1).
.SH DESCRIPTION
.PP
-These routines convert between UTF-8 strings and Unicode characters. An
-Unicode character represented as an unsigned, fixed-size
+These routines convert between UTF-8 strings and Unicode/UTF-16 characters.
+A Unicode character is represented as an unsigned, fixed-size
quantity. A UTF-8 character is a Unicode character represented as
a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
@@ -133,9 +164,10 @@ represent one Unicode character in the UTF-8 representation.
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
starting at \fIbuf\fR. The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
-the return value will be 0 and nothing will be stored. If you still
-want to produce UTF-8 output for it (even though knowing it's an illegal
-code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.
+the return value will be 1 and a single byte in the range 0xF0 - 0xF4
+will be stored. If you still want to produce UTF-8 output for it (even
+though it is an illegal code point on its own), just call
+\fBTcl_UniCharToUtf\fR again, specifying \fIch\fR = -1.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the
@@ -187,6 +219,11 @@ is the Unicode case insensitive version.
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
.PP
+\fBTcl_Utf16CaseMatch\fR is the UTF-16 equivalent of
+\fBTcl_StringCaseMatch\fR. It accepts a null-terminated UTF-16 string,
+a UTF-16 pattern, and a flag (\fInocase\fR) that, when nonzero, makes the
+match case insensitive, and returns whether the string matches the pattern.
+.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
to compare. (Both strings are assumed to be at least \fInumChars\fR