Add 4 new encodings, and add documentation.

author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-18 20:07:15 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-18 20:07:15 (GMT)
commit: 301062d3fba2d66db58bb0d3df8a8abc83bf1dce (patch)
tree: e2721cf766f08971cbca314b903ee7ec99122481 /doc/Utf.3
parent: 6d0e92b89ee6d83f8255334bf2f2a7c23e19c009 (diff)
parent: c92353bea97dda2c6b840f308c549492629f9698 (diff)
download: tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.zip
tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.gz
tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.bz2
1 files changed, 43 insertions, 6 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index afcff79..f638f84 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
 .so man.macros
 .BS
 .SH NAME
-Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
+Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
 .SH SYNOPSIS
 .nf
 \fB#include <tcl.h>\fR
@@ -21,25 +21,46 @@ int
 int
 \fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
 .sp
+int
+\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR)
+.sp
 char *
 \fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
 .sp
+char *
+\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR)
+.sp
 Tcl_UniChar *
 \fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
 .sp
+unsigned short *
+\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR)
+.sp
 int
 \fBTcl_UniCharLen\fR(\fIuniStr\fR)
 .sp
 int
+\fBTcl_Utf16Len\fR(\fIutf16Str\fR)
+.sp
+int
 \fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
 .sp
 int
 \fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
 .sp
 int
+\fBTcl_Utf16Ncmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
+\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
 \fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
 .sp
 int
+\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR)
+.sp
+int
 \fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
 .sp
 int
@@ -80,6 +101,8 @@ Buffer in which the UTF-8 representation of the Tcl_UniChar is stored.  At most
 The Unicode character to be converted or examined.
 .AP Tcl_UniChar *chPtr out
 Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
+.AP unsigned short *utf16Ptr out
+Filled with the UTF-16 code unit represented by the head of the UTF-8 string.
 .AP "const char" *src in
 Pointer to a UTF-8 string.
 .AP "const char" *cs in
@@ -94,6 +117,14 @@ A null-terminated Unicode string.
 A null-terminated Unicode string.
 .AP "const Tcl_UniChar" *uniPattern in
 A null-terminated Unicode string.
+.AP "const unsigned short" *utf16Str in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16s in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16t in
+A null-terminated UTF-16 string.
+.AP "const unsigned short" *utf16Pattern in
+A null-terminated UTF-16 string.
 .AP int length in
 The length of the UTF-8 string in bytes (not UTF-8 characters).  If
 negative, all bytes up to the first null byte are used.
@@ -121,8 +152,8 @@ case-insensitive (1).
 
 .SH DESCRIPTION
 .PP
-These routines convert between UTF-8 strings and Unicode characters.  An
-Unicode character represented as an unsigned, fixed-size
+These routines convert between UTF-8 strings and Unicode/UTF-16 characters.
+A Unicode character is represented as an unsigned, fixed-size
 quantity.  A UTF-8 character is a Unicode character represented as
 a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes.  A multibyte UTF-8
 sequence consists of a lead byte followed by some number of trail bytes.
@@ -133,9 +164,10 @@ represent one Unicode character in the UTF-8 representation.
 \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
 in starting at \fIbuf\fR.  The return value is the number of bytes stored
 in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
-the return value will be 0 and nothing will be stored. If you still
-want to produce UTF-8 output for it (even though knowing it's an illegal
-code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.
+the return value will be 1 and a single byte in the range 0xF0 - 0xF4
+will be stored. If you still want to produce UTF-8 output for it (even
+though it is an illegal code point on its own), just call
+\fBTcl_UniCharToUtf\fR again specifying ch = -1.
 .PP
 \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
 and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
@@ -187,6 +219,11 @@ is the Unicode case insensitive version.
 a Unicode pattern, and a boolean value specifying whether the match should
 be case sensitive and returns whether the string matches the pattern.
 .PP
+\fBTcl_Utf16CaseMatch\fR is the UTF-16 equivalent to
+\fBTcl_StringCaseMatch\fR.  It accepts a null-terminated UTF-16 string,
+a UTF-16 pattern, and a boolean value specifying whether the match should
+be case sensitive and returns whether the string matches the pattern.
+.PP
 \fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
 accepts two null-terminated UTF-8 strings and the number of characters
 to compare.  (Both strings are assumed to be at least \fInumChars\fR
author	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-03-18 20:07:15 (GMT)
committer	jan.nijtmans <nijtmans@users.sourceforge.net>	2019-03-18 20:07:15 (GMT)
commit	301062d3fba2d66db58bb0d3df8a8abc83bf1dce (patch)
tree	e2721cf766f08971cbca314b903ee7ec99122481 /doc/Utf.3
parent	6d0e92b89ee6d83f8255334bf2f2a7c23e19c009 (diff)
parent	c92353bea97dda2c6b840f308c549492629f9698 (diff)
download	tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.zip tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.gz tcl-301062d3fba2d66db58bb0d3df8a8abc83bf1dce.tar.bz2