summaryrefslogtreecommitdiffstats
path: root/doc
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-14 12:41:37 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2019-09-14 12:41:37 (GMT)
commit93022718af12833e135ad743bc6169bcfd443ddf (patch)
treeb16d8ef4b2c6dd1fc739b04a568e411969721611 /doc
parent5406b207723fa1acec5df7441387d1c9229a88ac (diff)
parent0ef77b52637aa508cfcf98f6fb583cbeca47b5a4 (diff)
downloadtcl-93022718af12833e135ad743bc6169bcfd443ddf.zip
tcl-93022718af12833e135ad743bc6169bcfd443ddf.tar.gz
tcl-93022718af12833e135ad743bc6169bcfd443ddf.tar.bz2
TIP #548 implementation: Support `wchar_t` conversion functions and deprecate `Tcl_WinUtfToTChar()` and `Tcl_WinTCharToUtf()`
Diffstat (limited to 'doc')
-rw-r--r--doc/Encoding.318
-rw-r--r--doc/Utf.341
2 files changed, 44 insertions, 15 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3
index 79fca0f..2d2461e 100644
--- a/doc/Encoding.3
+++ b/doc/Encoding.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings
+Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -255,11 +255,17 @@ is filled with the corresponding number of bytes that were stored in
\fIdst\fR. The return values are the same as the return values for
\fBTcl_ExternalToUtf\fR.
.PP
-\fBTcl_WinUtfToTChar\fR and \fBTcl_WinTCharToUtf\fR are
-Windows-only convenience
-functions for converting between UTF-8 and Windows strings
-based on the TCHAR type which is by convention
-a Unicode character on Windows NT.
+\fBTcl_WinUtfToTChar\fR and \fBTcl_WinTCharToUtf\fR are Windows-only
+convenience functions for converting between UTF-8 and Windows strings
+based on the TCHAR type which is by convention a Unicode character on
+Windows NT. Those functions are deprecated. You can use
+\fBTcl_UtfToWCharDString\fR resp. \fBTcl_WCharToUtfDString\fR as replacement.
+If you want compatibility with earlier Tcl releases than 8.7, use
+\fBTcl_UtfToUniCharDString\fR resp. \fBTcl_UniCharToUtfDString\fR as
+replacement, and make sure you compile your extension with -DTCL_UTF_MAX=3.
+Beware: Those replacement functions don't initialize their Tcl_DString (you'll
+have to do that yourself), and \fBTcl_UniCharToUtfDString\fR from Tcl 8.6
+doesn't accept -1 as length parameter.
.PP
\fBTcl_GetEncodingName\fR is roughly the inverse of \fBTcl_GetEncoding\fR.
Given an \fIencoding\fR, the return value is the \fIname\fR argument that
diff --git a/doc/Utf.3 b/doc/Utf.3
index da2083f..9b0ec3c 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
+Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_WCharToUtfDString, Tcl_UtfToWCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -21,12 +21,30 @@ int
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp
+int
+\fBTcl_UtfToChar16\fR(\fIsrc, uPtr\fR)
+.sp
+int
+\fBTcl_UtfToWChar\fR(\fIsrc, wPtr\fR)
+.sp
char *
\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
+char *
+\fBTcl_Char16ToUtfDString\fR(\fIuStr, uniLength, dsPtr\fR)
+.sp
+char *
+\fBTcl_WCharToUtfDString\fR(\fIwStr, uniLength, dsPtr\fR)
+.sp
Tcl_UniChar *
\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
+unsigned short *
+\fBTcl_UtfToChar16DString\fR(\fIsrc, length, dsPtr\fR)
+.sp
+wchar_t *
+\fBTcl_UtfToWCharDString\fR(\fIsrc, length, dsPtr\fR)
+.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
@@ -80,6 +98,10 @@ Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
+.AP unsigned short *uPtr out
+Filled with the utf-16 represented by the head of the UTF-8 string.
+.AP wchar_t *wPtr out
+Filled with the wchar_t represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
@@ -94,12 +116,15 @@ A null-terminated Unicode string.
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
+.AP "const unsigned short" *uStr in
+A null-terminated UTF-16 string.
+.AP "const wchar_t" *wStr in
+A null-terminated wchar_t string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
.AP int uniLength in
-The length of the Unicode string in characters. Must be greater than or
-equal to 0.
+The length of the Unicode string in characters.
.AP "Tcl_DString" *dsPtr in/out
A pointer to a previously initialized \fBTcl_DString\fR.
.AP "unsigned long" numChars in
@@ -121,11 +146,10 @@ case-insensitive (1).
.SH DESCRIPTION
.PP
-These routines convert between UTF-8 strings and Unicode characters. An
-Unicode character represented as an unsigned, fixed-size
-quantity. A UTF-8 character is a Unicode character represented as
-a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
-sequence consists of a lead byte followed by some number of trail bytes.
+These routines convert between UTF-8 strings and Unicode/Utf-16 characters.
+A UTF-8 character is a Unicode character represented as a varying-length
+sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8 sequence
+consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
@@ -153,7 +177,6 @@ byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x00A0 and
.PP
\fBTcl_UniCharToUtfDString\fR converts the given Unicode string
to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
-You must specify \fIuniLength\fR, the length of the given Unicode string.
The return value is a pointer to the UTF-8 representation of the
Unicode string. Storage for the return value is appended to the
end of the \fBTcl_DString\fR.