summaryrefslogtreecommitdiffstats
path: root/doc/Encoding.3
diff options
context:
space:
mode:
Diffstat (limited to 'doc/Encoding.3')
-rw-r--r--doc/Encoding.387
1 files changed, 62 insertions, 25 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3
index 7b5e9d4..92268e8 100644
--- a/doc/Encoding.3
+++ b/doc/Encoding.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath \- procedures for creating and using encodings
+Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_UtfToExternalDStringEx, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternalDStringEx, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath \- procedures for creating and using encodings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -26,13 +26,13 @@ char *
\fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
size_t
-\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR)
+\fBTcl_ExternalToUtfDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR)
.sp
char *
\fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
size_t
-\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR)
+\fBTcl_UtfToExternalDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR)
.sp
int
\fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr,
@@ -93,7 +93,7 @@ encoding-specific length of the string is used.
Pointer to an uninitialized or free \fBTcl_DString\fR in which the converted
result will be stored.
.AP int flags in
-Various flag bits OR-ed together.
+This is a bit mask passed in to control the operation of the encoding functions.
\fBTCL_ENCODING_START\fR signifies that the
source buffer is the first block in a (potentially multi-block) input
stream, telling the conversion routine to reset to an initial state and
@@ -101,16 +101,15 @@ perform any initialization that needs to occur before the first byte is
converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last
block in a (potentially multi-block) input stream, telling the conversion
routine to perform any finalization that needs to occur after the last
-byte is converted and then to reset to an initial state.
-\fBTCL_ENCODING_NOCOMPLAIN\fR signifies that the conversion routine should
-not return immediately upon reading a source character that does not exist in
-the target encoding, but it will substitute a default fallback character for
-all of such characters. The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect,
-it only has meaning in Tcl 8.x. The flag \fBTCL_ENCODING_STRICT\fR makes the
-encoder/decoder more strict in what it considers to be an invalid byte
-sequence. The flag \fBTCL_ENCODING_MODIFIED\fR makes
-\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the byte
-sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders.
+byte is converted and then to reset to an initial state. The
+\fBTCL_PROFILE_*\fR bits defined in the \fBPROFILES\fR section below
+control the encoding profile to be used for dealing with invalid data or
+other errors in the encoding transform.
+\fBTCL_ENCODING_STOPONERROR\fR is present for backward compatibility with
+Tcl 8.6 and forces the encoding profile to \fBstrict\fR.
+
+Some flags bits may not be usable with some functions as noted in the
+function descriptions below.
.AP Tcl_EncodingState *statePtr in/out
Used when converting a (generally long or indefinite length) byte stream
in a piece-by-piece fashion. The conversion routine stores its current
@@ -136,6 +135,9 @@ buffer as a result of the conversion. May be NULL.
.AP int *dstCharsPtr out
Filled with the number of characters that correspond to the number of bytes
stored in the output buffer. May be NULL.
+.AP Tcl_Size *errorIdxPtr out
+Filled with the index of the byte or character that caused the encoding transform
+to fail. May be NULL.
.AP Tcl_DString *bufPtr out
Storage for the prescribed system encoding name.
.AP "const Tcl_EncodingType" *typePtr in
@@ -209,11 +211,30 @@ call \fBTcl_DStringFree\fR to free any information stored in \fIdstPtr\fR.
When converting, if any of the characters in the source buffer cannot be
represented in the target encoding, a default fallback character will be
used. The return value is a pointer to the value stored in the DString.
-.PP
-\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR,
-but it has an additional flags parameter. The return value is the index of
-the first byte in the input string causing a conversion error.
-Or TCL_INDEX_NONE if all is OK.
+
+.PP
+\fBTcl_ExternalToUtfDStringEx\fR is a more flexible version of older
+\fBTcl_ExternalToUtfDString\fR function. It takes three additional parameters,
+\fBinterp\fR, \fBflags\fR and \fBerrorIdxPtr\fR. The \fBflags\fR parameter may
+be used to specify the profile to be used for the transform. The
+\fBTCL_ENCODING_START\fR and \fBTCL_ENCODING_END\fR bits in \fBflags\fR are
+ignored as the function assumes the entire source string to be decoded is passed
+into the function. On success, the function returns \fBTCL_ERROR\fR with the
+converted string stored in \fB*dstPtr\fR. For errors other than conversion
+errors, such as invalid flags, the function returns \fBTCL_OK\fR with an error
+message in \fBinterp\fR if it is not NULL.
+
+For conversion errors, \fBTcl_ExternalToUtfDStringEx\fR returns one
+of the \fBTCL_CONVERT_*\fR errors listed below for \fBTcl_ExternalToUtf\fR.
+When one of these conversion errors is returned, an error message is
+stored in \fBinterp\fR only if \fBerrorIdxPtr\fR is NULL. Otherwise, no error message
+is stored as the function expects the caller is interested whatever is
+decoded to that point and not treating this as an immediate error condition.
+The index of the error location is stored in \fB*errorIdxPtr\fR.
+
+The caller must call \fBTcl_DStringFree\fR to free up the \fB*dstPtr\fR resources
+irrespective of the return value from the function.
+
.PP
\fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified
\fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the
@@ -236,12 +257,12 @@ the unconverted bytes that remained in \fIsrc\fR plus some further bytes
from the source stream to properly convert the formerly split-up multibyte
sequence.
.IP \fBTCL_CONVERT_SYNTAX\fR 29
-The source buffer contained an invalid character sequence. This may occur
+The source buffer contained an invalid byte or character sequence. This may occur
if the input stream has been damaged or if the input encoding method was
misidentified.
.IP \fBTCL_CONVERT_UNKNOWN\fR 29
The source buffer contained a character that could not be represented in
-the target encoding and \fBTCL_ENCODING_NOCOMPLAIN\fR was not specified.
+the target encoding.
.RE
.LP
\fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8
@@ -253,10 +274,14 @@ characters in the source buffer cannot be represented in the target
encoding, a default fallback character will be used. The return value is
a pointer to the value stored in the DString.
.PP
-\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR,
-but it has an additional flags parameter. The return value is the index of
-the first byte of an utf-8 byte-sequence in the input string causing a
-conversion error. Or TCL_INDEX_NONE if all is OK.
+\fBTcl_UtfToExternalDStringEx\fR is an enhanced version of
+\fBTcl_UtfToExternalDString\fR that transforms UTF-8 encoded source data to a specified
+\fIencoding\fR. Except for the direction of the transform, the parameters and
+return values are identical to those of \fBTcl_ExternalToUtfDStringEx\fR. See
+that function above for details about the same.
+
+Irrespective of the return code from the function, the caller must free
+resources associated with \fB*dstPtr\fR when the function returns.
.PP
\fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into
the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from
@@ -559,6 +584,18 @@ to the object, it will be deleted.
.PP
\fBTcl_GetEncodingSearchPath\fR returns an object with a reference count of at
least 1.
+.SH "PROFILES"
+Encoding profiles define the manner in which errors in the encoding transforms
+are handled by the encoding functions. An application can specify the profile
+to be used by OR-ing the \fBflags\fR parameter passed to the function
+with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR,
+\fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR.
+These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles
+respectively. If none are specified, a version-dependent default profile is used.
+For Tcl 8.7, the default profile is \fBtcl8\fR.
+
+For details about profiles, see the \fBPROFILES\fR section in
+the documentation of the \fBencoding\fR command.
.SH "SEE ALSO"
encoding(n)
.SH KEYWORDS