summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-03-30 11:26:36 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-03-30 11:26:36 (GMT)
commit: 664b7500abd51bfa6257c7e3e8fc5846d18d522b (patch)
tree: 80d9a3db6f30221321687209d4320cdb9649246b /generic
parent: 42b07af6c35e293f6f7ecdf76ca84495f67b87e4 (diff)
download: tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.zip
tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.tar.gz
tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.tar.bz2
Add documentation. Do a better job of counting exactly which byte/character caused the encoding/decoding error
Diffstat (limited to 'generic')
-rw-r--r-- generic/tcl.decls 8
-rw-r--r-- generic/tcl.h 5
-rw-r--r-- generic/tclCmdAH.c 31
-rw-r--r-- generic/tclDecls.h 16
-rw-r--r-- generic/tclEncoding.c 30
5 files changed, 51 insertions, 39 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index c2a4abd..8cd5bc9 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2425,12 +2425,12 @@ declare 656 {
}
declare 657 {
- int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
- const char *src, int srcLen, Tcl_DString *dsPtr, int flags)
+ size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
+ const char *src, int srcLen, int flags, Tcl_DString *dsPtr)
}
declare 658 {
- int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
- const char *src, int srcLen, Tcl_DString *dsPtr, int flags)
+ size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
+ const char *src, int srcLen, int flags, Tcl_DString *dsPtr)
}
diff --git a/generic/tcl.h b/generic/tcl.h
index 38dda28..f783f4f 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2069,6 +2069,10 @@ typedef struct Tcl_EncodingType {
* content. Otherwise, the number of chars
* produced is controlled only by other limiting
* factors.
+ * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 instead of
+ * 0x00. Only valid for "utf-8", "wtf-8" and "cesu-8".
+ * This flag is implicit for external -> internal conversions,
+ * optional for internal -> external conversions.
*/
#define TCL_ENCODING_START 0x01
@@ -2076,6 +2080,7 @@ typedef struct Tcl_EncodingType {
#define TCL_ENCODING_STOPONERROR 0x04
#define TCL_ENCODING_NO_TERMINATE 0x08
#define TCL_ENCODING_CHAR_LIMIT 0x10
+#define TCL_ENCODING_MODIFIED 0x20
/*
* The following definitions are the error codes returned by the conversion
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index ee329ec..cd77e06 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -551,7 +551,7 @@ EncodingConvertfromObjCmd(
int length; /* Length of the byte array being converted */
const char *bytesPtr; /* Pointer to the first byte of the array */
const char *stopOnError = NULL;
- int result;
+ size_t result;
if (objc == 2) {
encoding = Tcl_GetEncoding(interp, NULL);
@@ -563,7 +563,9 @@ EncodingConvertfromObjCmd(
data = objv[2];
if (objc > 3) {
stopOnError = Tcl_GetString(objv[3]);
- if (stopOnError[0] != '-' || stopOnError[1] != 's'
+ if (!stopOnError[0]) {
+ stopOnError = NULL;
+ } else if (stopOnError[0] != '-' || stopOnError[1] != 's'
|| strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) {
goto encConvFromError;
}
@@ -578,10 +580,11 @@ EncodingConvertfromObjCmd(
* Convert the string into a byte array in 'ds'
*/
bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length);
- result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, &ds,
- stopOnError ? TCL_ENCODING_STOPONERROR : 0);
- if (stopOnError && (result != TCL_OK)) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d characters", Tcl_DStringLength(&ds)));
+ result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length,
+ stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds);
+ if (stopOnError && (result != (size_t)-1)) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %"
+ TCL_LL_MODIFIER "u byte%s", (long long)result, (result != 1)?"s":""));
Tcl_DStringFree(&ds);
return TCL_ERROR;
}
@@ -628,7 +631,7 @@ EncodingConverttoObjCmd(
Tcl_Encoding encoding; /* Encoding to use */
int length; /* Length of the string being converted */
const char *stringPtr; /* Pointer to the first byte of the string */
- int result;
+ size_t result;
const char *stopOnError = NULL;
/* TODO - ADJUST OBJ INDICES WHEN ENSEMBLIFYING THIS */
@@ -643,7 +646,9 @@ EncodingConverttoObjCmd(
data = objv[2];
if (objc > 3) {
stopOnError = Tcl_GetString(objv[3]);
- if (stopOnError[0] != '-' || stopOnError[1] != 's'
+ if (!stopOnError[0]) {
+ stopOnError = NULL;
+ } else if (stopOnError[0] != '-' || stopOnError[1] != 's'
|| strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) {
goto encConvToError;
}
@@ -659,10 +664,12 @@ EncodingConverttoObjCmd(
*/
stringPtr = TclGetStringFromObj(data, &length);
- result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, &ds,
- stopOnError ? TCL_ENCODING_STOPONERROR : 0);
- if (stopOnError && (result != TCL_OK)) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d bytes", Tcl_DStringLength(&ds)));
+ result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length,
+ stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds);
+ if (stopOnError && (result != (size_t)-1)) {
+ result = Tcl_NumUtfChars(stringPtr, result);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %"
+ TCL_LL_MODIFIER "u character%s", (long long)result, (result != 1)?"s":""));
Tcl_DStringFree(&ds);
return TCL_ERROR;
}
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 6ba39d5..24760f9 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1938,13 +1938,13 @@ EXTERN const char * Tcl_UtfNext(const char *src);
/* 656 */
EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
/* 657 */
-EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
- const char *src, int srcLen,
- Tcl_DString *dsPtr, int flags);
+EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
+ const char *src, int srcLen, int flags,
+ Tcl_DString *dsPtr);
/* 658 */
-EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
- const char *src, int srcLen,
- Tcl_DString *dsPtr, int flags);
+EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
+ const char *src, int srcLen, int flags,
+ Tcl_DString *dsPtr);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2637,8 +2637,8 @@ typedef struct TclStubs {
int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */
const char * (*tcl_UtfNext) (const char *src); /* 655 */
const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
- int (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 657 */
- int (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 658 */
+ size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 657 */
+ size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 658 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 72f7690..0bce51b 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -511,7 +511,6 @@ FillEncodingFileMap(void)
*/
/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
-#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */
#define TCL_ENCODING_LE 0x80 /* Little-endian encoding, for ucs-2/utf-16 only */
void
@@ -1117,26 +1116,27 @@ Tcl_ExternalToUtfDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
- Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, dstPtr, 0);
+ Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, 0, dstPtr);
return Tcl_DStringValue(dstPtr);
}
-int
+size_t
Tcl_ExternalToUtfDStringEx(
Tcl_Encoding encoding, /* The encoding for the source string, or NULL
* for the default system encoding. */
const char *src, /* Source string in specified encoding. */
int srcLen, /* Source string length in bytes, or < 0 for
* encoding-specific string length. */
- Tcl_DString *dstPtr, /* Uninitialized or free DString in which the
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
- int flags) /* Conversion control flags. */
{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
int dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ const char *srcStart = src;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1160,13 +1160,12 @@ Tcl_ExternalToUtfDStringEx(
flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+ src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
Tcl_DStringSetLength(dstPtr, soFar);
- return result;
+ return (result == TCL_OK) ? (size_t)-1 : (size_t)(src - srcStart);
}
-
flags &= ~TCL_ENCODING_START;
- src += srcRead;
srcLen -= srcRead;
if (Tcl_DStringLength(dstPtr) == 0) {
Tcl_DStringSetLength(dstPtr, dstLen);
@@ -1321,25 +1320,26 @@ Tcl_UtfToExternalDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
- Tcl_UtfToExternalDStringEx(encoding, src, srcLen, dstPtr, 0);
+ Tcl_UtfToExternalDStringEx(encoding, src, srcLen, 0, dstPtr);
return Tcl_DStringValue(dstPtr);
}
-int
+size_t
Tcl_UtfToExternalDStringEx(
Tcl_Encoding encoding, /* The encoding for the converted string, or
* NULL for the default system encoding. */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes, or < 0 for
* strlen(). */
- Tcl_DString *dstPtr, /* Uninitialized or free DString in which the
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
- int flags) /* Conversion control flags. */
{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
int dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ const char *srcStart = src;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1355,23 +1355,23 @@ Tcl_UtfToExternalDStringEx(
} else if (srcLen < 0) {
srcLen = strlen(src);
}
- flags |= TCL_ENCODING_START | TCL_ENCODING_END | TCL_ENCODING_EXTERNAL;
+ flags |= TCL_ENCODING_START | TCL_ENCODING_END;
while (1) {
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
srcLen, flags, &state, dst, dstLen,
&srcRead, &dstWrote, &dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+ src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
if (encodingPtr->nullSize == 2) {
Tcl_DStringSetLength(dstPtr, soFar + 1);
}
Tcl_DStringSetLength(dstPtr, soFar);
- return result;
+ return (result == TCL_OK) ? (size_t)-1 : (size_t)(src - srcStart);
}
flags &= ~TCL_ENCODING_START;
- src += srcRead;
srcLen -= srcRead;
if (Tcl_DStringLength(dstPtr) == 0) {
Tcl_DStringSetLength(dstPtr, dstLen);