diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-30 11:26:36 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-03-30 11:26:36 (GMT) |
commit | 664b7500abd51bfa6257c7e3e8fc5846d18d522b (patch) | |
tree | 80d9a3db6f30221321687209d4320cdb9649246b /generic | |
parent | 42b07af6c35e293f6f7ecdf76ca84495f67b87e4 (diff) | |
download | tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.zip tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.tar.gz tcl-664b7500abd51bfa6257c7e3e8fc5846d18d522b.tar.bz2 |
Add documentation. Do a better job of counting exactly which byte/character caused the encoding/decoding error
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.decls | 8 | ||||
-rw-r--r-- | generic/tcl.h | 5 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 31 | ||||
-rw-r--r-- | generic/tclDecls.h | 16 | ||||
-rw-r--r-- | generic/tclEncoding.c | 30 |
5 files changed, 51 insertions, 39 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index c2a4abd..8cd5bc9 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2425,12 +2425,12 @@ declare 656 { } declare 657 { - int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, Tcl_DString *dsPtr, int flags) + size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } declare 658 { - int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, Tcl_DString *dsPtr, int flags) + size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } diff --git a/generic/tcl.h b/generic/tcl.h index 38dda28..f783f4f 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2069,6 +2069,10 @@ typedef struct Tcl_EncodingType { * content. Otherwise, the number of chars * produced is controlled only by other limiting * factors. + * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 instead of + * 0x00. Only valid for "utf-8", "wtf-8" and "cesu-8". + * This flag is implicit for external -> internal conversions, + * optional for internal -> external conversions. 
*/ #define TCL_ENCODING_START 0x01 @@ -2076,6 +2080,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_STOPONERROR 0x04 #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 +#define TCL_ENCODING_MODIFIED 0x20 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index ee329ec..cd77e06 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -551,7 +551,7 @@ EncodingConvertfromObjCmd( int length; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ const char *stopOnError = NULL; - int result; + size_t result; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); @@ -563,7 +563,9 @@ EncodingConvertfromObjCmd( data = objv[2]; if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); - if (stopOnError[0] != '-' || stopOnError[1] != 's' + if (!stopOnError[0]) { + stopOnError = NULL; + } else if (stopOnError[0] != '-' || stopOnError[1] != 's' || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { goto encConvFromError; } @@ -578,10 +580,11 @@ EncodingConvertfromObjCmd( * Convert the string into a byte array in 'ds' */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); - result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, &ds, - stopOnError ? TCL_ENCODING_STOPONERROR : 0); - if (stopOnError && (result != TCL_OK)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d characters", Tcl_DStringLength(&ds))); + result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, + stopOnError ? 
TCL_ENCODING_STOPONERROR : 0, &ds); + if (stopOnError && (result != (size_t)-1)) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" + TCL_LL_MODIFIER "u byte%s", (long long)result, (result != 1)?"s":"")); Tcl_DStringFree(&ds); return TCL_ERROR; } @@ -628,7 +631,7 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - int result; + size_t result; const char *stopOnError = NULL; /* TODO - ADJUST OBJ INDICES WHEN ENSEMBLIFYING THIS */ @@ -643,7 +646,9 @@ EncodingConverttoObjCmd( data = objv[2]; if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); - if (stopOnError[0] != '-' || stopOnError[1] != 's' + if (!stopOnError[0]) { + stopOnError = NULL; + } else if (stopOnError[0] != '-' || stopOnError[1] != 's' || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { goto encConvToError; } @@ -659,10 +664,12 @@ EncodingConverttoObjCmd( */ stringPtr = TclGetStringFromObj(data, &length); - result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, &ds, - stopOnError ? TCL_ENCODING_STOPONERROR : 0); - if (stopOnError && (result != TCL_OK)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d bytes", Tcl_DStringLength(&ds))); + result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, + stopOnError ? 
TCL_ENCODING_STOPONERROR : 0, &ds); + if (stopOnError && (result != (size_t)-1)) { + result = Tcl_NumUtfChars(stringPtr, result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" + TCL_LL_MODIFIER "u character%s", (long long)result, (result != 1)?"s":"")); Tcl_DStringFree(&ds); return TCL_ERROR; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 6ba39d5..24760f9 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1938,13 +1938,13 @@ EXTERN const char * Tcl_UtfNext(const char *src); /* 656 */ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); /* 657 */ -EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, - Tcl_DString *dsPtr, int flags); +EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, + Tcl_DString *dsPtr); /* 658 */ -EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, - Tcl_DString *dsPtr, int flags); +EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, + Tcl_DString *dsPtr); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2637,8 +2637,8 @@ typedef struct TclStubs { int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ - int (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 657 */ - int (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 658 */ + size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 657 */ + size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ } TclStubs; extern 
const TclStubs *tclStubsPtr; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 72f7690..0bce51b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -511,7 +511,6 @@ FillEncodingFileMap(void) */ /* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ -#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */ #define TCL_ENCODING_LE 0x80 /* Little-endian encoding, for ucs-2/utf-16 only */ void @@ -1117,26 +1116,27 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, dstPtr, 0); + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, 0, dstPtr); return Tcl_DStringValue(dstPtr); } -int +size_t Tcl_ExternalToUtfDStringEx( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ - int flags) /* Conversion control flags. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1160,13 +1160,12 @@ Tcl_ExternalToUtfDStringEx( flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return result; + return (result == TCL_OK) ? 
(size_t)-1 : (size_t)(src - srcStart); } - flags &= ~TCL_ENCODING_START; - src += srcRead; srcLen -= srcRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); @@ -1321,25 +1320,26 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_UtfToExternalDStringEx(encoding, src, srcLen, dstPtr, 0); + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, 0, dstPtr); return Tcl_DStringValue(dstPtr); } -int +size_t Tcl_UtfToExternalDStringEx( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ - int flags) /* Conversion control flags. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1355,23 +1355,23 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen < 0) { srcLen = strlen(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END | TCL_ENCODING_EXTERNAL; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + src += srcRead; if (result != TCL_CONVERT_NOSPACE) { if (encodingPtr->nullSize == 2) { Tcl_DStringSetLength(dstPtr, soFar + 1); } Tcl_DStringSetLength(dstPtr, soFar); - return result; + return (result == TCL_OK) ? 
(size_t)-1 : (size_t)(src - srcStart); } flags &= ~TCL_ENCODING_START; - src += srcRead; srcLen -= srcRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); |