diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-26 07:29:02 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-04-26 07:29:02 (GMT) |
| commit | a4c35fb82f99d990ca1ef877cb9917c7f55151ff (patch) | |
| tree | cdd02327448cf2c0393cbf3cea4fd48bb17e52c4 /generic/tclEncoding.c | |
| parent | 81a213ed4ce77b15fbc6d5d97cc1235b689ad482 (diff) | |
| parent | e12b1646c6f675cf09d5f1a72d4ecdffa5da7396 (diff) | |
| download | tcl-a4c35fb82f99d990ca1ef877cb9917c7f55151ff.zip tcl-a4c35fb82f99d990ca1ef877cb9917c7f55151ff.tar.gz tcl-a4c35fb82f99d990ca1ef877cb9917c7f55151ff.tar.bz2 | |
Merge 8.7
Diffstat (limited to 'generic/tclEncoding.c')
| -rw-r--r-- | generic/tclEncoding.c | 166 |
1 files changed, 131 insertions, 35 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6fe81e8..fd0386c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -370,7 +370,7 @@ Tcl_SetEncodingSearchPath( { int dummy; - if (TCL_ERROR == Tcl_ListObjLength(NULL, searchPath, &dummy)) { + if (TCL_ERROR == TclListObjLength(NULL, searchPath, &dummy)) { return TCL_ERROR; } TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL); @@ -417,7 +417,7 @@ TclSetLibraryPath( { int dummy; - if (TCL_ERROR == Tcl_ListObjLength(NULL, path, &dummy)) { + if (TCL_ERROR == TclListObjLength(NULL, path, &dummy)) { return; } TclSetProcessGlobalValue(&libraryPath, path, NULL); @@ -456,7 +456,7 @@ FillEncodingFileMap(void) searchPath = Tcl_GetEncodingSearchPath(); Tcl_IncrRefCount(searchPath); - Tcl_ListObjLength(NULL, searchPath, &numDirs); + TclListObjLength(NULL, searchPath, &numDirs); map = Tcl_NewDictObj(); Tcl_IncrRefCount(map); @@ -480,7 +480,7 @@ FillEncodingFileMap(void) Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc", &readableFiles); - Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev); + TclListObjGetElements(NULL, matchFileList, &numFiles, &filev); for (j=0; j<numFiles; j++) { Tcl_Obj *encodingName, *fileObj; @@ -515,10 +515,8 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ -/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ -#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ #define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ @@ -728,7 +726,7 @@ Tcl_GetDefaultEncodingDir(void) int numDirs; Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath(); - Tcl_ListObjLength(NULL, searchPath, &numDirs); + TclListObjLength(NULL, searchPath, &numDirs); if (numDirs == 0) { return NULL; } @@ -1144,10 +1142,56 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + return Tcl_DStringValue(dstPtr); +} + + +/* + *------------------------------------------------------------------------- + * + * Tcl_ExternalToUtfDStringEx -- + * + * Convert a source buffer from the specified encoding into UTF-8. +* The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in utf-8. + * Possible flags values: + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +int +Tcl_ExternalToUtfDStringEx( + Tcl_Encoding encoding, /* The encoding for the source string, or NULL + * for the default system encoding. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for + * encoding-specific string length. */ + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1164,7 +1208,7 @@ Tcl_ExternalToUtfDString( srcLen = encodingPtr->lengthProc(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; } @@ -1177,7 +1221,7 @@ Tcl_ExternalToUtfDString( src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return Tcl_DStringValue(dstPtr); + return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); } flags &= ~TCL_ENCODING_START; srcLen -= srcRead; @@ -1336,10 +1380,57 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + return Tcl_DStringValue(dstPtr); +} + + +/* + *------------------------------------------------------------------------- + * + * Tcl_UtfToExternalDStringEx -- + * + * Convert a source buffer from UTF-8 to the specified encoding. + * The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in the + * target encoding. + * Possible flags values: + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +int +Tcl_UtfToExternalDStringEx( + Tcl_Encoding encoding, /* The encoding for the converted string, or + * NULL for the default system encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for + * strlen(). */ + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1355,7 +1446,7 @@ Tcl_UtfToExternalDString( } else if (srcLen < 0) { srcLen = strlen(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, @@ -1368,7 +1459,7 @@ Tcl_UtfToExternalDString( while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return Tcl_DStringValue(dstPtr); + return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); } flags &= ~TCL_ENCODING_START; @@ -1485,14 +1576,15 @@ Tcl_UtfToExternal( *--------------------------------------------------------------------------- */ #undef Tcl_FindExecutable -void +const char * Tcl_FindExecutable( const char *argv0) /* The value of the application's argv[0] * (native). */ { - Tcl_InitSubsystems(); + const char *version = Tcl_InitSubsystems(); TclpSetInitialEncodings(); TclpFindExecutable(argv0); + return version; } /* @@ -1528,7 +1620,7 @@ OpenEncodingFileChannel( Tcl_Channel chan = NULL; int i, numDirs; - Tcl_ListObjGetElements(NULL, searchPath, &numDirs, &dir); + TclListObjGetElements(NULL, searchPath, &numDirs, &dir); Tcl_IncrRefCount(nameObj); Tcl_AppendToObj(fileNameObj, ".enc", -1); Tcl_IncrRefCount(fileNameObj); @@ -2195,6 +2287,12 @@ BinaryProc( *------------------------------------------------------------------------- */ +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) +# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN) +#else +# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) +#endif + static int UtfToUtfProc( ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ @@ -2277,7 +2375,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if (flags & TCL_ENCODING_STOPONERROR) { + if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -2292,7 +2390,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR) + if ((len < 2) && (ch != 0) && STOPONERROR && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; @@ -2317,14 +2415,12 @@ UtfToUtfProc( len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { - if (flags & TCL_ENCODING_STOPONERROR) { + + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } - if (!(flags & TCL_ENCODING_MODIFIED)) { - ch = 0xFFFD; - } cesu8: *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF); @@ -2335,7 +2431,7 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); ch = low; } else if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2521,7 +2617,7 @@ UtfToUtf32Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2724,7 +2820,7 @@ UtfToUtf16Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2800,7 +2896,7 @@ UtfToUcs2Proc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; -#if TCL_UTF_MAX <= 3 +#if TCL_UTF_MAX < 4 int len; #endif Tcl_UniChar ch = 0; @@ -2831,7 +2927,7 @@ UtfToUcs2Proc( result = TCL_CONVERT_NOSPACE; break; } -#if TCL_UTF_MAX <= 3 +#if TCL_UTF_MAX < 4 src += (len = TclUtfToUniChar(src, &ch)); if ((ch >= 0xD800) && (len < 3)) { src += TclUtfToUniChar(src, &ch); @@ -2944,7 +3040,7 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; } @@ -3060,7 +3156,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3244,15 +3340,15 @@ Iso88591FromUtfProc( */ if (ch > 0xFF -#if TCL_UTF_MAX <= 3 +#if TCL_UTF_MAX < 4 || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } -#if TCL_UTF_MAX <= 3 +#if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (len < 3)) { len = 4; } @@ -3475,7 +3571,7 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if ((flags & TCL_ENCODING_STOPONERROR) == 0) { + if (!STOPONERROR) { /* * Skip the unknown escape sequence. */ @@ -3650,7 +3746,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3915,7 +4011,7 @@ InitializeEncodingSearchPath( Tcl_IncrRefCount(searchPathObj); libPathObj = TclGetLibraryPath(); Tcl_IncrRefCount(libPathObj); - Tcl_ListObjLength(NULL, libPathObj, &numDirs); + TclListObjLength(NULL, libPathObj, &numDirs); for (i = 0; i < numDirs; i++) { Tcl_Obj *directoryObj, *pathObj; |
