summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c166
1 files changed, 131 insertions, 35 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 6fe81e8..fd0386c 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -370,7 +370,7 @@ Tcl_SetEncodingSearchPath(
{
int dummy;
- if (TCL_ERROR == Tcl_ListObjLength(NULL, searchPath, &dummy)) {
+ if (TCL_ERROR == TclListObjLength(NULL, searchPath, &dummy)) {
return TCL_ERROR;
}
TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL);
@@ -417,7 +417,7 @@ TclSetLibraryPath(
{
int dummy;
- if (TCL_ERROR == Tcl_ListObjLength(NULL, path, &dummy)) {
+ if (TCL_ERROR == TclListObjLength(NULL, path, &dummy)) {
return;
}
TclSetProcessGlobalValue(&libraryPath, path, NULL);
@@ -456,7 +456,7 @@ FillEncodingFileMap(void)
searchPath = Tcl_GetEncodingSearchPath();
Tcl_IncrRefCount(searchPath);
- Tcl_ListObjLength(NULL, searchPath, &numDirs);
+ TclListObjLength(NULL, searchPath, &numDirs);
map = Tcl_NewDictObj();
Tcl_IncrRefCount(map);
@@ -480,7 +480,7 @@ FillEncodingFileMap(void)
Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc",
&readableFiles);
- Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev);
+ TclListObjGetElements(NULL, matchFileList, &numFiles, &filev);
for (j=0; j<numFiles; j++) {
Tcl_Obj *encodingName, *fileObj;
@@ -515,10 +515,8 @@ FillEncodingFileMap(void)
*---------------------------------------------------------------------------
*/
-/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
* TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */
-#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */
#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */
#define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */
@@ -728,7 +726,7 @@ Tcl_GetDefaultEncodingDir(void)
int numDirs;
Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath();
- Tcl_ListObjLength(NULL, searchPath, &numDirs);
+ TclListObjLength(NULL, searchPath, &numDirs);
if (numDirs == 0) {
return NULL;
}
@@ -1144,10 +1142,56 @@ Tcl_ExternalToUtfDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
+ Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+ return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_ExternalToUtfDStringEx --
+ *
+ * Convert a source buffer from the specified encoding into UTF-8.
+* The parameter flags controls the behavior, if any of the bytes in
+ * the source buffer are invalid or cannot be represented in utf-8.
+ * Possible flags values:
+ * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but
+ * return the first error position (Default in Tcl 9.0).
+ * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
+ * fallback character. Always return -1 (Default in Tcl 8.7).
+ * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
+ * Only valid for "utf-8" and "cesu-8". This flag may be used together
+ * with the other flags.
+ *
+ * Results:
+ * The converted bytes are stored in the DString, which is then NULL
+ * terminated in an encoding-specific manner. The return value is
+ * the error position in the source string or -1 if no conversion error
+ * is reported.
+ *
+ * Side effects:
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+int
+Tcl_ExternalToUtfDStringEx(
+ Tcl_Encoding encoding, /* The encoding for the source string, or NULL
+ * for the default system encoding. */
+ const char *src, /* Source string in specified encoding. */
+ int srcLen, /* Source string length in bytes, or < 0 for
+ * encoding-specific string length. */
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ * converted string is stored. */
+{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ int dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ const char *srcStart = src;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1164,7 +1208,7 @@ Tcl_ExternalToUtfDString(
srcLen = encodingPtr->lengthProc(src);
}
- flags = TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START | TCL_ENCODING_END;
if (encodingPtr->toUtfProc == UtfToUtfProc) {
flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
}
@@ -1177,7 +1221,7 @@ Tcl_ExternalToUtfDString(
src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
Tcl_DStringSetLength(dstPtr, soFar);
- return Tcl_DStringValue(dstPtr);
+ return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart);
}
flags &= ~TCL_ENCODING_START;
srcLen -= srcRead;
@@ -1336,10 +1380,57 @@ Tcl_UtfToExternalDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
+ Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+ return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_UtfToExternalDStringEx --
+ *
+ * Convert a source buffer from UTF-8 to the specified encoding.
+ * The parameter flags controls the behavior, if any of the bytes in
+ * the source buffer are invalid or cannot be represented in the
+ * target encoding.
+ * Possible flags values:
+ * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but
+ * return the first error position (Default in Tcl 9.0).
+ * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
+ * fallback character. Always return -1 (Default in Tcl 8.7).
+ * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
+ * Only valid for "utf-8" and "cesu-8". This flag may be used together
+ * with the other flags.
+ *
+ * Results:
+ * The converted bytes are stored in the DString, which is then NULL
+ * terminated in an encoding-specific manner. The return value is
+ * the error position in the source string or -1 if no conversion error
+ * is reported.
+ *
+ * Side effects:
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+int
+Tcl_UtfToExternalDStringEx(
+ Tcl_Encoding encoding, /* The encoding for the converted string, or
+ * NULL for the default system encoding. */
+ const char *src, /* Source string in UTF-8. */
+ int srcLen, /* Source string length in bytes, or < 0 for
+ * strlen(). */
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ * converted string is stored. */
+{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ int dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ const char *srcStart = src;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1355,7 +1446,7 @@ Tcl_UtfToExternalDString(
} else if (srcLen < 0) {
srcLen = strlen(src);
}
- flags = TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START | TCL_ENCODING_END;
while (1) {
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
srcLen, flags, &state, dst, dstLen,
@@ -1368,7 +1459,7 @@ Tcl_UtfToExternalDString(
while (i >= soFar) {
Tcl_DStringSetLength(dstPtr, i--);
}
- return Tcl_DStringValue(dstPtr);
+ return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart);
}
flags &= ~TCL_ENCODING_START;
@@ -1485,14 +1576,15 @@ Tcl_UtfToExternal(
*---------------------------------------------------------------------------
*/
#undef Tcl_FindExecutable
-void
+const char *
Tcl_FindExecutable(
const char *argv0) /* The value of the application's argv[0]
* (native). */
{
- Tcl_InitSubsystems();
+ const char *version = Tcl_InitSubsystems();
TclpSetInitialEncodings();
TclpFindExecutable(argv0);
+ return version;
}
/*
@@ -1528,7 +1620,7 @@ OpenEncodingFileChannel(
Tcl_Channel chan = NULL;
int i, numDirs;
- Tcl_ListObjGetElements(NULL, searchPath, &numDirs, &dir);
+ TclListObjGetElements(NULL, searchPath, &numDirs, &dir);
Tcl_IncrRefCount(nameObj);
Tcl_AppendToObj(fileNameObj, ".enc", -1);
Tcl_IncrRefCount(fileNameObj);
@@ -2195,6 +2287,12 @@ BinaryProc(
*-------------------------------------------------------------------------
*/
+#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
+# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN)
+#else
+# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR)
+#endif
+
static int
UtfToUtfProc(
ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */
@@ -2277,7 +2375,7 @@ UtfToUtfProc(
*/
if (flags & TCL_ENCODING_MODIFIED) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
result = TCL_CONVERT_MULTIBYTE;
break;
}
@@ -2292,7 +2390,7 @@ UtfToUtfProc(
int low;
const char *saveSrc = src;
size_t len = TclUtfToUCS4(src, &ch);
- if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)
+ if ((len < 2) && (ch != 0) && STOPONERROR
&& (flags & TCL_ENCODING_MODIFIED)) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -2317,14 +2415,12 @@ UtfToUtfProc(
len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
}
- if (!(flags & TCL_ENCODING_MODIFIED)) {
- ch = 0xFFFD;
- }
cesu8:
*dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
@@ -2335,7 +2431,7 @@ UtfToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
ch = low;
} else if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
@@ -2521,7 +2617,7 @@ UtfToUtf32Proc(
}
len = TclUtfToUCS4(src, &ch);
if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -2724,7 +2820,7 @@ UtfToUtf16Proc(
}
len = TclUtfToUCS4(src, &ch);
if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -2800,7 +2896,7 @@ UtfToUcs2Proc(
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
int len;
#endif
Tcl_UniChar ch = 0;
@@ -2831,7 +2927,7 @@ UtfToUcs2Proc(
result = TCL_CONVERT_NOSPACE;
break;
}
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
src += (len = TclUtfToUniChar(src, &ch));
if ((ch >= 0xD800) && (len < 3)) {
src += TclUtfToUniChar(src, &ch);
@@ -2944,7 +3040,7 @@ TableToUtfProc(
ch = pageZero[byte];
}
if ((ch == 0) && (byte != 0)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -3060,7 +3156,7 @@ TableFromUtfProc(
word = fromUnicode[(ch >> 8)][ch & 0xFF];
if ((word == 0) && (ch != 0)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -3244,15 +3340,15 @@ Iso88591FromUtfProc(
*/
if (ch > 0xFF
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
|| ((ch >= 0xD800) && (len < 3))
#endif
) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
if ((ch >= 0xD800) && (len < 3)) {
len = 4;
}
@@ -3475,7 +3571,7 @@ EscapeToUtfProc(
if ((checked == dataPtr->numSubTables + 2)
|| (flags & TCL_ENCODING_END)) {
- if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
+ if (!STOPONERROR) {
/*
* Skip the unknown escape sequence.
*/
@@ -3650,7 +3746,7 @@ EscapeFromUtfProc(
if (word == 0) {
state = oldState;
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -3915,7 +4011,7 @@ InitializeEncodingSearchPath(
Tcl_IncrRefCount(searchPathObj);
libPathObj = TclGetLibraryPath();
Tcl_IncrRefCount(libPathObj);
- Tcl_ListObjLength(NULL, libPathObj, &numDirs);
+ TclListObjLength(NULL, libPathObj, &numDirs);
for (i = 0; i < numDirs; i++) {
Tcl_Obj *directoryObj, *pathObj;