summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c119
1 files changed, 91 insertions, 28 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 2cc55d6..4edebcf 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -180,8 +180,9 @@ TCL_DECLARE_MUTEX(encodingMutex)
* the system encoding will be used to perform the conversion.
*/
-static Tcl_Encoding defaultEncoding;
-static Tcl_Encoding systemEncoding;
+static Tcl_Encoding defaultEncoding = NULL;
+static Tcl_Encoding systemEncoding = NULL;
+Tcl_Encoding tclIdentityEncoding = NULL;
/*
* The following variable is used in the sparse matrix code for a
@@ -567,7 +568,7 @@ TclInitEncodingSubsystem(void)
type.freeProc = NULL;
type.nullSize = 1;
type.clientData = NULL;
- Tcl_CreateEncoding(&type);
+ tclIdentityEncoding = Tcl_CreateEncoding(&type);
type.encodingName = "utf-8";
type.toUtfProc = UtfExtToUtfIntProc;
@@ -651,6 +652,10 @@ TclFinalizeEncodingSubsystem(void)
Tcl_MutexLock(&encodingMutex);
encodingsInitialized = 0;
FreeEncoding(systemEncoding);
+ systemEncoding = NULL;
+ defaultEncoding = NULL;
+ FreeEncoding(tclIdentityEncoding);
+ tclIdentityEncoding = NULL;
hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
while (hPtr != NULL) {
@@ -1201,7 +1206,10 @@ Tcl_ExternalToUtf(
* output buffer. */
{
const Encoding *encodingPtr;
- int result, srcRead, dstWrote, dstChars;
+ int result, srcRead, dstWrote, dstChars = 0;
+ int noTerminate = flags & TCL_ENCODING_NO_TERMINATE;
+ int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr;
+ int maxChars = INT_MAX;
Tcl_EncodingState state;
if (encoding == NULL) {
@@ -1226,19 +1234,40 @@ Tcl_ExternalToUtf(
}
if (dstCharsPtr == NULL) {
dstCharsPtr = &dstChars;
+ flags &= ~TCL_ENCODING_CHAR_LIMIT;
+ } else if (charLimited) {
+ maxChars = *dstCharsPtr;
}
- /*
- * If there are any null characters in the middle of the buffer, they will
- * converted to the UTF-8 null character (\xC080). To get the actual \0 at
- * the end of the destination buffer, we need to append it manually.
- */
+ if (!noTerminate) {
+ /*
+ * If there are any null characters in the middle of the buffer,
+ * they will converted to the UTF-8 null character (\xC080). To get
+ * the actual \0 at the end of the destination buffer, we need to
+ * append it manually. First make room for it...
+ */
- dstLen--;
- result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
- flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
- dstCharsPtr);
- dst[*dstWrotePtr] = '\0';
+ dstLen--;
+ }
+ do {
+ int savedFlags = flags;
+ Tcl_EncodingState savedState = *statePtr;
+
+ result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
+ flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
+ dstCharsPtr);
+ if (*dstCharsPtr <= maxChars) {
+ break;
+ }
+ dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
+ flags = savedFlags;
+ *statePtr = savedState;
+ } while (1);
+ if (!noTerminate) {
+ /* ...and then append it */
+
+ dst[*dstWrotePtr] = '\0';
+ }
return result;
}
@@ -1421,7 +1450,7 @@ Tcl_UtfToExternal(
*
* Side effects:
* The absolute pathname for the application is computed and stored to be
- * returned later be [info nameofexecutable].
+ * returned later by [info nameofexecutable].
*
*---------------------------------------------------------------------------
*/
@@ -2102,6 +2131,9 @@ BinaryProc(
if (dstLen < 0) {
dstLen = 0;
}
+ if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
+ srcLen = *dstCharsPtr;
+ }
if (srcLen > dstLen) {
srcLen = dstLen;
result = TCL_CONVERT_NOSPACE;
@@ -2262,7 +2294,7 @@ UtfToUtfProc(
{
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
result = TCL_OK;
@@ -2273,11 +2305,14 @@ UtfToUtfProc(
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
@@ -2299,7 +2334,7 @@ UtfToUtfProc(
*dst++ = *src++;
} else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 &&
- UCHAR(*(src+1)) == 0x80) {
+ (src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) {
/*
* Convert 0xc080 to real nulls when we are in output mode.
*/
@@ -2373,9 +2408,12 @@ UnicodeToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
result = TCL_OK;
if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
result = TCL_CONVERT_MULTIBYTE;
@@ -2389,7 +2427,7 @@ UnicodeToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
@@ -2487,22 +2525,35 @@ UtfToUnicodeProc(
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
- }
+ }
src += TclUtfToUniChar(src, &ch);
/*
* Need to handle this in a way that won't cause misalignment by
* casting dst to a Tcl_UniChar. [Bug 1122671]
- * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
*/
#ifdef WORDS_BIGENDIAN
+#if TCL_UTF_MAX > 4
+ *dst++ = (ch >> 24);
+ *dst++ = ((ch >> 16) & 0xFF);
+ *dst++ = ((ch >> 8) & 0xFF);
+ *dst++ = (ch & 0xFF);
+#else
*dst++ = (ch >> 8);
*dst++ = (ch & 0xFF);
+#endif
+#else
+#if TCL_UTF_MAX > 4
+ *dst++ = (ch & 0xFF);
+ *dst++ = ((ch >> 8) & 0xFF);
+ *dst++ = ((ch >> 16) & 0xFF);
+ *dst++ = (ch >> 24);
#else
*dst++ = (ch & 0xFF);
*dst++ = (ch >> 8);
#endif
+#endif
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
@@ -2557,12 +2608,15 @@ TableToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart, *prefixBytes;
- int result, byte, numChars;
+ int result, byte, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
const unsigned short *const *toUnicode;
const unsigned short *pageZero;
TableEncodingData *dataPtr = clientData;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
srcStart = src;
srcEnd = src + srcLen;
@@ -2574,7 +2628,7 @@ TableToUtfProc(
pageZero = toUnicode[0];
result = TCL_OK;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
@@ -2788,8 +2842,11 @@ Iso88591ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
srcStart = src;
srcEnd = src + srcLen;
@@ -2797,7 +2854,7 @@ Iso88591ToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
result = TCL_OK;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
Tcl_UniChar ch;
if (dst > dstEnd) {
@@ -2958,7 +3015,9 @@ TableFreeProc(
*/
ckfree(dataPtr->toUnicode);
+ dataPtr->toUnicode = NULL;
ckfree(dataPtr->fromUnicode);
+ dataPtr->fromUnicode = NULL;
ckfree(dataPtr);
}
@@ -3011,9 +3070,12 @@ EscapeToUtfProc(
const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
const unsigned short *const *tableToUnicode;
const Encoding *encodingPtr;
- int state, result, numChars;
+ int state, result, numChars, charLimit = INT_MAX;
const char *dstStart, *dstEnd;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
result = TCL_OK;
tablePrefixBytes = NULL; /* lint. */
tableToUnicode = NULL; /* lint. */
@@ -3031,7 +3093,7 @@ EscapeToUtfProc(
state = 0;
}
- for (numChars = 0; src < srcEnd; ) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; ) {
int byte, hi, lo, ch;
if (dst > dstEnd) {
@@ -3431,6 +3493,7 @@ EscapeFreeProc(
subTablePtr = dataPtr->subTables;
for (i = 0; i < dataPtr->numSubTables; i++) {
FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr);
+ subTablePtr->encodingPtr = NULL;
subTablePtr++;
}
}