summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c84
1 files changed, 63 insertions, 21 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 95c59c0..179ca17 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -1206,7 +1206,10 @@ Tcl_ExternalToUtf(
* output buffer. */
{
const Encoding *encodingPtr;
- int result, srcRead, dstWrote, dstChars;
+ int result, srcRead, dstWrote, dstChars = 0;
+ int noTerminate = flags & TCL_ENCODING_NO_TERMINATE;
+ int charLimited = (flags & TCL_ENCODING_CHAR_LIMIT) && dstCharsPtr;
+ int maxChars = INT_MAX;
Tcl_EncodingState state;
if (encoding == NULL) {
@@ -1231,19 +1234,40 @@ Tcl_ExternalToUtf(
}
if (dstCharsPtr == NULL) {
dstCharsPtr = &dstChars;
+ flags &= ~TCL_ENCODING_CHAR_LIMIT;
+ } else if (charLimited) {
+ maxChars = *dstCharsPtr;
}
- /*
- * If there are any null characters in the middle of the buffer, they will
- * converted to the UTF-8 null character (\xC080). To get the actual \0 at
- * the end of the destination buffer, we need to append it manually.
- */
+ if (!noTerminate) {
+ /*
+ * If there are any null characters in the middle of the buffer,
+ * they will be converted to the UTF-8 null character (\xC0\x80). To get
+ * the actual \0 at the end of the destination buffer, we need to
+ * append it manually. First make room for it...
+ */
- dstLen--;
- result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
- flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
- dstCharsPtr);
- dst[*dstWrotePtr] = '\0';
+ dstLen--;
+ }
+ do {
+ int savedFlags = flags;
+ Tcl_EncodingState savedState = *statePtr;
+
+ result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
+ flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
+ dstCharsPtr);
+ if (*dstCharsPtr <= maxChars) {
+ break;
+ }
+ dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
+ flags = savedFlags;
+ *statePtr = savedState;
+ } while (1);
+ if (!noTerminate) {
+ /* ...and then append it */
+
+ dst[*dstWrotePtr] = '\0';
+ }
return result;
}
@@ -2107,6 +2131,9 @@ BinaryProc(
if (dstLen < 0) {
dstLen = 0;
}
+ if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) {
+ srcLen = *dstCharsPtr;
+ }
if (srcLen > dstLen) {
srcLen = dstLen;
result = TCL_CONVERT_NOSPACE;
@@ -2267,7 +2294,7 @@ UtfToUtfProc(
{
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
result = TCL_OK;
@@ -2278,11 +2305,14 @@ UtfToUtfProc(
if ((flags & TCL_ENCODING_END) == 0) {
srcClose -= TCL_UTF_MAX;
}
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
@@ -2378,9 +2408,12 @@ UnicodeToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
result = TCL_OK;
if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
result = TCL_CONVERT_MULTIBYTE;
@@ -2394,7 +2427,7 @@ UnicodeToUtfProc(
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
@@ -2562,12 +2595,15 @@ TableToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart, *prefixBytes;
- int result, byte, numChars;
+ int result, byte, numChars, charLimit = INT_MAX;
Tcl_UniChar ch;
const unsigned short *const *toUnicode;
const unsigned short *pageZero;
TableEncodingData *dataPtr = clientData;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
srcStart = src;
srcEnd = src + srcLen;
@@ -2579,7 +2615,7 @@ TableToUtfProc(
pageZero = toUnicode[0];
result = TCL_OK;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
@@ -2793,8 +2829,11 @@ Iso88591ToUtfProc(
{
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
- int result, numChars;
+ int result, numChars, charLimit = INT_MAX;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
srcStart = src;
srcEnd = src + srcLen;
@@ -2802,7 +2841,7 @@ Iso88591ToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
result = TCL_OK;
- for (numChars = 0; src < srcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
Tcl_UniChar ch;
if (dst > dstEnd) {
@@ -3018,9 +3057,12 @@ EscapeToUtfProc(
const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd;
const unsigned short *const *tableToUnicode;
const Encoding *encodingPtr;
- int state, result, numChars;
+ int state, result, numChars, charLimit = INT_MAX;
const char *dstStart, *dstEnd;
+ if (flags & TCL_ENCODING_CHAR_LIMIT) {
+ charLimit = *dstCharsPtr;
+ }
result = TCL_OK;
tablePrefixBytes = NULL; /* lint. */
tableToUnicode = NULL; /* lint. */
@@ -3038,7 +3080,7 @@ EscapeToUtfProc(
state = 0;
}
- for (numChars = 0; src < srcEnd; ) {
+ for (numChars = 0; src < srcEnd && numChars <= charLimit; ) {
int byte, hi, lo, ch;
if (dst > dstEnd) {