summary refs log tree commit diff stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 199
1 file changed, 122 insertions, 77 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 2548b73..6d32676 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src);
* convert between various character sets and UTF-8.
*/
-typedef struct Encoding {
+typedef struct {
char *name; /* Name of encoding. Malloced because (1) hash
* table entry that owns this encoding may be
* freed prior to this encoding being freed,
@@ -46,7 +46,7 @@ typedef struct Encoding {
* nullSize is 2, this is a function that
* returns the number of bytes in a 0x0000
* terminated string. */
- int refCount; /* Number of uses of this structure. */
+ size_t refCount; /* Number of uses of this structure. */
Tcl_HashEntry *hPtr; /* Hash table entry that owns this encoding. */
} Encoding;
@@ -57,7 +57,7 @@ typedef struct Encoding {
* encoding.
*/
-typedef struct TableEncodingData {
+typedef struct {
int fallback; /* Character (in this encoding) to substitute
* when this encoding cannot represent a UTF-8
* character. */
@@ -91,7 +91,7 @@ typedef struct TableEncodingData {
* for switching character sets.
*/
-typedef struct EscapeSubTable {
+typedef struct {
unsigned sequenceLen; /* Length of following string. */
char sequence[16]; /* Escape code that marks this encoding. */
char name[32]; /* Name for encoding. */
@@ -100,7 +100,7 @@ typedef struct EscapeSubTable {
* yet. */
} EscapeSubTable;
-typedef struct EscapeEncodingData {
+typedef struct {
int fallback; /* Character (in this encoding) to substitute
* when this encoding cannot represent a UTF-8
* character. */
@@ -279,6 +279,21 @@ static int Iso88591ToUtfProc(ClientData clientData,
static const Tcl_ObjType encodingType = {
"encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
};
+#define EncodingSetIntRep(objPtr, encoding) \
+ do { \
+ Tcl_ObjIntRep ir; \
+ ir.twoPtrValue.ptr1 = (encoding); \
+ ir.twoPtrValue.ptr2 = NULL; \
+ Tcl_StoreIntRep((objPtr), &encodingType, &ir); \
+ } while (0)
+
+#define EncodingGetIntRep(objPtr, encoding) \
+ do { \
+ const Tcl_ObjIntRep *irPtr; \
+ irPtr = Tcl_FetchIntRep ((objPtr), &encodingType); \
+ (encoding) = irPtr ? irPtr->twoPtrValue.ptr1 : NULL; \
+ } while (0)
+
/*
*----------------------------------------------------------------------
@@ -305,17 +320,16 @@ Tcl_GetEncodingFromObj(
Tcl_Obj *objPtr,
Tcl_Encoding *encodingPtr)
{
- const char *name = Tcl_GetString(objPtr);
-
- if (objPtr->typePtr != &encodingType) {
- Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
+ Tcl_Encoding encoding;
+ const char *name = TclGetString(objPtr);
+ EncodingGetIntRep(objPtr, encoding);
+ if (encoding == NULL) {
+ encoding = Tcl_GetEncoding(interp, name);
if (encoding == NULL) {
return TCL_ERROR;
}
- TclFreeIntRep(objPtr);
- objPtr->internalRep.twoPtrValue.ptr1 = encoding;
- objPtr->typePtr = &encodingType;
+ EncodingSetIntRep(objPtr, encoding);
}
*encodingPtr = Tcl_GetEncoding(NULL, name);
return TCL_OK;
@@ -335,8 +349,10 @@ static void
FreeEncodingIntRep(
Tcl_Obj *objPtr)
{
- Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1);
- objPtr->typePtr = NULL;
+ Tcl_Encoding encoding;
+
+ EncodingGetIntRep(objPtr, encoding);
+ Tcl_FreeEncoding(encoding);
}
/*
@@ -354,7 +370,8 @@ DupEncodingIntRep(
Tcl_Obj *srcPtr,
Tcl_Obj *dupPtr)
{
- dupPtr->internalRep.twoPtrValue.ptr1 = Tcl_GetEncoding(NULL, srcPtr->bytes);
+ Tcl_Encoding encoding = Tcl_GetEncoding(NULL, TclGetString(srcPtr));
+ EncodingSetIntRep(dupPtr, encoding);
}
/*
@@ -562,7 +579,7 @@ TclInitEncodingSubsystem(void)
* formed UTF-8 into a properly formed stream.
*/
- type.encodingName = "identity";
+ type.encodingName = NULL;
type.toUtfProc = BinaryProc;
type.fromUtfProc = BinaryProc;
type.freeProc = NULL;
@@ -692,6 +709,7 @@ TclFinalizeEncodingSubsystem(void)
*-------------------------------------------------------------------------
*/
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
const char *
Tcl_GetDefaultEncodingDir(void)
{
@@ -704,7 +722,7 @@ Tcl_GetDefaultEncodingDir(void)
}
Tcl_ListObjIndex(NULL, searchPath, 0, &first);
- return Tcl_GetString(first);
+ return TclGetString(first);
}
/*
@@ -735,6 +753,7 @@ Tcl_SetDefaultEncodingDir(
Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory);
Tcl_SetEncodingSearchPath(searchPath);
}
+#endif
/*
*-------------------------------------------------------------------------
@@ -843,18 +862,16 @@ FreeEncoding(
if (encodingPtr == NULL) {
return;
}
- if (encodingPtr->refCount<=0) {
- Tcl_Panic("FreeEncoding: refcount problem !!!");
- }
- encodingPtr->refCount--;
- if (encodingPtr->refCount == 0) {
+ if (encodingPtr->refCount-- <= 1) {
if (encodingPtr->freeProc != NULL) {
encodingPtr->freeProc(encodingPtr->clientData);
}
if (encodingPtr->hPtr != NULL) {
Tcl_DeleteHashEntry(encodingPtr->hPtr);
}
- ckfree(encodingPtr->name);
+ if (encodingPtr->name) {
+ ckfree(encodingPtr->name);
+ }
ckfree(encodingPtr);
}
}
@@ -1043,9 +1060,24 @@ Tcl_CreateEncoding(
const Tcl_EncodingType *typePtr)
/* The encoding type. */
{
+ Encoding *encodingPtr = ckalloc(sizeof(Encoding));
+ encodingPtr->name = NULL;
+ encodingPtr->toUtfProc = typePtr->toUtfProc;
+ encodingPtr->fromUtfProc = typePtr->fromUtfProc;
+ encodingPtr->freeProc = typePtr->freeProc;
+ encodingPtr->nullSize = typePtr->nullSize;
+ encodingPtr->clientData = typePtr->clientData;
+ if (typePtr->nullSize == 1) {
+ encodingPtr->lengthProc = (LengthProc *) strlen;
+ } else {
+ encodingPtr->lengthProc = (LengthProc *) unilen;
+ }
+ encodingPtr->refCount = 1;
+ encodingPtr->hPtr = NULL;
+
+ if (typePtr->encodingName) {
Tcl_HashEntry *hPtr;
int isNew;
- Encoding *encodingPtr;
char *name;
Tcl_MutexLock(&encodingMutex);
@@ -1056,30 +1088,17 @@ Tcl_CreateEncoding(
* reference goes away.
*/
- encodingPtr = Tcl_GetHashValue(hPtr);
- encodingPtr->hPtr = NULL;
+ Encoding *replaceMe = Tcl_GetHashValue(hPtr);
+ replaceMe->hPtr = NULL;
}
name = ckalloc(strlen(typePtr->encodingName) + 1);
-
- encodingPtr = ckalloc(sizeof(Encoding));
encodingPtr->name = strcpy(name, typePtr->encodingName);
- encodingPtr->toUtfProc = typePtr->toUtfProc;
- encodingPtr->fromUtfProc = typePtr->fromUtfProc;
- encodingPtr->freeProc = typePtr->freeProc;
- encodingPtr->nullSize = typePtr->nullSize;
- encodingPtr->clientData = typePtr->clientData;
- if (typePtr->nullSize == 1) {
- encodingPtr->lengthProc = (LengthProc *) strlen;
- } else {
- encodingPtr->lengthProc = (LengthProc *) unilen;
- }
- encodingPtr->refCount = 1;
encodingPtr->hPtr = hPtr;
Tcl_SetHashValue(hPtr, encodingPtr);
Tcl_MutexUnlock(&encodingMutex);
-
+ }
return (Tcl_Encoding) encodingPtr;
}
@@ -1518,10 +1537,10 @@ OpenEncodingFileChannel(
}
}
if (!verified) {
- const char *dirString = Tcl_GetString(directory);
+ const char *dirString = TclGetString(directory);
for (i=0; i<numDirs && !verified; i++) {
- if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) {
+ if (strcmp(dirString, TclGetString(dir[i])) == 0) {
verified = 1;
}
}
@@ -1762,7 +1781,7 @@ LoadTableEncoding(
const char *p;
Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
- p = Tcl_GetString(objPtr);
+ p = TclGetString(objPtr);
hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])];
dataPtr->toUnicode[hi] = pageMemPtr;
p += 2;
@@ -2055,7 +2074,7 @@ LoadEscapeEncoding(
dataPtr->numSubTables =
Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData),
- (size_t) Tcl_DStringLength(&escapeData));
+ Tcl_DStringLength(&escapeData));
Tcl_DStringFree(&escapeData);
memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
@@ -2296,8 +2315,11 @@ UtfToUtfProc(
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
- Tcl_UniChar ch = 0;
+ Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ if (flags & TCL_ENCODING_START) {
+ *statePtr = 0;
+ }
result = TCL_OK;
srcStart = src;
@@ -2349,12 +2371,19 @@ UtfToUtfProc(
* incomplete char its bytes are made to represent themselves.
*/
- ch = (unsigned char) *src;
+ *chPtr = (unsigned char) *src;
src += 1;
- dst += Tcl_UniCharToUtf(ch, dst);
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
} else {
- src += TclUtfToUniChar(src, &ch);
- dst += Tcl_UniCharToUtf(ch, dst);
+ int len = TclUtfToUniChar(src, chPtr);
+ src += len;
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
+#if TCL_UTF_MAX <= 4
+ if (!len) {
+ src += TclUtfToUniChar(src, chPtr);
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
+ }
+#endif
}
}
@@ -2410,8 +2439,11 @@ UnicodeToUtfProc(
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
int result, numChars, charLimit = INT_MAX;
- Tcl_UniChar ch = 0;
+ Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ if (flags & TCL_ENCODING_START) {
+ *statePtr = 0;
+ }
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
}
@@ -2439,11 +2471,11 @@ UnicodeToUtfProc(
* Tcl_UniChar-size data.
*/
- ch = *(Tcl_UniChar *)src;
- if (ch && ch < 0x80) {
- *dst++ = (ch & 0xFF);
+ *chPtr = *(Tcl_UniChar *)src;
+ if (*chPtr && *chPtr < 0x80) {
+ *dst++ = (*chPtr & 0xFF);
} else {
- dst += Tcl_UniCharToUtf(ch, dst);
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
}
src += sizeof(Tcl_UniChar);
}
@@ -2500,8 +2532,11 @@ UtfToUnicodeProc(
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
- Tcl_UniChar ch = 0;
+ Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ if (flags & TCL_ENCODING_START) {
+ *statePtr = 0;
+ }
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -2527,7 +2562,7 @@ UtfToUnicodeProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- src += TclUtfToUniChar(src, &ch);
+ src += TclUtfToUniChar(src, chPtr);
/*
* Need to handle this in a way that won't cause misalignment by
@@ -2536,23 +2571,23 @@ UtfToUnicodeProc(
#ifdef WORDS_BIGENDIAN
#if TCL_UTF_MAX > 4
- *dst++ = (ch >> 24);
- *dst++ = ((ch >> 16) & 0xFF);
- *dst++ = ((ch >> 8) & 0xFF);
- *dst++ = (ch & 0xFF);
+ *dst++ = (*chPtr >> 24);
+ *dst++ = ((*chPtr >> 16) & 0xFF);
+ *dst++ = ((*chPtr >> 8) & 0xFF);
+ *dst++ = (*chPtr & 0xFF);
#else
- *dst++ = (ch >> 8);
- *dst++ = (ch & 0xFF);
+ *dst++ = (*chPtr >> 8);
+ *dst++ = (*chPtr & 0xFF);
#endif
#else
#if TCL_UTF_MAX > 4
- *dst++ = (ch & 0xFF);
- *dst++ = ((ch >> 8) & 0xFF);
- *dst++ = ((ch >> 16) & 0xFF);
- *dst++ = (ch >> 24);
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = ((*chPtr >> 8) & 0xFF);
+ *dst++ = ((*chPtr >> 16) & 0xFF);
+ *dst++ = (*chPtr >> 24);
#else
- *dst++ = (ch & 0xFF);
- *dst++ = (ch >> 8);
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (*chPtr >> 8);
#endif
#endif
}
@@ -2754,7 +2789,7 @@ TableFromUtfProc(
}
len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
/*
* This prevents a crash condition. More evaluation is required for
* full support of int Tcl_UniChar. [Bug 1004065]
@@ -2763,6 +2798,10 @@ TableFromUtfProc(
if (ch & 0xffff0000) {
word = 0;
} else
+#else
+ if (!len) {
+ word = 0;
+ } else
#endif
word = fromUnicode[(ch >> 8)][ch & 0xff];
@@ -2960,12 +2999,18 @@ Iso88591FromUtfProc(
* Check for illegal characters.
*/
- if (ch > 0xff) {
+ if (ch > 0xff
+#if TCL_UTF_MAX <= 4
+ || !len
+#endif
+ ) {
if (flags & TCL_ENCODING_STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
-
+#if TCL_UTF_MAX <= 4
+ if (!len) len = 4;
+#endif
/*
* Plunge on, using '?' as a fallback character.
*/
@@ -3599,11 +3644,11 @@ unilen(
static void
InitializeEncodingSearchPath(
char **valuePtr,
- int *lengthPtr,
+ unsigned int *lengthPtr,
Tcl_Encoding *encodingPtr)
{
const char *bytes;
- int i, numDirs, numBytes;
+ int i, numDirs;
Tcl_Obj *libPathObj, *encodingObj, *searchPathObj;
TclNewLiteralStringObj(encodingObj, "encoding");
@@ -3633,11 +3678,11 @@ InitializeEncodingSearchPath(
if (*encodingPtr) {
((Encoding *)(*encodingPtr))->refCount++;
}
- bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes);
+ bytes = TclGetString(searchPathObj);
- *lengthPtr = numBytes;
- *valuePtr = ckalloc(numBytes + 1);
- memcpy(*valuePtr, bytes, (size_t) numBytes + 1);
+ *lengthPtr = searchPathObj->length;
+ *valuePtr = ckalloc(*lengthPtr + 1);
+ memcpy(*valuePtr, bytes, *lengthPtr + 1);
Tcl_DecrRefCount(searchPathObj);
}