diff options
Diffstat (limited to 'generic/tclEncoding.c')
| -rw-r--r-- | generic/tclEncoding.c | 131 |
1 files changed, 73 insertions, 58 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 91c2278..3d892b7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src); * convert between various character sets and UTF-8. */ -typedef struct Encoding { +typedef struct { char *name; /* Name of encoding. Malloced because (1) hash * table entry that owns this encoding may be * freed prior to this encoding being freed, @@ -57,7 +57,7 @@ typedef struct Encoding { * encoding. */ -typedef struct TableEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -91,7 +91,7 @@ typedef struct TableEncodingData { * for switching character sets. */ -typedef struct EscapeSubTable { +typedef struct { unsigned sequenceLen; /* Length of following string. */ char sequence[16]; /* Escape code that marks this encoding. */ char name[32]; /* Name for encoding. */ @@ -100,7 +100,7 @@ typedef struct EscapeSubTable { * yet. */ } EscapeSubTable; -typedef struct EscapeEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -563,7 +563,7 @@ TclInitEncodingSubsystem(void) * formed UTF-8 into a properly formed stream. */ - type.encodingName = "identity"; + type.encodingName = NULL; type.toUtfProc = BinaryProc; type.fromUtfProc = BinaryProc; type.freeProc = NULL; @@ -693,6 +693,7 @@ TclFinalizeEncodingSubsystem(void) *------------------------------------------------------------------------- */ +#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9 const char * Tcl_GetDefaultEncodingDir(void) { @@ -736,6 +737,7 @@ Tcl_SetDefaultEncodingDir( Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); Tcl_SetEncodingSearchPath(searchPath); } +#endif /* *------------------------------------------------------------------------- @@ -851,7 +853,9 @@ FreeEncoding( if (encodingPtr->hPtr != NULL) { Tcl_DeleteHashEntry(encodingPtr->hPtr); } - ckfree(encodingPtr->name); + if (encodingPtr->name) { + ckfree(encodingPtr->name); + } ckfree(encodingPtr); } } @@ -1040,9 +1044,24 @@ Tcl_CreateEncoding( const Tcl_EncodingType *typePtr) /* The encoding type. */ { + Encoding *encodingPtr = ckalloc(sizeof(Encoding)); + encodingPtr->name = NULL; + encodingPtr->toUtfProc = typePtr->toUtfProc; + encodingPtr->fromUtfProc = typePtr->fromUtfProc; + encodingPtr->freeProc = typePtr->freeProc; + encodingPtr->nullSize = typePtr->nullSize; + encodingPtr->clientData = typePtr->clientData; + if (typePtr->nullSize == 1) { + encodingPtr->lengthProc = (LengthProc *) strlen; + } else { + encodingPtr->lengthProc = (LengthProc *) unilen; + } + encodingPtr->refCount = 1; + encodingPtr->hPtr = NULL; + + if (typePtr->encodingName) { Tcl_HashEntry *hPtr; int isNew; - Encoding *encodingPtr; char *name; Tcl_MutexLock(&encodingMutex); @@ -1053,30 +1072,17 @@ Tcl_CreateEncoding( * reference goes away. */ - encodingPtr = Tcl_GetHashValue(hPtr); - encodingPtr->hPtr = NULL; + Encoding *replaceMe = Tcl_GetHashValue(hPtr); + replaceMe->hPtr = NULL; } name = ckalloc(strlen(typePtr->encodingName) + 1); - - encodingPtr = ckalloc(sizeof(Encoding)); encodingPtr->name = strcpy(name, typePtr->encodingName); - encodingPtr->toUtfProc = typePtr->toUtfProc; - encodingPtr->fromUtfProc = typePtr->fromUtfProc; - encodingPtr->freeProc = typePtr->freeProc; - encodingPtr->nullSize = typePtr->nullSize; - encodingPtr->clientData = typePtr->clientData; - if (typePtr->nullSize == 1) { - encodingPtr->lengthProc = (LengthProc *) strlen; - } else { - encodingPtr->lengthProc = (LengthProc *) unilen; - } - encodingPtr->refCount = 1; encodingPtr->hPtr = hPtr; Tcl_SetHashValue(hPtr, encodingPtr); Tcl_MutexUnlock(&encodingMutex); - + } return (Tcl_Encoding) encodingPtr; } @@ -2293,8 +2299,11 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } result = TCL_OK; srcStart = src; @@ -2326,7 +2335,7 @@ UtfToUtfProc( } if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) { /* - * Copy 7bit chatacters, but skip null-bytes when we are in input + * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xc080. */ @@ -2341,17 +2350,17 @@ UtfToUtfProc( src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* - * Always check before using Tcl_UtfToUniChar. Not doing can so - * cause it run beyond the endof the buffer! If we happen such an - * incomplete char its byts are made to represent themselves. + * Always check before using TclUtfToUniChar. Not doing can so + * cause it run beyond the end of the buffer! If we happen such an + * incomplete char its bytes are made to represent themselves. */ - ch = (unsigned char) *src; + *chPtr = (unsigned char) *src; src += 1; - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - src += Tcl_UtfToUniChar(src, &ch); - dst += Tcl_UniCharToUtf(ch, dst); + src += TclUtfToUniChar(src, chPtr); + dst += Tcl_UniCharToUtf(*chPtr, dst); } } @@ -2407,8 +2416,11 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -2436,11 +2448,11 @@ UnicodeToUtfProc( * Tcl_UniChar-size data. */ - ch = *(Tcl_UniChar *)src; - if (ch && ch < 0x80) { - *dst++ = (ch & 0xFF); + *chPtr = *(Tcl_UniChar *)src; + if (*chPtr && *chPtr < 0x80) { + *dst++ = (*chPtr & 0xFF); } else { - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(*chPtr, dst); } src += sizeof(Tcl_UniChar); } @@ -2497,8 +2509,11 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2524,7 +2539,7 @@ UtfToUnicodeProc( result = TCL_CONVERT_NOSPACE; break; } - src += TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, chPtr); /* * Need to handle this in a way that won't cause misalignment by @@ -2533,23 +2548,23 @@ UtfToUnicodeProc( #ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 4 - *dst++ = (ch >> 24); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = (ch & 0xFF); + *dst++ = (*chPtr >> 24); + *dst++ = ((*chPtr >> 16) & 0xFF); + *dst++ = ((*chPtr >> 8) & 0xFF); + *dst++ = (*chPtr & 0xFF); #else - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); #endif #else #if TCL_UTF_MAX > 4 - *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = (ch >> 24); + *dst++ = (*chPtr & 0xFF); + *dst++ = ((*chPtr >> 8) & 0xFF); + *dst++ = ((*chPtr >> 16) & 0xFF); + *dst++ = (*chPtr >> 24); #else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); #endif #endif } @@ -2607,7 +2622,7 @@ TableToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart, *prefixBytes; int result, byte, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const unsigned short *const *toUnicode; const unsigned short *pageZero; TableEncodingData *dataPtr = clientData; @@ -2719,7 +2734,7 @@ TableFromUtfProc( { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd, *prefixBytes; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int result, len, word, numChars; TableEncodingData *dataPtr = clientData; const unsigned short *const *fromUnicode; @@ -2853,7 +2868,7 @@ Iso88591ToUtfProc( result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; @@ -2939,7 +2954,7 @@ Iso88591FromUtfProc( dstEnd = dst + dstLen - 1; for (numChars = 0; src < srcEnd; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int len; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -3326,7 +3341,7 @@ EscapeFromUtfProc( for (numChars = 0; src < srcEnd; numChars++) { unsigned len; int word; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* @@ -3371,7 +3386,7 @@ EscapeFromUtfProc( /* * The state variable has the value of oldState when word is 0. - * In this case, the escape sequense should not be copied to dst + * In this case, the escape sequence should not be copied to dst * because the current character set is not changed. */ @@ -3596,7 +3611,7 @@ unilen( static void InitializeEncodingSearchPath( char **valuePtr, - size_t *lengthPtr, + unsigned int *lengthPtr, Tcl_Encoding *encodingPtr) { const char *bytes; |
