From a97e3eef6d56096f000475cef61a482b7cf21b73 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 26 Feb 2021 13:18:50 +0000 Subject: Increase some (internal) variables from int/long to long/size_t. On the way to allow bigger blocks in the threaded allocator --- generic/tclInt.h | 2 +- generic/tclThreadAlloc.c | 56 +++++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 27e818c..cc4f6a9 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -1804,7 +1804,7 @@ typedef struct AllocCache { struct Cache *nextPtr; /* Linked list of cache entries. */ Tcl_ThreadId owner; /* Which thread's cache is this? */ Tcl_Obj *firstObjPtr; /* List of free objects for thread. */ - int numObjects; /* Number of objects for thread. */ + size_t numObjects; /* Number of objects for thread. */ } AllocCache; /* diff --git a/generic/tclThreadAlloc.c b/generic/tclThreadAlloc.c index ad508b9..cd55ab9 100644 --- a/generic/tclThreadAlloc.c +++ b/generic/tclThreadAlloc.c @@ -89,11 +89,10 @@ typedef struct Bucket { /* All fields below for accounting only */ - long numRemoves; /* Number of removes from bucket */ - long numInserts; /* Number of inserts into bucket */ - long numWaits; /* Number of waits to acquire a lock */ - long numLocks; /* Number of locks acquired */ - long totalAssigned; /* Total space assigned to bucket */ + size_t numRemoves; /* Number of removes from bucket */ + size_t numInserts; /* Number of inserts into bucket */ + size_t numLocks; /* Number of locks acquired */ + size_t totalAssigned; /* Total space assigned to bucket */ } Bucket; /* @@ -107,9 +106,9 @@ typedef struct Cache { struct Cache *nextPtr; /* Linked list of cache entries */ Tcl_ThreadId owner; /* Which thread's cache is this? */ Tcl_Obj *firstObjPtr; /* List of free objects for thread */ - int numObjects; /* Number of objects for thread */ + size_t numObjects; /* Number of objects for thread */ Tcl_Obj *lastPtr; /* Last object in this cache */ - int totalAssigned; /* Total space assigned to thread */ + size_t totalAssigned; /* Total space assigned to thread */ Bucket buckets[NBUCKETS]; /* The buckets for this thread */ } Cache; @@ -132,12 +131,12 @@ static struct { static Cache * GetCache(void); static void LockBucket(Cache *cachePtr, int bucket); static void UnlockBucket(Cache *cachePtr, int bucket); -static void PutBlocks(Cache *cachePtr, int bucket, int numMove); +static void PutBlocks(Cache *cachePtr, int bucket, long numMove); static int GetBlocks(Cache *cachePtr, int bucket); static Block * Ptr2Block(void *ptr); static void * Block2Ptr(Block *blockPtr, int bucket, unsigned int reqSize); -static void MoveObjs(Cache *fromPtr, Cache *toPtr, int numMove); -static void PutObjs(Cache *fromPtr, int numMove); +static void MoveObjs(Cache *fromPtr, Cache *toPtr, long numMove); +static void PutObjs(Cache *fromPtr, long numMove); /* * Local variables defined in this file and initialized at startup. @@ -548,7 +547,7 @@ TclThreadAllocObj(void) */ if (cachePtr->numObjects == 0) { - int numMove; + long numMove; Tcl_MutexLock(objLockPtr); numMove = sharedPtr->numObjects; @@ -565,11 +564,11 @@ TclThreadAllocObj(void) cachePtr->numObjects = numMove = NOBJALLOC; newObjsPtr = (Tcl_Obj *)TclpSysAlloc(sizeof(Tcl_Obj) * numMove, 0); if (newObjsPtr == NULL) { - Tcl_Panic("alloc: could not allocate %d new objects", numMove); + Tcl_Panic("alloc: could not allocate %ld new objects", numMove); } cachePtr->lastPtr = newObjsPtr + numMove - 1; objPtr = cachePtr->firstObjPtr; /* NULL */ - while (--numMove >= 0) { + while (numMove-- > 0) { newObjsPtr[numMove].internalRep.twoPtrValue.ptr1 = objPtr; objPtr = newObjsPtr + numMove; } @@ -671,14 +670,13 @@ Tcl_GetMemoryInfo( Tcl_DStringAppendElement(dsPtr, buf); } for (n = 0; n < NBUCKETS; ++n) { - sprintf(buf, "%lu %ld %ld %ld %ld %ld %ld", - (unsigned long) bucketInfo[n].blockSize, + sprintf(buf, "%" TCL_Z_MODIFIER "u %ld %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d", + bucketInfo[n].blockSize, cachePtr->buckets[n].numFree, cachePtr->buckets[n].numRemoves, cachePtr->buckets[n].numInserts, cachePtr->buckets[n].totalAssigned, - cachePtr->buckets[n].numLocks, - cachePtr->buckets[n].numWaits); + cachePtr->buckets[n].numLocks); Tcl_DStringAppendElement(dsPtr, buf); } Tcl_DStringEndSublist(dsPtr); @@ -707,7 +705,7 @@ static void MoveObjs( Cache *fromPtr, Cache *toPtr, - int numMove) + long numMove) { Tcl_Obj *objPtr = fromPtr->firstObjPtr; Tcl_Obj *fromFirstObjPtr = objPtr; @@ -720,7 +718,7 @@ MoveObjs( * to be moved) as the first object in the 'from' cache. */ - while (--numMove) { + while (numMove-- > 1) { objPtr = (Tcl_Obj *)objPtr->internalRep.twoPtrValue.ptr1; } fromPtr->firstObjPtr = (Tcl_Obj *)objPtr->internalRep.twoPtrValue.ptr1; @@ -754,9 +752,9 @@ MoveObjs( static void PutObjs( Cache *fromPtr, - int numMove) + long numMove) { - int keep = fromPtr->numObjects - numMove; + size_t keep = fromPtr->numObjects - numMove; Tcl_Obj *firstPtr, *lastPtr = NULL; fromPtr->numObjects = keep; @@ -767,7 +765,7 @@ PutObjs( do { lastPtr = firstPtr; firstPtr = (Tcl_Obj *)firstPtr->internalRep.twoPtrValue.ptr1; - } while (--keep > 0); + } while (keep-- > 1); lastPtr->internalRep.twoPtrValue.ptr1 = NULL; } @@ -898,14 +896,14 @@ static void PutBlocks( Cache *cachePtr, int bucket, - int numMove) + long numMove) { /* * We have numFree. Want to shed numMove. So compute how many * Blocks to keep. */ - int keep = cachePtr->buckets[bucket].numFree - numMove; + long keep = cachePtr->buckets[bucket].numFree - numMove; Block *lastPtr = NULL, *firstPtr; cachePtr->buckets[bucket].numFree = keep; @@ -916,7 +914,7 @@ PutBlocks( do { lastPtr = firstPtr; firstPtr = firstPtr->nextBlock; - } while (--keep > 0); + } while (keep-- > 1); lastPtr->nextBlock = NULL; } @@ -961,7 +959,7 @@ GetBlocks( int bucket) { Block *blockPtr; - int n; + long n; /* * First, atttempt to move blocks from the shared cache. Note the @@ -994,7 +992,7 @@ GetBlocks( cachePtr->buckets[bucket].firstPtr = blockPtr; sharedPtr->buckets[bucket].numFree -= n; cachePtr->buckets[bucket].numFree = n; - while (--n > 0) { + while (n-- > 1) { blockPtr = blockPtr->nextBlock; } sharedPtr->buckets[bucket].firstPtr = blockPtr->nextBlock; @@ -1016,7 +1014,7 @@ GetBlocks( blockPtr = NULL; n = NBUCKETS; size = 0; - while (--n > bucket) { + while (n-- > bucket + 1) { if (cachePtr->buckets[n].numFree > 0) { size = bucketInfo[n].blockSize; blockPtr = cachePtr->buckets[n].firstPtr; @@ -1045,7 +1043,7 @@ GetBlocks( n = size / bucketInfo[bucket].blockSize; cachePtr->buckets[bucket].numFree = n; cachePtr->buckets[bucket].firstPtr = blockPtr; - while (--n > 0) { + while (n-- > 1) { blockPtr->nextBlock = (Block *) ((char *) blockPtr + bucketInfo[bucket].blockSize); blockPtr = blockPtr->nextBlock; -- cgit v0.12 From 62939392967f50a987a386e6075c7913db471c5a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 26 Feb 2021 14:52:14 +0000 Subject: Further internal variable upgrade from long -> size_t --- generic/tclAlloc.c | 10 +++++----- generic/tclThreadAlloc.c | 39 ++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/generic/tclAlloc.c b/generic/tclAlloc.c index 70cb1b6..2043248 100644 --- a/generic/tclAlloc.c +++ b/generic/tclAlloc.c @@ -94,7 +94,7 @@ union overhead { #define MINBLOCK ((sizeof(union overhead) + (TCL_ALLOCALIGN-1)) & ~(TCL_ALLOCALIGN-1)) #define NBUCKETS (13 - (MINBLOCK >> 4)) -#define MAXMALLOC (1<<(NBUCKETS+2)) +#define MAXMALLOC ((size_t)1 << (NBUCKETS+2)) static union overhead *nextf[NBUCKETS]; /* @@ -583,7 +583,7 @@ TclpRealloc( Tcl_MutexUnlock(allocMutexPtr); return (void *)(overPtr+1); } - maxSize = 1 << (i+3); + maxSize = (size_t)1 << (i+3); expensive = 0; if (numBytes+OVERHEAD > maxSize) { expensive = 1; @@ -656,18 +656,18 @@ mstats( for (j=0, overPtr=nextf[i]; overPtr; overPtr=overPtr->next, j++) { fprintf(stderr, " %u", j); } - totalFree += ((size_t)j) * (1 << (i + 3)); + totalFree += ((size_t)j) * ((size_t)1 << (i + 3)); } fprintf(stderr, "\nused:\t"); for (i = 0; i < NBUCKETS; i++) { fprintf(stderr, " %" TCL_Z_MODIFIER "u", numMallocs[i]); - totalUsed += numMallocs[i] * (1 << (i + 3)); + totalUsed += numMallocs[i] * ((size_t)1 << (i + 3)); } fprintf(stderr, "\n\tTotal small in use: %" TCL_Z_MODIFIER "u, total free: %" TCL_Z_MODIFIER "u\n", totalUsed, totalFree); - fprintf(stderr, "\n\tNumber of big (>%d) blocks in use: %" TCL_Z_MODIFIER "u\n", + fprintf(stderr, "\n\tNumber of big (>%" TCL_Z_MODIFIER "u) blocks in use: %" TCL_Z_MODIFIER "u\n", MAXMALLOC, numMallocs[NBUCKETS]); Tcl_MutexUnlock(allocMutexPtr); diff --git a/generic/tclThreadAlloc.c b/generic/tclThreadAlloc.c index cd55ab9..28475f9 100644 --- a/generic/tclThreadAlloc.c +++ b/generic/tclThreadAlloc.c @@ -82,10 +82,10 @@ typedef union Block { * and statistics information. */ -typedef struct Bucket { +typedef struct { Block *firstPtr; /* First block available */ Block *lastPtr; /* End of block list */ - long numFree; /* Number of blocks available */ + size_t numFree; /* Number of blocks available */ /* All fields below for accounting only */ @@ -119,8 +119,8 @@ typedef struct Cache { static struct { size_t blockSize; /* Bucket blocksize. */ - int maxBlocks; /* Max blocks before move to share. */ - int numMove; /* Num blocks to move to share. */ + size_t maxBlocks; /* Max blocks before move to share. */ + size_t numMove; /* Num blocks to move to share. */ Tcl_Mutex *lockPtr; /* Share bucket lock. */ } bucketInfo[NBUCKETS]; @@ -131,12 +131,12 @@ static struct { static Cache * GetCache(void); static void LockBucket(Cache *cachePtr, int bucket); static void UnlockBucket(Cache *cachePtr, int bucket); -static void PutBlocks(Cache *cachePtr, int bucket, long numMove); +static void PutBlocks(Cache *cachePtr, int bucket, size_t numMove); static int GetBlocks(Cache *cachePtr, int bucket); static Block * Ptr2Block(void *ptr); -static void * Block2Ptr(Block *blockPtr, int bucket, unsigned int reqSize); -static void MoveObjs(Cache *fromPtr, Cache *toPtr, long numMove); -static void PutObjs(Cache *fromPtr, long numMove); +static void * Block2Ptr(Block *blockPtr, int bucket, size_t reqSize); +static void MoveObjs(Cache *fromPtr, Cache *toPtr, size_t numMove); +static void PutObjs(Cache *fromPtr, size_t numMove); /* * Local variables defined in this file and initialized at startup. @@ -547,7 +547,7 @@ TclThreadAllocObj(void) */ if (cachePtr->numObjects == 0) { - long numMove; + size_t numMove; Tcl_MutexLock(objLockPtr); numMove = sharedPtr->numObjects; @@ -670,7 +670,8 @@ Tcl_GetMemoryInfo( Tcl_DStringAppendElement(dsPtr, buf); } for (n = 0; n < NBUCKETS; ++n) { - sprintf(buf, "%" TCL_Z_MODIFIER "u %ld %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d %" TCL_Z_MODIFIER "d", + sprintf(buf, "%" TCL_Z_MODIFIER "u %" TCL_Z_MODIFIER "u %" TCL_Z_MODIFIER "u %" + TCL_Z_MODIFIER "u %" TCL_Z_MODIFIER "u %" TCL_Z_MODIFIER "u", bucketInfo[n].blockSize, cachePtr->buckets[n].numFree, cachePtr->buckets[n].numRemoves, @@ -705,7 +706,7 @@ static void MoveObjs( Cache *fromPtr, Cache *toPtr, - long numMove) + size_t numMove) { Tcl_Obj *objPtr = fromPtr->firstObjPtr; Tcl_Obj *fromFirstObjPtr = objPtr; @@ -752,7 +753,7 @@ MoveObjs( static void PutObjs( Cache *fromPtr, - long numMove) + size_t numMove) { size_t keep = fromPtr->numObjects - numMove; Tcl_Obj *firstPtr, *lastPtr = NULL; @@ -806,7 +807,7 @@ static void * Block2Ptr( Block *blockPtr, int bucket, - unsigned int reqSize) + size_t reqSize) { void *ptr; @@ -896,14 +897,14 @@ static void PutBlocks( Cache *cachePtr, int bucket, - long numMove) + size_t numMove) { /* * We have numFree. Want to shed numMove. So compute how many * Blocks to keep. */ - long keep = cachePtr->buckets[bucket].numFree - numMove; + size_t keep = cachePtr->buckets[bucket].numFree - numMove; Block *lastPtr = NULL, *firstPtr; cachePtr->buckets[bucket].numFree = keep; @@ -959,7 +960,7 @@ GetBlocks( int bucket) { Block *blockPtr; - long n; + size_t n; /* * First, atttempt to move blocks from the shared cache. Note the @@ -1014,7 +1015,7 @@ GetBlocks( blockPtr = NULL; n = NBUCKETS; size = 0; - while (n-- > bucket + 1) { + while (n-- > (size_t)bucket + 1) { if (cachePtr->buckets[n].numFree > 0) { size = bucketInfo[n].blockSize; blockPtr = cachePtr->buckets[n].firstPtr; @@ -1080,9 +1081,9 @@ TclInitThreadAlloc(void) objLockPtr = TclpNewAllocMutex(); for (i = 0; i < NBUCKETS; ++i) { bucketInfo[i].blockSize = MINALLOC << i; - bucketInfo[i].maxBlocks = 1 << (NBUCKETS - 1 - i); + bucketInfo[i].maxBlocks = ((size_t)1) << (NBUCKETS - 1 - i); bucketInfo[i].numMove = i < NBUCKETS - 1 ? - 1 << (NBUCKETS - 2 - i) : 1; + (size_t)1 << (NBUCKETS - 2 - i) : 1; bucketInfo[i].lockPtr = TclpNewAllocMutex(); } TclpInitAllocCache(); -- cgit v0.12 From 1ac55b7d3fca0fa53e5ebbd6f7107d4dc3b79c9b Mon Sep 17 00:00:00 2001 From: oehhar Date: Sat, 27 Feb 2021 11:19:28 +0000 Subject: Ticket [87082587c4]: typo in msgcat man page --- doc/msgcat.n | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/msgcat.n b/doc/msgcat.n index 2fc1eee..0811bae 100644 --- a/doc/msgcat.n +++ b/doc/msgcat.n @@ -143,7 +143,7 @@ cannot be set independently. For example, if the current locale is en_US_funky, then \fB::msgcat::mcpreferences\fR returns \fB{en_us_funky en_us en {}}\fR. .TP -\fB::msgcat:mcloadedlocales subcommand\fR ?\fIlocale\fR? +\fB::msgcat::mcloadedlocales subcommand\fR ?\fIlocale\fR? . This group of commands manage the list of loaded locales for packages not setting a package locale. .PP -- cgit v0.12 From b697c844add273719e0d724f0e7680d215a0b163 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 2 Mar 2021 10:10:54 +0000 Subject: Backport some UTF-8-related changed from 8.7 to 8.6, only for TCL_UTF_MAX > 3. No change for TCL_UTF_MAX=3. Also adapt test-cases accordingly, and add comments why the changes were done. --- generic/tclUtf.c | 20 ++++++++++++++--- tests/utf.test | 65 ++++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index f99c497..65a3f41 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -90,6 +90,8 @@ static const unsigned char complete[256] = { #if TCL_UTF_MAX > 3 4,4,4,4,4, #else + /* Tcl_UtfToUniChar() accesses src[1] and src[2] to check whether + * the UTF-8 sequence is valid, so we cannot use 1 here. */ 3,3,3,3,3, #endif 1,1,1,1,1,1,1,1,1,1,1 @@ -536,7 +538,7 @@ Tcl_UtfToUniCharDString( w = wString; p = src; endPtr = src + length; - optPtr = endPtr - TCL_UTF_MAX; + optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3) ; while (p <= optPtr) { p += TclUtfToUniChar(p, &ch); *w++ = ch; @@ -623,7 +625,7 @@ Tcl_NumUtfChars( /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; /* Pointer to last byte where optimization still can be used */ - const char *optPtr = endPtr - TCL_UTF_MAX; + const char *optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3); /* * Optimize away the call in this loop. Justified because... @@ -759,6 +761,19 @@ Tcl_UtfNext( int left; const char *next; +#if TCL_UTF_MAX > 3 + if (((*src) & 0xC0) == 0x80) { + /* Continuation byte, so we start 'inside' a (possible valid) UTF-8 + * sequence. Since we are not allowed to access src[-1], we cannot + * check if the sequence is actually valid, the best we can do is + * just assume it is valid and locate the end. */ + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; + } +#endif + left = totalBytes[UCHAR(*src)]; next = src + 1; while (--left) { @@ -895,7 +910,6 @@ Tcl_UtfPrev( * properly formed byte sequence to find, and we can stop looking, * accepting the fallback. */ - return fallback; } diff --git a/tests/utf.test b/tests/utf.test index 8e886ae..cd8feb6 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -3,7 +3,7 @@ # errors. No output means no errors were found. # # Copyright (c) 1997 Sun Microsystems, Inc. -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -21,6 +21,7 @@ testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] testConstraint ucs4 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] +testConstraint ucs2_utf16 [expr {![testConstraint ucs4]}] testConstraint Uesc [expr {"\U0041" eq "A"}] testConstraint pre388 [expr {"\x741" eq "A"}] @@ -78,9 +79,12 @@ test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 -test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} { +test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} { expr {"\UD842" eq "\uD842"} } 1 +test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring fullutf} { + expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} +} 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" @@ -103,7 +107,7 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} testbytestrin test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring \xE4\xB9\x8E] } 1 -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2_utf16} { string length [testbytestring \xF0\x90\x80\x80] } 2 test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { @@ -215,9 +219,12 @@ test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0]G } 1 -test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} { +test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\x00] } 1 +test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xD0] } 1 @@ -476,12 +483,18 @@ test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring u test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF0\x90\x80\x80] } 4 -test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { +test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\x00] } 1 -test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 +test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x00] } 1 +test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x00] +} 2 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 1 @@ -491,18 +504,30 @@ test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbyte test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { testutfnext [testbytestring \xF4\x90\x80\x80] } 1 -test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { +test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0] } 1 -test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0] +} 3 +test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x80] } 1 -test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x80] +} 3 +test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0] } 1 -test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] +} 3 +test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x80\x80] } 1 +test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x80\x80] +} 3 test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { testutfnext G 0 } 0 @@ -600,18 +625,30 @@ test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xA0\xA0] 2 } 0 -test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.123.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0]G 3 } 1 -test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.123.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0]G 3 +} 3 +test utf-6.124.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0] 3 } 1 -test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.124.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] 3 +} 3 +test utf-6.125.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0]G 4 } 1 -test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.125.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0]G 4 +} 3 +test utf-6.126.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0\xA0] 4 } 1 +test utf-6.126.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0\xA0] 4 +} 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From 773a30a541bb7f3681c3e0dc08b5c9d7e23fbaed Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 2 Mar 2021 10:53:30 +0000 Subject: Using 0xFC00 is more readable here than ~0x3FF. It's sufficient becauwe ch1 and ch2 are only 16-bit. Backported from 8.7 --- generic/tclUtf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 65a3f41..57e58c1 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1296,11 +1296,11 @@ Tcl_UtfNcmp( if (ch1 != ch2) { #if TCL_UTF_MAX == 4 /* Surrogates always report higher than non-surrogates */ - if (((ch1 & ~0x3FF) == 0xD800)) { - if ((ch2 & ~0x3FF) != 0xD800) { + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { return ch1; } - } else if ((ch2 & ~0x3FF) == 0xD800) { + } else if ((ch2 & 0xFC00) == 0xD800) { return -ch2; } #endif -- cgit v0.12 From 6433d6a30be72d681a6f24797f77f14f65d7b031 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 3 Mar 2021 14:31:37 +0000 Subject: Backport improvements in UTF-8 handling for Tcl_UtfPrev/Tcl_UtfNext from 8.7 (through 8.6). No change for TCL_UTF_MAX=3. Adapt test-cases accordingly --- generic/tclInt.h | 8 ++--- generic/tclUtf.c | 50 ++++++++++++++++++------------ tests/utf.test | 92 +++++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 99 insertions(+), 51 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index b6d6a88..1a286a0 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3685,17 +3685,17 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file, */ #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0xC0) ? \ - ((*(chPtr) = (unsigned char) *(str)), 1) \ + (((UCHAR(*(str))) < 0x80) ? \ + ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToUniChar(str, chPtr)) #define TclUtfPrev(src, start) \ (((src) < (start)+2) ? (start) : \ - ((unsigned char) *(src - 1)) < 0x80 ? (src)-1 : \ + (UCHAR(*((src) - 1))) < 0x80 ? (src)-1 : \ Tcl_UtfPrev(src, start)) #define TclUtfNext(src) \ - ((((unsigned char) *(src)) < 0xC0) ? src + 1 : Tcl_UtfNext(src)) + (((UCHAR(*(src))) < 0x80) ? src + 1 : Tcl_UtfNext(src)) /* *---------------------------------------------------------------- diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 7309208..03d0f3a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -167,14 +167,13 @@ Invalid( unsigned char byte = UCHAR(*src); int index; - if ((byte & 0xC3) != 0xC0) { + if ((byte & 0xC3) == 0xC0) { /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ - return 0; - } - index = (byte - 0xC0) >> 1; - if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { - /* Out of bounds - report invalid. */ - return 1; + index = (byte - 0xC0) >> 1; + if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { + /* Out of bounds - report invalid. */ + return 1; + } } return 0; } @@ -425,10 +424,10 @@ Tcl_UtfToUniCharDString( Tcl_UniChar *w, *wString; const char *p; int oldLength; - /* Pointer to the end of string. Never read endPtr[0] */ - const char *endPtr = src + length; - /* Pointer to breakpoint in scan where optimization is lost */ - const char *optPtr = endPtr - TCL_UTF_MAX; + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr; if (length < 0) { length = strlen(src); @@ -448,14 +447,15 @@ Tcl_UtfToUniCharDString( w = wString; p = src; endPtr = src + length; - optPtr = endPtr - TCL_UTF_MAX; + optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3) ; while (p <= optPtr) { p += TclUtfToUniChar(p, w); w++; } while (p < endPtr) { if (Tcl_UtfCharComplete(p, endPtr-p)) { - p += TclUtfToUniChar(p, w++); + p += TclUtfToUniChar(p, w); + w++; } else { *w++ = UCHAR(*p++); } @@ -534,7 +534,7 @@ Tcl_NumUtfChars( /* Pointer to the end of string. Never read endPtr[0] */ const char *endPtr = src + length; /* Pointer to last byte where optimization still can be used */ - const char *optPtr = endPtr - TCL_UTF_MAX; + const char *optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3); /* * Optimize away the call in this loop. Justified because... @@ -554,7 +554,7 @@ Tcl_NumUtfChars( src += TclUtfToUniChar(src, &ch); } else { /* - * src points to incomplete UTF-8 sequence + * src points to incomplete UTF-8 sequence * Treat first byte as character and count it */ src++; @@ -570,7 +570,7 @@ Tcl_NumUtfChars( * * Tcl_UtfFindFirst -- * - * Returns a pointer to the first occurance of the given Unicode character + * Returns a pointer to the first occurrence of the given Unicode character * in the NULL-terminated UTF-8 string. The NULL terminator is considered * part of the UTF-8 string. Equivalent to Plan 9 utfrune(). * @@ -671,6 +671,19 @@ Tcl_UtfNext( int left; const char *next; +#if TCL_UTF_MAX > 3 + if (((*src) & 0xC0) == 0x80) { + /* Continuation byte, so we start 'inside' a (possible valid) UTF-8 + * sequence. Since we are not allowed to access src[-1], we cannot + * check if the sequence is actually valid, the best we can do is + * just assume it is valid and locate the end. */ + if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { + ++src; + } + return src; + } +#endif + left = totalBytes[UCHAR(*src)]; next = src + 1; while (--left) { @@ -800,14 +813,13 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < TCL_UTF_MAX); + } while (trailBytesSeen < (TCL_UTF_MAX < 4 ? 3 : 4)); /* - * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a + * We've seen 3 trail bytes, so we know there will not be a * properly formed byte sequence to find, and we can stop looking, * accepting the fallback. */ - return fallback; } diff --git a/tests/utf.test b/tests/utf.test index e65f352..06ac329 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -3,7 +3,7 @@ # errors. No output means no errors were found. # # Copyright (c) 1997 Sun Microsystems, Inc. -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -78,7 +78,10 @@ test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 -test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} { +test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} { + expr {"\UD842" eq "\uD842"} +} 1 +test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring fullutf} { expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} } 1 @@ -106,13 +109,19 @@ test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestrin test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring \xF0\x90\x80\x80] } 4 -test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { - string length [testbytestring \xF0\x90\x80\x80] +test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc utf16} { + string length \U010000 +} 2 +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { + string length \U010000 } 1 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring \xF4\x8F\xBF\xBF] } 4 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { +test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc utf16} { + string length \U10FFFF +} 2 +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { string length \U10FFFF } 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -209,15 +218,18 @@ test utf-6.7 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.8 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext A[testbytestring \xF8] } 1 -test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring} { +test utf-6.9 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0] } 1 test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0]G } 1 -test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} { +test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\x00] } 1 +test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xD0] } 1 @@ -476,12 +488,18 @@ test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring u test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF0\x90\x80\x80] } 4 -test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { +test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\x00] } 1 -test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 +test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x00] } 1 +test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x00] +} 2 test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 1 @@ -491,18 +509,30 @@ test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbyte test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { testutfnext [testbytestring \xF4\x90\x80\x80] } 1 -test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { +test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0] } 1 -test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0] +} 3 +test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x80] } 1 -test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x80] +} 3 +test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0] } 1 -test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { +test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] +} 3 +test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \x80\x80\x80\x80] } 1 +test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \x80\x80\x80\x80] +} 3 test utf-6.96 {Tcl_UtfNext, read limits} testutfnext { testutfnext G 0 } 0 @@ -554,10 +584,7 @@ test utf-6.111 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 3 } 1 -test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 3 -} 4 -test utf-6.112.3 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { +test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 3 } 0 test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { @@ -575,10 +602,7 @@ test utf-6.115 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 3 } 1 -test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring utf16} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 3 -} 4 -test utf-6.116.2 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} { +test utf-6.116.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 3 } 0 test utf-6.117.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { @@ -593,27 +617,39 @@ test utf-6.118 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xA0]G 1 } 1 -test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0] 1 } 1 -test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.121 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0]G 2 } 1 -test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.122 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0] 2 } 1 -test utf-6.123 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.123.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0]G 3 } 1 -test utf-6.124 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.123.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0]G 3 +} 3 +test utf-6.124.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0] 3 } 1 -test utf-6.125 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.124.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] 3 +} 3 +test utf-6.125.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0]G 4 } 1 -test utf-6.126 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.125.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0]G 4 +} 3 +test utf-6.126.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0\xA0\xA0\xA0\xA0] 4 } 1 +test utf-6.126.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0\xA0] 4 +} 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} -- cgit v0.12 From bebf0454f609132c9de5705f9a7b7f720438dd5e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 4 Mar 2021 09:42:47 +0000 Subject: cleanup genStubs.tcl, e.g. "==" -> "eq" and "!=" -> "ne". No change in output --- tools/genStubs.tcl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/genStubs.tcl b/tools/genStubs.tcl index 67b5112..814d154 100644 --- a/tools/genStubs.tcl +++ b/tools/genStubs.tcl @@ -4,7 +4,7 @@ # interface. # # -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1998-1999 Scriptics Corporation. # Copyright (c) 2007 Daniel A. Steffen # # See the file "license.terms" for information on usage and redistribution @@ -194,7 +194,7 @@ proc genStubs::declare {args} { set decl [parseDecl $decl] foreach platform $platformList { - if {$decl != ""} { + if {$decl ne ""} { set stubs($curName,$platform,$index) $decl if {![info exists stubs($curName,$platform,lastNum)] \ || ($index > $stubs($curName,$platform,lastNum))} { @@ -243,8 +243,9 @@ proc genStubs::rewriteFile {file text} { return } set in [open ${file} r] + fconfigure $in -eofchar "\032 {}" -encoding utf-8 set out [open ${file}.new w] - fconfigure $out -translation lf + fconfigure $out -translation lf -encoding utf-8 while {![eof $in]} { set line [gets $in] @@ -465,7 +466,9 @@ proc genStubs::makeDecl {name decl index} { } set line "$scspec $rtype" set count [expr {2 - ([string length $line] / 8)}] - append line [string range "\t\t\t" 0 $count] + if {$count >= 0} { + append line [string range "\t\t\t" 0 $count] + } set pad [expr {24 - [string length $line]}] if {$pad <= 0} { append line " " @@ -552,7 +555,7 @@ proc genStubs::makeMacro {name decl index} { append lfname [string range $fname 1 end] set text "#ifndef $fname\n#define $fname \\\n\t(" - if {$args == ""} { + if {$args eq ""} { append text "*" } append text "${name}StubsPtr->$lfname)" @@ -579,14 +582,14 @@ proc genStubs::makeSlot {name decl index} { append lfname [string range $fname 1 end] set text " " - if {$args == ""} { + if {$args eq ""} { append text $rtype " *" $lfname "; /* $index */\n" return $text } if {$rtype ne "void"} { regsub -all void $rtype VOID rtype } - if {[string range $rtype end-8 end] == "__stdcall"} { + if {[string range $rtype end-8 end] eq "__stdcall"} { append text [string trim [string range $rtype 0 end-9]] " (__stdcall *" $lfname ") " } else { append text $rtype " (*" $lfname ") " @@ -1004,7 +1007,7 @@ proc genStubs::emitHeader {name} { } append text "\ntypedef struct ${capName}Stubs {\n" append text " int magic;\n" - if {$epoch != ""} { + if {$epoch ne ""} { append text " int epoch;\n" append text " int revision;\n" } @@ -1053,7 +1056,7 @@ proc genStubs::emitInit {name textVar} { } append text "\n${capName}Stubs ${name}Stubs = \{\n" append text " TCL_STUB_MAGIC,\n" - if {$epoch != ""} { + if {$epoch ne ""} { set CAPName [string toupper $name] append text " ${CAPName}_STUBS_EPOCH,\n" append text " ${CAPName}_STUBS_REVISION,\n" @@ -1132,7 +1135,7 @@ proc genStubs::init {} { set outDir [lindex $argv 0] foreach file [lrange $argv 1 end] { - source $file + source -encoding utf-8 $file } foreach name [lsort [array names interfaces]] { @@ -1154,7 +1157,7 @@ proc genStubs::init {} { # Results: # Returns any values that were not assigned to variables. -if {[string length [namespace which lassign]] == 0} { +if {[namespace which lassign] ne ""} { proc lassign {valueList args} { if {[llength $args] == 0} { error "wrong # args: should be \"lassign list varName ?varName ...?\"" -- cgit v0.12 From 3f89004e3770f9777b2c028f268d0e8cda84172c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 4 Mar 2021 09:53:10 +0000 Subject: Add some more unused entries to the stub table, keeping up with the table size increase of higher Tcl versions --- generic/tcl.decls | 7 ++++++- generic/tclDecls.h | 27 ++++++++++++++++++++++++--- generic/tclInt.decls | 1 + generic/tclPlatDecls.h | 10 ++++++++++ generic/tclStubInit.c | 10 +++++++++- generic/tclTomMath.decls | 1 - 6 files changed, 50 insertions(+), 6 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 6510249..50e9465 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -21,6 +21,7 @@ library tcl interface tcl hooks {tclPlat tclInt tclIntPlat} +scspec EXTERN # Declare each of the functions in the public Tcl interface. Note that # the an index should never be reused for a different function in order @@ -2110,7 +2111,7 @@ declare 579 { # ----- BASELINE -- FOR -- 8.5.0 ----- # -declare 649 { +declare 656 { void TclUnusedStubEntry(void) } @@ -2150,6 +2151,9 @@ declare 1 macosx { const char *bundleName, const char *bundleVersion, int hasResourceFile, int maxPathLen, char *libraryPath) } +declare 2 macosx { + void TclUnusedStubEntry(void) +} ############################################################################## @@ -2191,6 +2195,7 @@ export { export { TclTomMathStubs *tclTomMathStubsPtr (fool checkstubs) } + # Local Variables: # mode: tcl # End: diff --git a/generic/tclDecls.h b/generic/tclDecls.h index a5b7ec1..3651a3b 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -3481,9 +3481,16 @@ EXTERN void Tcl_AppendPrintfToObj(Tcl_Obj *objPtr, /* Slot 646 is reserved */ /* Slot 647 is reserved */ /* Slot 648 is reserved */ +/* Slot 649 is reserved */ +/* Slot 650 is reserved */ +/* Slot 651 is reserved */ +/* Slot 652 is reserved */ +/* Slot 653 is reserved */ +/* Slot 654 is reserved */ +/* Slot 655 is reserved */ #ifndef TclUnusedStubEntry_TCL_DECLARED #define TclUnusedStubEntry_TCL_DECLARED -/* 649 */ +/* 656 */ EXTERN void TclUnusedStubEntry(void); #endif @@ -4170,7 +4177,14 @@ typedef struct TclStubs { VOID *reserved646; VOID *reserved647; VOID *reserved648; - void (*tclUnusedStubEntry) (void); /* 649 */ + VOID *reserved649; + VOID *reserved650; + VOID *reserved651; + VOID *reserved652; + VOID *reserved653; + VOID *reserved654; + VOID *reserved655; + void (*tclUnusedStubEntry) (void); /* 656 */ } TclStubs; extern TclStubs *tclStubsPtr; @@ -6592,9 +6606,16 @@ extern TclStubs *tclStubsPtr; /* Slot 646 is reserved */ /* Slot 647 is reserved */ /* Slot 648 is reserved */ +/* Slot 649 is reserved */ +/* Slot 650 is reserved */ +/* Slot 651 is reserved */ +/* Slot 652 is reserved */ +/* Slot 653 is reserved */ +/* Slot 654 is reserved */ +/* Slot 655 is reserved */ #ifndef TclUnusedStubEntry #define TclUnusedStubEntry \ - (tclStubsPtr->tclUnusedStubEntry) /* 649 */ + (tclStubsPtr->tclUnusedStubEntry) /* 656 */ #endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclInt.decls b/generic/tclInt.decls index 892f977..df39bef 100644 --- a/generic/tclInt.decls +++ b/generic/tclInt.decls @@ -17,6 +17,7 @@ library tcl # Define the unsupported generic interfaces. interface tclInt +scspec EXTERN # Declare each of the functions in the unsupported internal Tcl # interface. These interfaces are allowed to changed between versions. diff --git a/generic/tclPlatDecls.h b/generic/tclPlatDecls.h index ef23c84..16e8af0 100644 --- a/generic/tclPlatDecls.h +++ b/generic/tclPlatDecls.h @@ -75,6 +75,11 @@ EXTERN int Tcl_MacOSXOpenVersionedBundleResources( int hasResourceFile, int maxPathLen, char *libraryPath); #endif +#ifndef TclUnusedStubEntry_TCL_DECLARED +#define TclUnusedStubEntry_TCL_DECLARED +/* 2 */ +EXTERN void TclUnusedStubEntry(void); +#endif #endif /* MACOSX */ typedef struct TclPlatStubs { @@ -88,6 +93,7 @@ typedef struct TclPlatStubs { #ifdef MAC_OSX_TCL /* MACOSX */ int (*tcl_MacOSXOpenBundleResources) (Tcl_Interp *interp, CONST char *bundleName, int hasResourceFile, int maxPathLen, char *libraryPath); /* 0 */ int (*tcl_MacOSXOpenVersionedBundleResources) (Tcl_Interp *interp, CONST char *bundleName, CONST char *bundleVersion, int hasResourceFile, int maxPathLen, char *libraryPath); /* 1 */ + void (*tclUnusedStubEntry) (void); /* 2 */ #endif /* MACOSX */ } TclPlatStubs; @@ -122,6 +128,10 @@ extern TclPlatStubs *tclPlatStubsPtr; #define Tcl_MacOSXOpenVersionedBundleResources \ (tclPlatStubsPtr->tcl_MacOSXOpenVersionedBundleResources) /* 1 */ #endif +#ifndef TclUnusedStubEntry +#define TclUnusedStubEntry \ + (tclPlatStubsPtr->tclUnusedStubEntry) /* 2 */ +#endif #endif /* MACOSX */ #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 1a83752..6968d89 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -675,6 +675,7 @@ TclPlatStubs tclPlatStubs = { #ifdef MAC_OSX_TCL /* MACOSX */ Tcl_MacOSXOpenBundleResources, /* 0 */ Tcl_MacOSXOpenVersionedBundleResources, /* 1 */ + TclUnusedStubEntry, /* 2 */ #endif /* MACOSX */ }; @@ -1446,7 +1447,14 @@ TclStubs tclStubs = { NULL, /* 646 */ NULL, /* 647 */ NULL, /* 648 */ - TclUnusedStubEntry, /* 649 */ + NULL, /* 649 */ + NULL, /* 650 */ + NULL, /* 651 */ + NULL, /* 652 */ + NULL, /* 653 */ + NULL, /* 654 */ + NULL, /* 655 */ + TclUnusedStubEntry, /* 656 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclTomMath.decls b/generic/tclTomMath.decls index dfb6956..56993f2 100644 --- a/generic/tclTomMath.decls +++ b/generic/tclTomMath.decls @@ -17,7 +17,6 @@ library tcl # Define the unsupported generic interfaces. interface tclTomMath -# hooks {tclTomMathInt} scspec EXTERN # Declare each of the functions in the Tcl tommath interface -- cgit v0.12