From f04c7d313f1392d0e474bbb3c40af1d69791f770 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 25 Aug 2011 16:26:37 +0000 Subject: 3396731 Another rewrite of TclStringObjReverse() to make it adopt the nijtmans approach for reversing the objPtr->bytes rep without losing performance. --- generic/tclStringObj.c | 176 +++++++++++++++++++++++++++++-------------------- tests/string.test | 8 +++ 2 files changed, 114 insertions(+), 70 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index ab62359..27480c5 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2653,99 +2653,135 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ +void +ReverseBytes( + unsigned char *to, /* Copy bytes into here... */ + unsigned char *from, /* ...from here... */ + int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + if (to == from) { + /* Reversing in place */ + from += count - 1; + while (to < from) { + unsigned char c = *from; + *from-- = *to; + *to++ = c; + } + } else { + from += count - 1; + while (count--) { + *to++ = *from--; + } + } +} + +void +ReverseUniChars( + Tcl_UniChar *to, /* Copy Tcl_UniChars into here... */ + Tcl_UniChar *from, /* ...from here... */ + unsigned int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + if (to == from) { + /* Reversing in place */ + from += count - 1; + while (to < from) { + Tcl_UniChar c = *from; + *from-- = *to; + *to++ = c; + } + } else { + from += count - 1; + while (count--) { + *to++ = *from--; + } + } +} + Tcl_Obj * TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; - char *src = NULL, *dest = NULL; - Tcl_UniChar *usrc = NULL, *udest = NULL; - Tcl_Obj *resultPtr = NULL; - SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - if (stringPtr->hasUnicode == 0) { - if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (stringPtr->numChars <= 1) { - return objPtr; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } - if (stringPtr->numChars == objPtr->length) { - /* - * All one-byte chars. Reverse in objPtr->bytes. - */ + ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); + return objPtr; + } - if (Tcl_IsShared(objPtr)) { - resultPtr = Tcl_NewObj(); - Tcl_SetObjLength(resultPtr, objPtr->length); - dest = TclGetString(resultPtr); - src = objPtr->bytes + objPtr->length - 1; - while (src >= objPtr->bytes) { - *dest++ = *src--; - } - return resultPtr; - } + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->hasUnicode) { + Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + if (Tcl_IsShared(objPtr)) { /* - * Unshared. Reverse objPtr->bytes in place. + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. */ - dest = objPtr->bytes; - src = dest + objPtr->length - 1; - while (dest < src) { - char tmp = *src; - - *src-- = *dest; - *dest++ = tmp; - } - return objPtr; + Tcl_UniChar ch = 0; + objPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(objPtr, stringPtr->numChars); } - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (stringPtr->numChars <= 1) { - return objPtr; + ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars); } - /* - * Reverse the Unicode rep. - */ - - if (Tcl_IsShared(objPtr)) { - Tcl_UniChar ch = 0; - - /* - * Create a non-empty, pure unicode value, so we can coax - * Tcl_SetObjLength into growing the unicode rep buffer. - */ + if (objPtr->bytes) { + int numChars = stringPtr->numChars; + int numBytes = objPtr->length; + char *to, *from = objPtr->bytes; - resultPtr = Tcl_NewUnicodeObj(&ch, 1); - Tcl_SetObjLength(resultPtr, stringPtr->numChars); - udest = Tcl_GetUnicode(resultPtr); - usrc = stringPtr->unicode + stringPtr->numChars - 1; - while (usrc >= stringPtr->unicode) { - *udest++ = *usrc--; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewObj(); + Tcl_SetObjLength(objPtr, numBytes); } - return resultPtr; - } + to = objPtr->bytes; - /* - * Unshared. Reverse objPtr->bytes in place. - */ + if (numChars < numBytes) { + /* + * Either numChars == -1 and we don't know how many chars are + * represented by objPtr->bytes and we need Pass 1 just in case, + * or numChars >= 0 and we know we have fewer chars than bytes, + * so we know there's a multibyte character needing Pass 1. + * + * Pass 1. Reverse the bytes of each multi-byte character. + */ + int charCount = 0; + int bytesLeft = numBytes; - udest = stringPtr->unicode; - usrc = udest + stringPtr->numChars - 1; - while (udest < usrc) { - Tcl_UniChar tmp = *usrc; + while (bytesLeft) { + /* + * NOTE: We know that the from buffer is NUL-terminated. + * It's part of the contract for objPtr->bytes values. + * Thus, we can skip calling Tcl_UtfCharComplete() here. + */ + Tcl_UniChar ch = 0; + int bytesInChar = Tcl_UtfToUniChar(from, &ch); + + ReverseBytes((unsigned char *)to, (unsigned char *)from, + bytesInChar); + to += bytesInChar; + from += bytesInChar; + bytesLeft -= bytesInChar; + charCount++; + } - *usrc-- = *udest; - *udest++ = tmp; + from = to = objPtr->bytes; + stringPtr->numChars = charCount; + } + /* Pass 2. Reverse all the bytes. */ + ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); } - TclInvalidateStringRep(objPtr); - stringPtr->allocated = 0; return objPtr; } diff --git a/tests/string.test b/tests/string.test index 1a62a66..e53504f 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1623,6 +1623,14 @@ test string-24.12 {string reverse command - corner case} { set y \udead string is ascii [string reverse $x$y] } 0 +test string-24.13 {string reverse command - pure bytearray} { + binary scan [string reverse [binary format H* 010203]] H* x + set x +} 030201 +test string-24.14 {string reverse command - pure bytearray} { + binary scan [tcl::string::reverse [binary format H* 010203]] H* x + set x +} 030201 test string-25.1 {string is list} { string is list {a b c} -- cgit v0.12