diff options
author | dgp <dgp@users.sourceforge.net> | 2011-08-27 04:24:24 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2011-08-27 04:24:24 (GMT) |
commit | 1e29bab5af753724511fb41ab9fcf3148a3f4067 (patch) | |
tree | 93b5bb1e890187c860d1b11ee291fe09f46da457 | |
parent | 545f0cffe802c26b1779eb2f9ca6c4ade8c8c654 (diff) | |
parent | 65fc2758670c06dcb89d1bd829f990290c74e8c3 (diff) | |
download | tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.zip tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.tar.gz tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.tar.bz2 |
[RFE 3396731] Revise the [string reverse] implementation to operate on the
representation that comes in, avoiding conversion to other reps.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | generic/tclStringObj.c | 220 | ||||
-rw-r--r-- | tests/string.test | 8 |
3 files changed, 129 insertions, 109 deletions
@@ -1,10 +1,12 @@ -2011-08-23 Don Porter <dgp@users.sourceforge.net> +2011-08-27 Don Porter <dgp@users.sourceforge.net> - * generic/tclIORChan.c: [Bug 3396948] Leak of ReflectedChannelMap. + * generic/tclStringObj.c: [RFE 3396731] Revise the [string reverse] + * tests/string.test: implementation to operate on the representation + that comes in, avoid conversion to other reps. -2011-08-23 Jan Nijtmans <nijtmans@users.sf.net> +2011-08-23 Don Porter <dgp@users.sourceforge.net> - * generic/tclStringObj.c: [FRQ 3396731] inline string reverse + * generic/tclIORChan.c: [Bug 3396948] Leak of ReflectedChannelMap. 2011-08-19 Don Porter <dgp@users.sourceforge.net> diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 9cb973e..bccd28a 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2653,125 +2653,135 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ +void +ReverseBytes( + unsigned char *to, /* Copy bytes into here... */ + unsigned char *from, /* ...from here... */ + int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + unsigned char *src = from + count - 1; + if (to == from) { + /* Reversing in place */ + while (to < src) { + unsigned char c = *src; + *src-- = *to; + *to++ = c; + } + } else { + while (src >= from) { + *to++ = *src--; + } + } +} + +void +ReverseUniChars( + Tcl_UniChar *to, /* Copy Tcl_UniChars into here... */ + Tcl_UniChar *from, /* ...from here... */ + unsigned int count) /* Until this many are copied, */ + /* reversing as you go. 
*/ +{ + Tcl_UniChar *src = from + count - 1; + if (to == from) { + /* Reversing in place */ + from += count - 1; + while (to < src) { + Tcl_UniChar c = *src; + *src-- = *to; + *to++ = c; + } + } else { + while (src >= from) { + *to++ = *src--; + } + } +} + Tcl_Obj * TclStringObjReverse( Tcl_Obj *objPtr) { - char *src, *dest; - Tcl_Obj *resultPtr = objPtr; - char c; - - /* Special case: Pure Unicode array */ - if ((objPtr->typePtr == &tclStringType) && !objPtr->bytes) { - String *strPtr = GET_STRING(objPtr); - if (strPtr->hasUnicode) { - String *dstStrPtr = stringAlloc(strPtr->numChars); - Tcl_UniChar *chars = strPtr->unicode; - Tcl_UniChar *dstChars = dstStrPtr->unicode + strPtr->numChars; - - resultPtr = Tcl_NewObj(); - resultPtr->bytes = NULL; - SET_STRING(resultPtr, dstStrPtr); - resultPtr->typePtr = &tclStringType; - dstStrPtr->maxChars = strPtr->numChars; - dstStrPtr->unicode[strPtr->numChars] = 0; - dstStrPtr->numChars = strPtr->numChars; - dstStrPtr->hasUnicode = 1; - dstStrPtr->allocated = 0; - - while (--dstChars >= dstStrPtr->unicode) { - *dstChars = *chars++; - } - return resultPtr; + String *stringPtr; + + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } + ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); + return objPtr; } - src = TclGetString(objPtr); - if (Tcl_IsShared(objPtr)) { - resultPtr = Tcl_NewObj(); - Tcl_SetObjLength(resultPtr, objPtr->length); - dest = TclGetString(resultPtr); - memcpy(dest, src, objPtr->length); - } else { - TclFreeIntRep(objPtr); - dest = src; - } + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); - src = dest + objPtr->length; + if (stringPtr->hasUnicode) { + Tcl_UniChar *from = Tcl_GetUnicode(objPtr); - /* Pass 1: reverse individual bytes of UTF-8 representation. 
*/ - while (dest < src) { - Tcl_UniChar ch = 0; - switch (Tcl_UtfToUniChar(dest, &ch)) { - case 1: { - ++dest; - break; - } - case 2: { - c = dest[0]; - dest[0] = dest[1]; - dest[1] = c; - dest += 2; - break; - } - case 3: { - c = dest[0]; - dest[0] = dest[2]; - dest[2] = c; - dest += 3; - break; - } -#if TCL_UTF_MAX > 4 - case 5: { - c = dest[0]; - dest[0] = dest[4]; - dest[4] = c; - c = dest[1]; - dest[1] = dest[3]; - dest[3] = c; - dest += 5; - break; - } -#endif -#if TCL_UTF_MAX > 5 - case 6: { - c = dest[0]; - dest[0] = dest[5]; - dest[5] = c; - c = dest[1]; - dest[1] = dest[4]; - dest[4] = c; - c = dest[0]; - dest[2] = dest[3]; - dest[3] = c; - dest += 6; - break; - } -#endif - default: { -#if TCL_UTF_MAX > 3 - c = dest[0]; - dest[0] = dest[3]; - dest[3] = c; - c = dest[1]; - dest[1] = dest[2]; - dest[2] = c; - dest += 4; -#endif - break; - } + if (Tcl_IsShared(objPtr)) { + /* + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. + */ + + Tcl_UniChar ch = 0; + objPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(objPtr, stringPtr->numChars); } + ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars); } - /* Pass 2: Reverse byte string. */ - dest = TclGetString(resultPtr); + if (objPtr->bytes) { + int numChars = stringPtr->numChars; + int numBytes = objPtr->length; + char *to, *from = objPtr->bytes; + + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewObj(); + Tcl_SetObjLength(objPtr, numBytes); + } + to = objPtr->bytes; + + if (numChars < numBytes) { + /* + * Either numChars == -1 and we don't know how many chars are + * represented by objPtr->bytes and we need Pass 1 just in case, + * or numChars >= 0 and we know we have fewer chars than bytes, + * so we know there's a multibyte character needing Pass 1. + * + * Pass 1. Reverse the bytes of each multi-byte character. 
+ */ + int charCount = 0; + int bytesLeft = numBytes; + + while (bytesLeft) { + /* + * NOTE: We know that the from buffer is NUL-terminated. + * It's part of the contract for objPtr->bytes values. + * Thus, we can skip calling Tcl_UtfCharComplete() here. + */ + Tcl_UniChar ch = 0; + int bytesInChar = Tcl_UtfToUniChar(from, &ch); + + ReverseBytes((unsigned char *)to, (unsigned char *)from, + bytesInChar); + to += bytesInChar; + from += bytesInChar; + bytesLeft -= bytesInChar; + charCount++; + } - while (dest < --src) { - c = *src; - *src = *dest; - *dest++ = c; + from = to = objPtr->bytes; + stringPtr->numChars = charCount; } - return resultPtr; + /* Pass 2. Reverse all the bytes. */ + ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); + } + + return objPtr; } /* diff --git a/tests/string.test b/tests/string.test index 92f544e..85a7372 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1626,6 +1626,14 @@ test string-24.12 {string reverse command - corner case} { test string-24.13 {string reverse command - pure Unicode string} { string reverse [string range \ubeef\udead\ubeef\udead\ubeef\udead 1 5] } \udead\ubeef\udead\ubeef\udead +test string-24.14 {string reverse command - pure bytearray} { + binary scan [string reverse [binary format H* 010203]] H* x + set x +} 030201 +test string-24.15 {string reverse command - pure bytearray} { + binary scan [tcl::string::reverse [binary format H* 010203]] H* x + set x +} 030201 test string-25.1 {string is list} { string is list {a b c} |