From f04c7d313f1392d0e474bbb3c40af1d69791f770 Mon Sep 17 00:00:00 2001 From: dgp Date: Thu, 25 Aug 2011 16:26:37 +0000 Subject: 3396731 Another rewrite of TclStringObjReverse() to make it adopt the nijtmans approach for reversing the objPtr->bytes rep without losing performance. --- generic/tclStringObj.c | 176 +++++++++++++++++++++++++++++-------------------- tests/string.test | 8 +++ 2 files changed, 114 insertions(+), 70 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index ab62359..27480c5 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2653,99 +2653,135 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ +void +ReverseBytes( + unsigned char *to, /* Copy bytes into here... */ + unsigned char *from, /* ...from here... */ + int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + if (to == from) { + /* Reversing in place */ + from += count - 1; + while (to < from) { + unsigned char c = *from; + *from-- = *to; + *to++ = c; + } + } else { + from += count - 1; + while (count--) { + *to++ = *from--; + } + } +} + +void +ReverseUniChars( + Tcl_UniChar *to, /* Copy Tcl_UniChars into here... */ + Tcl_UniChar *from, /* ...from here... */ + unsigned int count) /* Until this many are copied, */ + /* reversing as you go. */ +{ + if (to == from) { + /* Reversing in place */ + from += count - 1; + while (to < from) { + Tcl_UniChar c = *from; + *from-- = *to; + *to++ = c; + } + } else { + from += count - 1; + while (count--) { + *to++ = *from--; + } + } +} + Tcl_Obj * TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; - char *src = NULL, *dest = NULL; - Tcl_UniChar *usrc = NULL, *udest = NULL; - Tcl_Obj *resultPtr = NULL; - SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + if (TclIsPureByteArray(objPtr)) { + int numBytes; + unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - if (stringPtr->hasUnicode == 0) { - if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); - } - if (stringPtr->numChars <= 1) { - return objPtr; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewByteArrayObj(NULL, numBytes); } - if (stringPtr->numChars == objPtr->length) { - /* - * All one-byte chars. Reverse in objPtr->bytes. - */ + ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); + return objPtr; + } - if (Tcl_IsShared(objPtr)) { - resultPtr = Tcl_NewObj(); - Tcl_SetObjLength(resultPtr, objPtr->length); - dest = TclGetString(resultPtr); - src = objPtr->bytes + objPtr->length - 1; - while (src >= objPtr->bytes) { - *dest++ = *src--; - } - return resultPtr; - } + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->hasUnicode) { + Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + if (Tcl_IsShared(objPtr)) { /* - * Unshared. Reverse objPtr->bytes in place. + * Create a non-empty, pure unicode value, so we can coax + * Tcl_SetObjLength into growing the unicode rep buffer. */ - dest = objPtr->bytes; - src = dest + objPtr->length - 1; - while (dest < src) { - char tmp = *src; - - *src-- = *dest; - *dest++ = tmp; - } - return objPtr; + Tcl_UniChar ch = 0; + objPtr = Tcl_NewUnicodeObj(&ch, 1); + Tcl_SetObjLength(objPtr, stringPtr->numChars); } - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (stringPtr->numChars <= 1) { - return objPtr; + ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars); } - /* - * Reverse the Unicode rep. - */ - - if (Tcl_IsShared(objPtr)) { - Tcl_UniChar ch = 0; - - /* - * Create a non-empty, pure unicode value, so we can coax - * Tcl_SetObjLength into growing the unicode rep buffer. - */ + if (objPtr->bytes) { + int numChars = stringPtr->numChars; + int numBytes = objPtr->length; + char *to, *from = objPtr->bytes; - resultPtr = Tcl_NewUnicodeObj(&ch, 1); - Tcl_SetObjLength(resultPtr, stringPtr->numChars); - udest = Tcl_GetUnicode(resultPtr); - usrc = stringPtr->unicode + stringPtr->numChars - 1; - while (usrc >= stringPtr->unicode) { - *udest++ = *usrc--; + if (Tcl_IsShared(objPtr)) { + objPtr = Tcl_NewObj(); + Tcl_SetObjLength(objPtr, numBytes); } - return resultPtr; - } + to = objPtr->bytes; - /* - * Unshared. Reverse objPtr->bytes in place. - */ + if (numChars < numBytes) { + /* + * Either numChars == -1 and we don't know how many chars are + * represented by objPtr->bytes and we need Pass 1 just in case, + * or numChars >= 0 and we know we have fewer chars than bytes, + * so we know there's a multibyte character needing Pass 1. + * + * Pass 1. Reverse the bytes of each multi-byte character. + */ + int charCount = 0; + int bytesLeft = numBytes; - udest = stringPtr->unicode; - usrc = udest + stringPtr->numChars - 1; - while (udest < usrc) { - Tcl_UniChar tmp = *usrc; + while (bytesLeft) { + /* + * NOTE: We know that the from buffer is NUL-terminated. + * It's part of the contract for objPtr->bytes values. + * Thus, we can skip calling Tcl_UtfCharComplete() here. + */ + Tcl_UniChar ch = 0; + int bytesInChar = Tcl_UtfToUniChar(from, &ch); + + ReverseBytes((unsigned char *)to, (unsigned char *)from, + bytesInChar); + to += bytesInChar; + from += bytesInChar; + bytesLeft -= bytesInChar; + charCount++; + } - *usrc-- = *udest; - *udest++ = tmp; + from = to = objPtr->bytes; + stringPtr->numChars = charCount; + } + /* Pass 2. Reverse all the bytes. */ + ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); } - TclInvalidateStringRep(objPtr); - stringPtr->allocated = 0; return objPtr; } diff --git a/tests/string.test b/tests/string.test index 1a62a66..e53504f 100644 --- a/tests/string.test +++ b/tests/string.test @@ -1623,6 +1623,14 @@ test string-24.12 {string reverse command - corner case} { set y \udead string is ascii [string reverse $x$y] } 0 +test string-24.13 {string reverse command - pure bytearray} { + binary scan [string reverse [binary format H* 010203]] H* x + set x +} 030201 +test string-24.14 {string reverse command - pure bytearray} { + binary scan [tcl::string::reverse [binary format H* 010203]] H* x + set x +} 030201 test string-25.1 {string is list} { string is list {a b c} -- cgit v0.12 From 65fc2758670c06dcb89d1bd829f990290c74e8c3 Mon Sep 17 00:00:00 2001 From: dgp Date: Sat, 27 Aug 2011 02:28:47 +0000 Subject: Repaired the lost performance in the copy loop hotspots. Now meets or beats the former trunk (and current trunk by magnitudes) in tclbench. --- generic/tclStringObj.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 27480c5..bccd28a 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2660,18 +2660,17 @@ ReverseBytes( int count) /* Until this many are copied, */ /* reversing as you go. */ { + unsigned char *src = from + count - 1; if (to == from) { /* Reversing in place */ - from += count - 1; - while (to < from) { - unsigned char c = *from; - *from-- = *to; + while (to < src) { + unsigned char c = *src; + *src-- = *to; *to++ = c; } } else { - from += count - 1; - while (count--) { - *to++ = *from--; + while (src >= from) { + *to++ = *src--; } } } @@ -2683,18 +2682,18 @@ ReverseUniChars( unsigned int count) /* Until this many are copied, */ /* reversing as you go. */ { + Tcl_UniChar *src = from + count - 1; if (to == from) { /* Reversing in place */ from += count - 1; - while (to < from) { - Tcl_UniChar c = *from; - *from-- = *to; + while (to < src) { + Tcl_UniChar c = *src; + *src-- = *to; *to++ = c; } } else { - from += count - 1; - while (count--) { - *to++ = *from--; + while (src >= from) { + *to++ = *src--; } } } -- cgit v0.12 From 7fa60e4fb188f417e4b968ef37085cc9c1c171e2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 29 Aug 2011 07:25:27 +0000 Subject: [3396731] inline string reverse: minor further improvements --- generic/tclStringObj.c | 54 ++++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index bccd28a..d721c47 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2653,47 +2653,24 @@ Tcl_ObjPrintf( *--------------------------------------------------------------------------- */ -void +static void ReverseBytes( unsigned char *to, /* Copy bytes into here... */ unsigned char *from, /* ...from here... */ int count) /* Until this many are copied, */ /* reversing as you go. */ { - unsigned char *src = from + count - 1; + unsigned char *src = from + count; if (to == from) { /* Reversing in place */ - while (to < src) { + while (--src > to) { unsigned char c = *src; - *src-- = *to; - *to++ = c; - } - } else { - while (src >= from) { - *to++ = *src--; - } - } -} - -void -ReverseUniChars( - Tcl_UniChar *to, /* Copy Tcl_UniChars into here... */ - Tcl_UniChar *from, /* ...from here... */ - unsigned int count) /* Until this many are copied, */ - /* reversing as you go. */ -{ - Tcl_UniChar *src = from + count - 1; - if (to == from) { - /* Reversing in place */ - from += count - 1; - while (to < src) { - Tcl_UniChar c = *src; - *src-- = *to; + *src = *to; *to++ = c; } } else { - while (src >= from) { - *to++ = *src--; + while (--src >= from) { + *to++ = *src; } } } @@ -2703,6 +2680,7 @@ TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; + Tcl_UniChar ch; if (TclIsPureByteArray(objPtr)) { int numBytes; @@ -2720,18 +2698,31 @@ TclStringObjReverse( if (stringPtr->hasUnicode) { Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + Tcl_UniChar *src = from + stringPtr->numChars; if (Tcl_IsShared(objPtr)) { + Tcl_UniChar *to; + /* * Create a non-empty, pure unicode value, so we can coax * Tcl_SetObjLength into growing the unicode rep buffer. */ - Tcl_UniChar ch = 0; + ch = 0; objPtr = Tcl_NewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); + to = Tcl_GetUnicode(objPtr); + while (--src >= from) { + *to++ = *src; + } + } else { + /* Reversing in place */ + while (--src > from) { + ch = *src; + *src = *from; + *from++ = ch; + } } - ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars); } if (objPtr->bytes) { @@ -2763,7 +2754,6 @@ TclStringObjReverse( * It's part of the contract for objPtr->bytes values. * Thus, we can skip calling Tcl_UtfCharComplete() here. */ - Tcl_UniChar ch = 0; int bytesInChar = Tcl_UtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, -- cgit v0.12