summary | refs | log | tree | commit | diff | stats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
author: dgp <dgp@users.sourceforge.net> 2011-08-25 16:26:37 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2011-08-25 16:26:37 (GMT)
commit: a499c460ec172f0ea32ab6ae236fd5f251cf7abb (patch)
tree: 8ed874dc86b4e1e6a4d244dcd17c9aaee45211c3 /generic/tclStringObj.c
parent: 8da04cdb20496062dcda7a110668e31ab493c800 (diff)
download: tcl-a499c460ec172f0ea32ab6ae236fd5f251cf7abb.zip
tcl-a499c460ec172f0ea32ab6ae236fd5f251cf7abb.tar.gz
tcl-a499c460ec172f0ea32ab6ae236fd5f251cf7abb.tar.bz2
3396731 Another rewrite of TclStringObjReverse() to make it adopt the
nijtmans approach for reversing the objPtr->bytes rep without losing performance.
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- generic/tclStringObj.c 176
1 files changed, 106 insertions, 70 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index ab62359..27480c5 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2653,99 +2653,135 @@ Tcl_ObjPrintf(
*---------------------------------------------------------------------------
*/
+void
+ReverseBytes(
+ unsigned char *to, /* Copy bytes into here... */
+ unsigned char *from, /* ...from here... */
+ int count) /* Until this many are copied, */
+ /* reversing as you go. */
+{
+ if (to == from) {
+ /* Reversing in place */
+ from += count - 1;
+ while (to < from) {
+ unsigned char c = *from;
+ *from-- = *to;
+ *to++ = c;
+ }
+ } else {
+ from += count - 1;
+ while (count--) {
+ *to++ = *from--;
+ }
+ }
+}
+
+void
+ReverseUniChars(
+ Tcl_UniChar *to, /* Copy Tcl_UniChars into here... */
+ Tcl_UniChar *from, /* ...from here... */
+ unsigned int count) /* Until this many are copied, */
+ /* reversing as you go. */
+{
+ if (to == from) {
+ /* Reversing in place */
+ from += count - 1;
+ while (to < from) {
+ Tcl_UniChar c = *from;
+ *from-- = *to;
+ *to++ = c;
+ }
+ } else {
+ from += count - 1;
+ while (count--) {
+ *to++ = *from--;
+ }
+ }
+}
+
Tcl_Obj *
TclStringObjReverse(
Tcl_Obj *objPtr)
{
String *stringPtr;
- char *src = NULL, *dest = NULL;
- Tcl_UniChar *usrc = NULL, *udest = NULL;
- Tcl_Obj *resultPtr = NULL;
- SetStringFromAny(NULL, objPtr);
- stringPtr = GET_STRING(objPtr);
+ if (TclIsPureByteArray(objPtr)) {
+ int numBytes;
+ unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
- if (stringPtr->hasUnicode == 0) {
- if (stringPtr->numChars == -1) {
- TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length);
- }
- if (stringPtr->numChars <= 1) {
- return objPtr;
+ if (Tcl_IsShared(objPtr)) {
+ objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
}
- if (stringPtr->numChars == objPtr->length) {
- /*
- * All one-byte chars. Reverse in objPtr->bytes.
- */
+ ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes);
+ return objPtr;
+ }
- if (Tcl_IsShared(objPtr)) {
- resultPtr = Tcl_NewObj();
- Tcl_SetObjLength(resultPtr, objPtr->length);
- dest = TclGetString(resultPtr);
- src = objPtr->bytes + objPtr->length - 1;
- while (src >= objPtr->bytes) {
- *dest++ = *src--;
- }
- return resultPtr;
- }
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+ if (stringPtr->hasUnicode) {
+ Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
+ if (Tcl_IsShared(objPtr)) {
/*
- * Unshared. Reverse objPtr->bytes in place.
+ * Create a non-empty, pure unicode value, so we can coax
+ * Tcl_SetObjLength into growing the unicode rep buffer.
*/
- dest = objPtr->bytes;
- src = dest + objPtr->length - 1;
- while (dest < src) {
- char tmp = *src;
-
- *src-- = *dest;
- *dest++ = tmp;
- }
- return objPtr;
+ Tcl_UniChar ch = 0;
+ objPtr = Tcl_NewUnicodeObj(&ch, 1);
+ Tcl_SetObjLength(objPtr, stringPtr->numChars);
}
- FillUnicodeRep(objPtr);
- stringPtr = GET_STRING(objPtr);
- }
- if (stringPtr->numChars <= 1) {
- return objPtr;
+ ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars);
}
- /*
- * Reverse the Unicode rep.
- */
-
- if (Tcl_IsShared(objPtr)) {
- Tcl_UniChar ch = 0;
-
- /*
- * Create a non-empty, pure unicode value, so we can coax
- * Tcl_SetObjLength into growing the unicode rep buffer.
- */
+ if (objPtr->bytes) {
+ int numChars = stringPtr->numChars;
+ int numBytes = objPtr->length;
+ char *to, *from = objPtr->bytes;
- resultPtr = Tcl_NewUnicodeObj(&ch, 1);
- Tcl_SetObjLength(resultPtr, stringPtr->numChars);
- udest = Tcl_GetUnicode(resultPtr);
- usrc = stringPtr->unicode + stringPtr->numChars - 1;
- while (usrc >= stringPtr->unicode) {
- *udest++ = *usrc--;
+ if (Tcl_IsShared(objPtr)) {
+ objPtr = Tcl_NewObj();
+ Tcl_SetObjLength(objPtr, numBytes);
}
- return resultPtr;
- }
+ to = objPtr->bytes;
- /*
- * Unshared. Reverse objPtr->bytes in place.
- */
+ if (numChars < numBytes) {
+ /*
+ * Either numChars == -1 and we don't know how many chars are
+ * represented by objPtr->bytes and we need Pass 1 just in case,
+ * or numChars >= 0 and we know we have fewer chars than bytes,
+ * so we know there's a multibyte character needing Pass 1.
+ *
+ * Pass 1. Reverse the bytes of each multi-byte character.
+ */
+ int charCount = 0;
+ int bytesLeft = numBytes;
- udest = stringPtr->unicode;
- usrc = udest + stringPtr->numChars - 1;
- while (udest < usrc) {
- Tcl_UniChar tmp = *usrc;
+ while (bytesLeft) {
+ /*
+ * NOTE: We know that the from buffer is NUL-terminated.
+ * It's part of the contract for objPtr->bytes values.
+ * Thus, we can skip calling Tcl_UtfCharComplete() here.
+ */
+ Tcl_UniChar ch = 0;
+ int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+
+ ReverseBytes((unsigned char *)to, (unsigned char *)from,
+ bytesInChar);
+ to += bytesInChar;
+ from += bytesInChar;
+ bytesLeft -= bytesInChar;
+ charCount++;
+ }
- *usrc-- = *udest;
- *udest++ = tmp;
+ from = to = objPtr->bytes;
+ stringPtr->numChars = charCount;
+ }
+ /* Pass 2. Reverse all the bytes. */
+ ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
}
- TclInvalidateStringRep(objPtr);
- stringPtr->allocated = 0;
return objPtr;
}