summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2011-08-27 04:24:24 (GMT)
committer dgp <dgp@users.sourceforge.net> 2011-08-27 04:24:24 (GMT)
commit 1e29bab5af753724511fb41ab9fcf3148a3f4067 (patch)
tree 93b5bb1e890187c860d1b11ee291fe09f46da457
parent 545f0cffe802c26b1779eb2f9ca6c4ade8c8c654 (diff)
parent 65fc2758670c06dcb89d1bd829f990290c74e8c3 (diff)
download tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.zip
tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.tar.gz
tcl-1e29bab5af753724511fb41ab9fcf3148a3f4067.tar.bz2
3396731 Revise the [string reverse] implementation to operate on the
representation that comes in, avoid conversion to other reps.
-rw-r--r-- ChangeLog 10
-rw-r--r-- generic/tclStringObj.c 220
-rw-r--r-- tests/string.test 8
3 files changed, 129 insertions, 109 deletions
diff --git a/ChangeLog b/ChangeLog
index bc323fa..67572ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,10 +1,12 @@
-2011-08-23 Don Porter <dgp@users.sourceforge.net>
+2011-08-27 Don Porter <dgp@users.sourceforge.net>
- * generic/tclIORChan.c: [Bug 3396948] Leak of ReflectedChannelMap.
+ * generic/tclStringObj.c: [RFE 3396731] Revise the [string reverse]
+ * tests/string.test: implementation to operate on the representation
+ that comes in, avoid conversion to other reps.
-2011-08-23 Jan Nijtmans <nijtmans@users.sf.net>
+2011-08-23 Don Porter <dgp@users.sourceforge.net>
- * generic/tclStringObj.c: [FRQ 3396731] inline string reverse
+ * generic/tclIORChan.c: [Bug 3396948] Leak of ReflectedChannelMap.
2011-08-19 Don Porter <dgp@users.sourceforge.net>
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 9cb973e..bccd28a 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2653,125 +2653,135 @@ Tcl_ObjPrintf(
*---------------------------------------------------------------------------
*/
+/*
+ * ReverseBytes --
+ *
+ *	Copy count bytes from the buffer at from into the buffer at to,
+ *	reversing their order on the way. The two buffers must either be
+ *	the same buffer (to == from, reversal in place) or not overlap.
+ *	A count of zero or less copies nothing.
+ */
+
+void
+ReverseBytes(
+    unsigned char *to,		/* Copy bytes into here... */
+    unsigned char *from,	/* ...from here... */
+    int count)			/* Until this many are copied, */
+				/* reversing as you go. */
+{
+    unsigned char *src;
+
+    if (count <= 0) {
+	/*
+	 * Nothing to reverse. Return before doing any pointer arithmetic,
+	 * so no pointer before the start of the buffer is ever formed.
+	 */
+	return;
+    }
+
+    /* One past the last byte; always pre-decrement before dereferencing. */
+    src = from + count;
+    if (to == from) {
+	/* Reversing in place: swap the two ends and work inward. */
+	while (--src > to) {
+	    unsigned char c = *src;
+
+	    *src = *to;
+	    *to++ = c;
+	}
+    } else {
+	/* Distinct buffers: read backward from the end, write forward. */
+	while (src > from) {
+	    *to++ = *--src;
+	}
+    }
+}
+
+/*
+ * ReverseUniChars --
+ *
+ *	Copy count Tcl_UniChars from the buffer at from into the buffer at
+ *	to, reversing their order on the way. The two buffers must either
+ *	be the same buffer (to == from, reversal in place) or not overlap.
+ *	A count of zero copies nothing.
+ */
+
+void
+ReverseUniChars(
+    Tcl_UniChar *to,		/* Copy Tcl_UniChars into here... */
+    Tcl_UniChar *from,		/* ...from here... */
+    unsigned int count)		/* Until this many are copied, */
+				/* reversing as you go. */
+{
+    Tcl_UniChar *src;
+
+    if (count == 0) {
+	/*
+	 * Nothing to reverse. count is unsigned, so count - 1 would wrap
+	 * around to UINT_MAX and place the read cursor far outside the
+	 * buffer; return before any pointer arithmetic happens.
+	 */
+	return;
+    }
+
+    /* One past the last element; pre-decrement before dereferencing. */
+    src = from + count;
+    if (to == from) {
+	/* Reversing in place: swap the two ends and work inward. */
+	while (--src > to) {
+	    Tcl_UniChar c = *src;
+
+	    *src = *to;
+	    *to++ = c;
+	}
+    } else {
+	/* Distinct buffers: read backward from the end, write forward. */
+	while (src > from) {
+	    *to++ = *--src;
+	}
+    }
+}
+
Tcl_Obj *
TclStringObjReverse(
Tcl_Obj *objPtr)
{
- char *src, *dest;
- Tcl_Obj *resultPtr = objPtr;
- char c;
-
- /* Special case: Pure Unicode array */
- if ((objPtr->typePtr == &tclStringType) && !objPtr->bytes) {
- String *strPtr = GET_STRING(objPtr);
- if (strPtr->hasUnicode) {
- String *dstStrPtr = stringAlloc(strPtr->numChars);
- Tcl_UniChar *chars = strPtr->unicode;
- Tcl_UniChar *dstChars = dstStrPtr->unicode + strPtr->numChars;
-
- resultPtr = Tcl_NewObj();
- resultPtr->bytes = NULL;
- SET_STRING(resultPtr, dstStrPtr);
- resultPtr->typePtr = &tclStringType;
- dstStrPtr->maxChars = strPtr->numChars;
- dstStrPtr->unicode[strPtr->numChars] = 0;
- dstStrPtr->numChars = strPtr->numChars;
- dstStrPtr->hasUnicode = 1;
- dstStrPtr->allocated = 0;
-
- while (--dstChars >= dstStrPtr->unicode) {
- *dstChars = *chars++;
- }
- return resultPtr;
+ String *stringPtr;
+
+ if (TclIsPureByteArray(objPtr)) { /* Byte-array case: reverse the bytes directly and return. */
+ int numBytes;
+ unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes);
+
+ if (Tcl_IsShared(objPtr)) { /* Shared value: write the result into a new byte array instead. */
+ objPtr = Tcl_NewByteArrayObj(NULL, numBytes);
}
+ ReverseBytes(Tcl_GetByteArrayFromObj(objPtr, NULL), from, numBytes); /* Presumably to == from (in place) when objPtr was not replaced above. */
+ return objPtr;
}
- src = TclGetString(objPtr);
- if (Tcl_IsShared(objPtr)) {
- resultPtr = Tcl_NewObj();
- Tcl_SetObjLength(resultPtr, objPtr->length);
- dest = TclGetString(resultPtr);
- memcpy(dest, src, objPtr->length);
- } else {
- TclFreeIntRep(objPtr);
- dest = src;
- }
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr); /* Taken from the incoming value; not refreshed if objPtr is replaced below. */
- src = dest + objPtr->length;
+ if (stringPtr->hasUnicode) { /* Unicode rep present: reverse it as Tcl_UniChars. */
+ Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
- /* Pass 1: reverse individual bytes of UTF-8 representation. */
- while (dest < src) {
- Tcl_UniChar ch = 0;
- switch (Tcl_UtfToUniChar(dest, &ch)) {
- case 1: {
- ++dest;
- break;
- }
- case 2: {
- c = dest[0];
- dest[0] = dest[1];
- dest[1] = c;
- dest += 2;
- break;
- }
- case 3: {
- c = dest[0];
- dest[0] = dest[2];
- dest[2] = c;
- dest += 3;
- break;
- }
-#if TCL_UTF_MAX > 4
- case 5: {
- c = dest[0];
- dest[0] = dest[4];
- dest[4] = c;
- c = dest[1];
- dest[1] = dest[3];
- dest[3] = c;
- dest += 5;
- break;
- }
-#endif
-#if TCL_UTF_MAX > 5
- case 6: {
- c = dest[0];
- dest[0] = dest[5];
- dest[5] = c;
- c = dest[1];
- dest[1] = dest[4];
- dest[4] = c;
- c = dest[0];
- dest[2] = dest[3];
- dest[3] = c;
- dest += 6;
- break;
- }
-#endif
- default: {
-#if TCL_UTF_MAX > 3
- c = dest[0];
- dest[0] = dest[3];
- dest[3] = c;
- c = dest[1];
- dest[1] = dest[2];
- dest[2] = c;
- dest += 4;
-#endif
- break;
- }
+ if (Tcl_IsShared(objPtr)) {
+ /*
+ * Create a non-empty, pure unicode value, so we can coax
+ * Tcl_SetObjLength into growing the unicode rep buffer.
+ */
+
+ Tcl_UniChar ch = 0;
+ objPtr = Tcl_NewUnicodeObj(&ch, 1);
+ Tcl_SetObjLength(objPtr, stringPtr->numChars);
}
+ ReverseUniChars(Tcl_GetUnicode(objPtr), from, stringPtr->numChars); /* The count comes from the incoming value's rep. */
}
- /* Pass 2: Reverse byte string. */
- dest = TclGetString(resultPtr);
+ if (objPtr->bytes) { /* objPtr may be the new value created above; reverse its bytes only if it has any. */
+ int numChars = stringPtr->numChars;
+ int numBytes = objPtr->length;
+ char *to, *from = objPtr->bytes;
+
+ if (Tcl_IsShared(objPtr)) { /* Shared value: build the reversed bytes in a new value. */
+ objPtr = Tcl_NewObj();
+ Tcl_SetObjLength(objPtr, numBytes);
+ }
+ to = objPtr->bytes;
+
+ if (numChars < numBytes) {
+ /*
+ * Either numChars == -1 and we don't know how many chars are
+ * represented by objPtr->bytes and we need Pass 1 just in case,
+ * or numChars >= 0 and we know we have fewer chars than bytes,
+ * so we know there's a multibyte character needing Pass 1.
+ *
+ * Pass 1. Reverse the bytes of each multi-byte character.
+ */
+ int charCount = 0;
+ int bytesLeft = numBytes;
+
+ while (bytesLeft) {
+ /*
+ * NOTE: We know that the from buffer is NUL-terminated.
+ * It's part of the contract for objPtr->bytes values.
+ * Thus, we can skip calling Tcl_UtfCharComplete() here.
+ */
+ Tcl_UniChar ch = 0;
+ int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+
+ ReverseBytes((unsigned char *)to, (unsigned char *)from,
+ bytesInChar);
+ to += bytesInChar;
+ from += bytesInChar;
+ bytesLeft -= bytesInChar;
+ charCount++;
+ }
- while (dest < --src) {
- c = *src;
- *src = *dest;
- *dest++ = c;
+ from = to = objPtr->bytes; /* Pass 1 filled the result buffer, so Pass 2 works on it in place. */
+ stringPtr->numChars = charCount; /* Pass 1 visited every character; record the count it found. */
}
- return resultPtr;
+ /* Pass 2. Reverse all the bytes. */
+ ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
+ }
+
+ return objPtr;
}
/*
diff --git a/tests/string.test b/tests/string.test
index 92f544e..85a7372 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -1626,6 +1626,14 @@ test string-24.12 {string reverse command - corner case} {
test string-24.13 {string reverse command - pure Unicode string} {
string reverse [string range \ubeef\udead\ubeef\udead\ubeef\udead 1 5]
} \udead\ubeef\udead\ubeef\udead
+# Build a 3-byte binary value with [binary format], reverse it through the
+# [string reverse] command, and check via a hex dump that the bytes come
+# back in the opposite order.
+test string-24.14 {string reverse command - pure bytearray} {
+ binary scan [string reverse [binary format H* 010203]] H* x
+ set x
+} 030201
+# Same check as string-24.14, but invoking tcl::string::reverse by name
+# rather than going through [string reverse].
+test string-24.15 {string reverse command - pure bytearray} {
+ binary scan [tcl::string::reverse [binary format H* 010203]] H* x
+ set x
+} 030201
test string-25.1 {string is list} {
string is list {a b c}