summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dkf <donal.k.fellows@manchester.ac.uk> 2009-02-05 11:57:25 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk> 2009-02-05 11:57:25 (GMT)
commit 7933720835766c9a797749bd47fd1501ae6871d2 (patch)
tree c3c9843cb1f2031ef7906393aac0148a811807d4
parent 1508a4647865986ad56b7f73c13f7829e1a23c3f (diff)
downloadtcl-7933720835766c9a797749bd47fd1501ae6871d2.zip
tcl-7933720835766c9a797749bd47fd1501ae6871d2.tar.gz
tcl-7933720835766c9a797749bd47fd1501ae6871d2.tar.bz2
More/better/cleaner handling of the bytearray special casing for string ops.
-rw-r--r--ChangeLog8
-rw-r--r--generic/tclStringObj.c96
2 files changed, 87 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index 65a4067..265120d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,8 @@
appending of one bytearray to another, which can be extremely rapid.
Part of scheme to address [Bug 1665628] by making the basic string
operations more efficient on byte arrays.
+ (Tcl_GetCharLength, Tcl_GetUniChar, Tcl_GetRange): More special casing
+ work for bytearrays.
2009-02-04 Don Porter <dgp@users.sourceforge.net>
@@ -16,9 +18,9 @@
2009-02-03 Jan Nijtmans <nijtmans@users.sf.net>
- * macosx/tclMacOSXFCmd.c - eliminate some unnessary type casts
- * unix/tclLoadDyld.c - some internal const decorations
- * unix/tclUnixCompat.c - spacing
+ * macosx/tclMacOSXFCmd.c: Eliminate some unnecessary type casts
+ * unix/tclLoadDyld.c: some internal const decorations
+ * unix/tclUnixCompat.c: spacing
* unix/tclUnixFCmd.c
* unix/tclUnixFile.c
* win/tclWinDde.c
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index b7961b8..e7a8880 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -33,7 +33,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStringObj.c,v 1.85 2009/02/05 01:21:59 dkf Exp $ */
+ * RCS: @(#) $Id: tclStringObj.c,v 1.86 2009/02/05 11:57:26 dkf Exp $ */
#include "tclInt.h"
#include "tommath.h"
@@ -135,6 +135,19 @@ typedef struct String {
((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr))
/*
+ * Macro that encapsulates the logic that determines when it is safe to
+ * interpret a string as a byte array directly. In summary, the object must be
+ * a byte array and must not have a string representation (as the operations
+ * that it is used in are defined on strings, not byte arrays). Theoretically
+ * it is possible to also be efficient in the case where the object's bytes
+ * field is filled by generation from the byte array (cf. list canonicality)
+ * but we don't do that at the moment since this is purely about efficiency.
+ */
+
+#define IS_PURE_BYTE_ARRAY(objPtr) \
+ (((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL))
+
+/*
* TCL STRING GROWTH ALGORITHM
*
* When growing strings (during an append, for example), the following growth
@@ -357,6 +370,23 @@ Tcl_GetCharLength(
{
String *stringPtr;
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * without string representation; we don't need to convert to a string to
+ * perform the get-length operation.
+ */
+
+ if (IS_PURE_BYTE_ARRAY(objPtr)) {
+ int length;
+
+ (void) Tcl_GetByteArrayFromObj(objPtr, &length);
+ return length;
+ }
+
+ /*
+ * OK, need to work with the object as a string.
+ */
+
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
@@ -442,6 +472,22 @@ Tcl_GetUniChar(
Tcl_UniChar unichar;
String *stringPtr;
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * without string representation; we don't need to convert to a string to
+ * perform the indexing operation.
+ */
+
+ if (IS_PURE_BYTE_ARRAY(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+
+ return bytes[index];
+ }
+
+ /*
+ * OK, need to work with the object as a string.
+ */
+
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
@@ -609,6 +655,22 @@ Tcl_GetRange(
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
String *stringPtr;
+ /*
+ * Optimize the case where we're really dealing with a bytearray object
+ * without string representation; we don't need to convert to a string to
+ * perform the substring operation.
+ */
+
+ if (IS_PURE_BYTE_ARRAY(objPtr)) {
+ unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
+
+ return Tcl_NewByteArrayObj(bytes+first, last-first+1);
+ }
+
+ /*
+ * OK, need to work with the object as a string.
+ */
+
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
@@ -637,7 +699,7 @@ Tcl_GetRange(
* the specified range of chars.
*/
- newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
+ newObjPtr = Tcl_NewStringObj(str+first, last-first+1);
/*
* Since we know the new string only has 1-byte chars, we can set its
@@ -742,11 +804,12 @@ Tcl_SetObjLength(
if (length < 0) {
/*
- * Setting to a negative length is nonsense. This is probably the
+ * Setting to a negative length is nonsense. This is probably the
* result of overflowing the signed integer range.
*/
- Tcl_Panic( "Tcl_SetObjLength: negative length requested: "
- "%d (integer overflow?)", length);
+
+ Tcl_Panic("Tcl_SetObjLength: negative length requested: "
+ "%d (integer overflow?)", length);
}
if (Tcl_IsShared(objPtr)) {
Tcl_Panic("%s called with shared object", "Tcl_SetObjLength");
@@ -1218,19 +1281,19 @@ Tcl_AppendObjToObj(
/*
* Handle append of one bytearray object to another as a special case.
- * Note that we only do this when the object being written doesn't have a
- * string rep; if it did, then appending the byte arrays together could
- * well lose information; this is a special-case optimization only.
+ * Note that we only do this when the objects don't have string reps; if
+ * they did, then appending the byte arrays together could well lose
+ * information; this is a special-case optimization only.
*/
- if (objPtr->typePtr == &tclByteArrayType && objPtr->bytes == NULL
- && appendObjPtr->typePtr == &tclByteArrayType) {
+ if (IS_PURE_BYTE_ARRAY(objPtr) && IS_PURE_BYTE_ARRAY(appendObjPtr)) {
unsigned char *bytesDst, *bytesSrc;
int lengthSrc, lengthTotal;
/*
- * Note that we do not assume that objPtr and appendObjPtr must be
- * distinct!
+ * We do not assume that objPtr and appendObjPtr must be distinct!
+ * This makes this code a bit more complex than it otherwise would be,
+ * but in turn makes it much safer.
*/
(void) Tcl_GetByteArrayFromObj(objPtr, &length);
@@ -1245,6 +1308,10 @@ Tcl_AppendObjToObj(
return;
}
+ /*
+ * Must append as strings.
+ */
+
SetStringFromAny(NULL, objPtr);
/*
@@ -2054,8 +2121,9 @@ Tcl_AppendFormatToObj(
allocSegment = 1;
Tcl_IncrRefCount(segment);
- if ((isNegative || gotPlus || gotSpace) && (useBig || (ch == 'd'))) {
- Tcl_AppendToObj(segment, (isNegative ? "-" : gotPlus ? "+" : " "), 1);
+ if ((isNegative || gotPlus || gotSpace) && (useBig || ch=='d')) {
+ Tcl_AppendToObj(segment,
+ (isNegative ? "-" : gotPlus ? "+" : " "), 1);
}
if (gotHash) {