From 7933720835766c9a797749bd47fd1501ae6871d2 Mon Sep 17 00:00:00 2001 From: dkf Date: Thu, 5 Feb 2009 11:57:25 +0000 Subject: More/better/cleaner handling of the bytearray special casing for string ops. --- ChangeLog | 8 +++-- generic/tclStringObj.c | 96 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 65a4067..265120d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ appending of one bytearray to another, which can be extremely rapid. Part of scheme to address [Bug 1665628] by making the basic string operations more efficient on byte arrays. + (Tcl_GetCharLength, Tcl_GetUniChar, Tcl_GetRange): More special casing + work for bytearrays. 2009-02-04 Don Porter @@ -16,9 +18,9 @@ 2009-02-03 Jan Nijtmans - * macosx/tclMacOSXFCmd.c - eliminate some unnessary type casts - * unix/tclLoadDyld.c - some internal const decorations - * unix/tclUnixCompat.c - spacing + * macosx/tclMacOSXFCmd.c: Eliminate some unnecessary type casts + * unix/tclLoadDyld.c: some internal const decorations + * unix/tclUnixCompat.c: spacing * unix/tclUnixFCmd.c * unix/tclUnixFile.c * win/tclWinDde.c diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index b7961b8..e7a8880 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -33,7 +33,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.85 2009/02/05 01:21:59 dkf Exp $ */ + * RCS: @(#) $Id: tclStringObj.c,v 1.86 2009/02/05 11:57:26 dkf Exp $ */ #include "tclInt.h" #include "tommath.h" @@ -135,6 +135,19 @@ typedef struct String { ((objPtr)->internalRep.otherValuePtr = (void *) (stringPtr)) /* + * Macro that encapsulates the logic that determines when it is safe to + * interpret a string as a byte array directly. 
In summary, the object must be + * a byte array and must not have a string representation (as the operations + * that it is used in are defined on strings, not byte arrays). Theoretically + * it is possible to also be efficient in the case where the object's bytes + * field is filled by generation from the byte array (c.f. list canonicality) + * but we don't do that at the moment since this is purely about efficiency. + */ + +#define IS_PURE_BYTE_ARRAY(objPtr) \ + (((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL)) + +/* * TCL STRING GROWTH ALGORITHM * * When growing strings (during an append, for example), the following growth @@ -357,6 +370,23 @@ Tcl_GetCharLength( { String *stringPtr; + /* + * Optimize the case where we're really dealing with a bytearray object + * without string representation; we don't need to convert to a string to + * perform the get-length operation. + */ + + if (IS_PURE_BYTE_ARRAY(objPtr)) { + int length; + + (void) Tcl_GetByteArrayFromObj(objPtr, &length); + return length; + } + + /* + * OK, need to work with the object as a string. + */ + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); @@ -442,6 +472,22 @@ Tcl_GetUniChar( Tcl_UniChar unichar; String *stringPtr; + /* + * Optimize the case where we're really dealing with a bytearray object + * without string representation; we don't need to convert to a string to + * perform the indexing operation. + */ + + if (IS_PURE_BYTE_ARRAY(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + + return bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); @@ -609,6 +655,22 @@ Tcl_GetRange( Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ String *stringPtr; + /* + * Optimize the case where we're really dealing with a bytearray object + * without string representation; we don't need to convert to a string to + * perform the substring operation. 
+ */ + + if (IS_PURE_BYTE_ARRAY(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); + + return Tcl_NewByteArrayObj(bytes+first, last-first+1); + } + + /* + * OK, need to work with the object as a string. + */ + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); @@ -637,7 +699,7 @@ Tcl_GetRange( * the specified range of chars. */ - newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); + newObjPtr = Tcl_NewStringObj(str+first, last-first+1); /* * Since we know the new string only has 1-byte chars, we can set it's @@ -742,11 +804,12 @@ Tcl_SetObjLength( if (length < 0) { /* - * Setting to a negative length is nonsense. This is probably the + * Setting to a negative length is nonsense. This is probably the * result of overflowing the signed integer range. */ - Tcl_Panic( "Tcl_SetObjLength: negative length requested: " - "%d (integer overflow?)", length); + + Tcl_Panic("Tcl_SetObjLength: negative length requested: " + "%d (integer overflow?)", length); } if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_SetObjLength"); @@ -1218,19 +1281,19 @@ Tcl_AppendObjToObj( /* * Handle append of one bytearray object to another as a special case. - * Note that we only do this when the object being written doesn't have a - * string rep; if it did, then appending the byte arrays together could - * well lose information; this is a special-case optimization only. + * Note that we only do this when the objects don't have string reps; if + * they did, then appending the byte arrays together could well lose + * information; this is a special-case optimization only. */ - if (objPtr->typePtr == &tclByteArrayType && objPtr->bytes == NULL - && appendObjPtr->typePtr == &tclByteArrayType) { + if (IS_PURE_BYTE_ARRAY(objPtr) && IS_PURE_BYTE_ARRAY(appendObjPtr)) { unsigned char *bytesDst, *bytesSrc; int lengthSrc, lengthTotal; /* - * Note that we do not assume that objPtr and appendObjPtr must be - * distinct! 
+ * We do not assume that objPtr and appendObjPtr must be distinct! + * This makes this code a bit more complex than it otherwise would be, + * but in turn makes it much safer. */ (void) Tcl_GetByteArrayFromObj(objPtr, &length); @@ -1245,6 +1308,10 @@ Tcl_AppendObjToObj( return; } + /* + * Must append as strings. + */ + SetStringFromAny(NULL, objPtr); /* @@ -2054,8 +2121,9 @@ Tcl_AppendFormatToObj( allocSegment = 1; Tcl_IncrRefCount(segment); - if ((isNegative || gotPlus || gotSpace) && (useBig || (ch == 'd'))) { - Tcl_AppendToObj(segment, (isNegative ? "-" : gotPlus ? "+" : " "), 1); + if ((isNegative || gotPlus || gotSpace) && (useBig || ch=='d')) { + Tcl_AppendToObj(segment, + (isNegative ? "-" : gotPlus ? "+" : " "), 1); } if (gotHash) { -- cgit v0.12