From 1c52941e5f67f7f374dbc110234bf18a7ac4844a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 1 May 2014 07:38:27 +0000 Subject: Fix more corner-cases like [0e92c404f19ede5b2eb06e6db27647d3138cc56|0e92c404f1]: The only place where a type of &tclByteArrayType can be trusted is when determining its length, because the character length of a (UTF-8) string is always equal to the byte length of the byte array. --- generic/tclCmdMZ.c | 12 ++++++------ generic/tclExecute.c | 10 +++++----- generic/tclInt.h | 18 ++++++++++++++++++ generic/tclUtil.c | 2 +- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index d106f53..70943e9 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1345,7 +1345,7 @@ StringIndexCmd( * Unicode string rep to get the index'th char. */ - if (objv[1]->typePtr == &tclByteArrayType) { + if (TclIsPureByteArray(objv[1])) { const unsigned char *string = Tcl_GetByteArrayFromObj(objv[1], &length); @@ -2086,7 +2086,7 @@ StringRangeCmd( * Unicode string rep to get the range. */ - if (objv[1]->typePtr == &tclByteArrayType && (objv[1]->bytes==NULL)) { + if (TclIsPureByteArray(objv[1])) { string = Tcl_GetByteArrayFromObj(objv[1], &length); length--; } else { @@ -2537,8 +2537,8 @@ StringEqualCmd( return TCL_OK; } - if (!nocase && objv[0]->typePtr == &tclByteArrayType && - objv[1]->typePtr == &tclByteArrayType) { + if (!nocase && TclIsPureByteArray(objv[0]) && + TclIsPureByteArray(objv[1])) { /* * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're @@ -2684,8 +2684,8 @@ StringCmpCmd( return TCL_OK; } - if (!nocase && objv[0]->typePtr == &tclByteArrayType && - objv[1]->typePtr == &tclByteArrayType) { + if (!nocase && TclIsPureByteArray(objv[0]) && + TclIsPureByteArray(objv[1])) { /* * Use binary versions of comparisons since that won't cause undue * type conversions and it is much faster. Only do this if we're diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 2e396e8..c4f9836 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -4235,8 +4235,8 @@ TclExecuteByteCode( */ iResult = s1len = s2len = 0; - } else if ((valuePtr->typePtr == &tclByteArrayType) - && (value2Ptr->typePtr == &tclByteArrayType)) { + } else if (TclIsPureByteArray(valuePtr) + && TclIsPureByteArray(value2Ptr)) { s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); iResult = memcmp(s1, s2, @@ -4354,7 +4354,7 @@ TclExecuteByteCode( * use the Unicode string rep to get the index'th char. */ - if (valuePtr->typePtr == &tclByteArrayType) { + if (TclIsPureByteArray(valuePtr)) { bytes = (char *)Tcl_GetByteArrayFromObj(valuePtr, &length); } else { /* @@ -4370,7 +4370,7 @@ TclExecuteByteCode( } if ((index >= 0) && (index < length)) { - if (valuePtr->typePtr == &tclByteArrayType) { + if (TclIsPureByteArray(valuePtr)) { objResultPtr = Tcl_NewByteArrayObj((unsigned char *) (&bytes[index]), 1); } else if (valuePtr->bytes && length == valuePtr->length) { @@ -4422,7 +4422,7 @@ TclExecuteByteCode( ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); match = TclUniCharMatch(ustring1, length1, ustring2, length2, nocase); - } else if ((valuePtr->typePtr == &tclByteArrayType) && !nocase) { + } else if (TclIsPureByteArray(valuePtr) && !nocase) { unsigned char *string1, *string2; int length1, length2; diff --git a/generic/tclInt.h b/generic/tclInt.h index 00b246b..2353450 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3668,6 +3668,24 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file, /* *---------------------------------------------------------------- + * Macro that encapsulates the logic that determines when it is safe to + * interpret a string as a byte array directly. In summary, the object must be + * a byte array and must not have a string representation (as the operations + * that it is used in are defined on strings, not byte arrays). Theoretically + * it is possible to also be efficient in the case where the object's bytes + * field is filled by generation from the byte array (c.f. list canonicality) + * but we don't do that at the moment since this is purely about efficiency. + * The ANSI C "prototype" for this macro is: + * + * MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr); + *---------------------------------------------------------------- + */ + +#define TclIsPureByteArray(objPtr) \ + (((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL)) + +/* + *---------------------------------------------------------------- * Macro used by the Tcl core to compare Unicode strings. On big-endian * systems we can use the more efficient memcmp, but this would not be * lexically correct on little-endian systems. The ANSI C "prototype" for diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 5f4cdae..8c6adfe 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2334,7 +2334,7 @@ TclStringMatchObj( udata = Tcl_GetUnicodeFromObj(strObj, &length); uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); - } else if ((strObj->typePtr == &tclByteArrayType) && !flags) { + } else if (TclIsPureByteArray(strObj) && !flags) { unsigned char *data, *ptn; data = Tcl_GetByteArrayFromObj(strObj, &length); -- cgit v0.12