summary refs log tree commit diff stats
path: root/generic
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2014-05-01 07:38:27 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2014-05-01 07:38:27 (GMT)
commit 1c52941e5f67f7f374dbc110234bf18a7ac4844a (patch)
tree 4f49557b0fc34ae6f65fc299aaadf9d60bfcc691 /generic
parent 7df749abdc0780eb176e6fade94388d60cd8a0ef (diff)
download tcl-1c52941e5f67f7f374dbc110234bf18a7ac4844a.zip
tcl-1c52941e5f67f7f374dbc110234bf18a7ac4844a.tar.gz
tcl-1c52941e5f67f7f374dbc110234bf18a7ac4844a.tar.bz2
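Fix more corner cases like [0e92c404f19ede5b2eb06e6db27647d3138cc56|0e92c404f1]: a typePtr of &tclByteArrayType on its own can only be trusted when determining an object's length, because the character length of the (UTF-8) string representation is always equal to the byte length of the byte array. Everywhere else, the byte-array fast path is now taken only for a pure byte array, i.e. one that has no string representation (see the new TclIsPureByteArray macro).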
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclCmdMZ.c 12
-rw-r--r-- generic/tclExecute.c 10
-rw-r--r-- generic/tclInt.h 18
-rw-r--r-- generic/tclUtil.c 2
4 files changed, 30 insertions, 12 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d106f53..70943e9 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1345,7 +1345,7 @@ StringIndexCmd(
* Unicode string rep to get the index'th char.
*/
- if (objv[1]->typePtr == &tclByteArrayType) {
+ if (TclIsPureByteArray(objv[1])) {
const unsigned char *string =
Tcl_GetByteArrayFromObj(objv[1], &length);
@@ -2086,7 +2086,7 @@ StringRangeCmd(
* Unicode string rep to get the range.
*/
- if (objv[1]->typePtr == &tclByteArrayType && (objv[1]->bytes==NULL)) {
+ if (TclIsPureByteArray(objv[1])) {
string = Tcl_GetByteArrayFromObj(objv[1], &length);
length--;
} else {
@@ -2537,8 +2537,8 @@ StringEqualCmd(
return TCL_OK;
}
- if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
- objv[1]->typePtr == &tclByteArrayType) {
+ if (!nocase && TclIsPureByteArray(objv[0]) &&
+ TclIsPureByteArray(objv[1])) {
/*
* Use binary versions of comparisons since that won't cause undue
* type conversions and it is much faster. Only do this if we're
@@ -2684,8 +2684,8 @@ StringCmpCmd(
return TCL_OK;
}
- if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
- objv[1]->typePtr == &tclByteArrayType) {
+ if (!nocase && TclIsPureByteArray(objv[0]) &&
+ TclIsPureByteArray(objv[1])) {
/*
* Use binary versions of comparisons since that won't cause undue
* type conversions and it is much faster. Only do this if we're
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 2e396e8..c4f9836 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -4235,8 +4235,8 @@ TclExecuteByteCode(
*/
iResult = s1len = s2len = 0;
- } else if ((valuePtr->typePtr == &tclByteArrayType)
- && (value2Ptr->typePtr == &tclByteArrayType)) {
+ } else if (TclIsPureByteArray(valuePtr)
+ && TclIsPureByteArray(value2Ptr)) {
s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
iResult = memcmp(s1, s2,
@@ -4354,7 +4354,7 @@ TclExecuteByteCode(
* use the Unicode string rep to get the index'th char.
*/
- if (valuePtr->typePtr == &tclByteArrayType) {
+ if (TclIsPureByteArray(valuePtr)) {
bytes = (char *)Tcl_GetByteArrayFromObj(valuePtr, &length);
} else {
/*
@@ -4370,7 +4370,7 @@ TclExecuteByteCode(
}
if ((index >= 0) && (index < length)) {
- if (valuePtr->typePtr == &tclByteArrayType) {
+ if (TclIsPureByteArray(valuePtr)) {
objResultPtr = Tcl_NewByteArrayObj((unsigned char *)
(&bytes[index]), 1);
} else if (valuePtr->bytes && length == valuePtr->length) {
@@ -4422,7 +4422,7 @@ TclExecuteByteCode(
ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
match = TclUniCharMatch(ustring1, length1, ustring2, length2,
nocase);
- } else if ((valuePtr->typePtr == &tclByteArrayType) && !nocase) {
+ } else if (TclIsPureByteArray(valuePtr) && !nocase) {
unsigned char *string1, *string2;
int length1, length2;
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 00b246b..2353450 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3668,6 +3668,24 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file,
/*
*----------------------------------------------------------------
+ * Macro that encapsulates the logic that determines when it is safe to
+ * interpret a string as a byte array directly. In summary, the object must be
+ * a byte array and must not have a string representation (as the operations
+ * that it is used in are defined on strings, not byte arrays). Theoretically
+ * it is possible to also be efficient in the case where the object's bytes
+ * field is filled by generation from the byte array (c.f. list canonicality)
+ * but we don't do that at the moment since this is purely about efficiency.
+ * The ANSI C "prototype" for this macro is:
+ *
+ * MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr);
+ *----------------------------------------------------------------
+ */
+
+#define TclIsPureByteArray(objPtr) \
+ (((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL))
+
+/*
+ *----------------------------------------------------------------
* Macro used by the Tcl core to compare Unicode strings. On big-endian
* systems we can use the more efficient memcmp, but this would not be
* lexically correct on little-endian systems. The ANSI C "prototype" for
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 5f4cdae..8c6adfe 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2334,7 +2334,7 @@ TclStringMatchObj(
udata = Tcl_GetUnicodeFromObj(strObj, &length);
uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen);
match = TclUniCharMatch(udata, length, uptn, plen, flags);
- } else if ((strObj->typePtr == &tclByteArrayType) && !flags) {
+ } else if (TclIsPureByteArray(strObj) && !flags) {
unsigned char *data, *ptn;
data = Tcl_GetByteArrayFromObj(strObj, &length);