From 1c52941e5f67f7f374dbc110234bf18a7ac4844a Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Thu, 1 May 2014 07:38:27 +0000
Subject: Fix more corner-cases like
 [0e92c404f19ede5b2eb06e6db27647d3138cc56|0e92c404f1]: The only place where a
 type of &tclByteArrayType can be trusted is when determining its length,
 because the character length of a (UTF-8) string is always equal to the byte
 length of the byte array.

---
 generic/tclCmdMZ.c   | 12 ++++++------
 generic/tclExecute.c | 10 +++++-----
 generic/tclInt.h     | 18 ++++++++++++++++++
 generic/tclUtil.c    |  2 +-
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d106f53..70943e9 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1345,7 +1345,7 @@ StringIndexCmd(
      * Unicode string rep to get the index'th char.
      */
 
-    if (objv[1]->typePtr == &tclByteArrayType) {
+    if (TclIsPureByteArray(objv[1])) {
 	const unsigned char *string =
 		Tcl_GetByteArrayFromObj(objv[1], &length);
 
@@ -2086,7 +2086,7 @@ StringRangeCmd(
      * Unicode string rep to get the range.
      */
 
-    if (objv[1]->typePtr == &tclByteArrayType && (objv[1]->bytes==NULL)) {
+    if (TclIsPureByteArray(objv[1])) {
 	string = Tcl_GetByteArrayFromObj(objv[1], &length);
 	length--;
     } else {
@@ -2537,8 +2537,8 @@ StringEqualCmd(
 	return TCL_OK;
     }
 
-    if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
-	    objv[1]->typePtr == &tclByteArrayType) {
+    if (!nocase && TclIsPureByteArray(objv[0]) &&
+	    TclIsPureByteArray(objv[1])) {
 	/*
 	 * Use binary versions of comparisons since that won't cause undue
 	 * type conversions and it is much faster. Only do this if we're
@@ -2684,8 +2684,8 @@ StringCmpCmd(
 	return TCL_OK;
     }
 
-    if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
-	    objv[1]->typePtr == &tclByteArrayType) {
+    if (!nocase && TclIsPureByteArray(objv[0]) &&
+	    TclIsPureByteArray(objv[1])) {
 	/*
 	 * Use binary versions of comparisons since that won't cause undue
 	 * type conversions and it is much faster. Only do this if we're
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 2e396e8..c4f9836 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -4235,8 +4235,8 @@ TclExecuteByteCode(
 	     */
 
 	    iResult = s1len = s2len = 0;
-	} else if ((valuePtr->typePtr == &tclByteArrayType)
-		&& (value2Ptr->typePtr == &tclByteArrayType)) {
+	} else if (TclIsPureByteArray(valuePtr)
+		&& TclIsPureByteArray(value2Ptr)) {
 	    s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
 	    s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
 	    iResult = memcmp(s1, s2,
@@ -4354,7 +4354,7 @@ TclExecuteByteCode(
 	 * use the Unicode string rep to get the index'th char.
 	 */
 
-	if (valuePtr->typePtr == &tclByteArrayType) {
+	if (TclIsPureByteArray(valuePtr)) {
 	    bytes = (char *)Tcl_GetByteArrayFromObj(valuePtr, &length);
 	} else {
 	    /*
@@ -4370,7 +4370,7 @@ TclExecuteByteCode(
 	}
 
 	if ((index >= 0) && (index < length)) {
-	    if (valuePtr->typePtr == &tclByteArrayType) {
+	    if (TclIsPureByteArray(valuePtr)) {
 		objResultPtr = Tcl_NewByteArrayObj((unsigned char *)
 			(&bytes[index]), 1);
 	    } else if (valuePtr->bytes && length == valuePtr->length) {
@@ -4422,7 +4422,7 @@ TclExecuteByteCode(
 	    ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2);
 	    match = TclUniCharMatch(ustring1, length1, ustring2, length2,
 		    nocase);
-	} else if ((valuePtr->typePtr == &tclByteArrayType) && !nocase) {
+	} else if (TclIsPureByteArray(valuePtr) && !nocase) {
 	    unsigned char *string1, *string2;
 	    int length1, length2;
 
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 00b246b..2353450 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3668,6 +3668,24 @@ MODULE_SCOPE void	TclDbInitNewObj(Tcl_Obj *objPtr, CONST char *file,
 
 /*
  *----------------------------------------------------------------
+ * Macro that encapsulates the logic that determines when it is safe to
+ * interpret a string as a byte array directly. In summary, the object must be
+ * a byte array and must not have a string representation (as the operations
+ * that it is used in are defined on strings, not byte arrays). Theoretically
+ * it is possible to also be efficient in the case where the object's bytes
+ * field is filled by generation from the byte array (cf. list canonicality)
+ * but we don't do that at the moment since this is purely about efficiency.
+ * The ANSI C "prototype" for this macro is:
+ *
+ * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
+ *----------------------------------------------------------------
+ */
+
+#define TclIsPureByteArray(objPtr) \
+	(((objPtr)->typePtr==&tclByteArrayType) && ((objPtr)->bytes==NULL))
+
+/*
+ *----------------------------------------------------------------
  * Macro used by the Tcl core to compare Unicode strings. On big-endian
  * systems we can use the more efficient memcmp, but this would not be
  * lexically correct on little-endian systems. The ANSI C "prototype" for
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 5f4cdae..8c6adfe 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -2334,7 +2334,7 @@ TclStringMatchObj(
 	udata = Tcl_GetUnicodeFromObj(strObj, &length);
 	uptn  = Tcl_GetUnicodeFromObj(ptnObj, &plen);
 	match = TclUniCharMatch(udata, length, uptn, plen, flags);
-    } else if ((strObj->typePtr == &tclByteArrayType) && !flags) {
+    } else if (TclIsPureByteArray(strObj) && !flags) {
 	unsigned char *data, *ptn;
 
 	data = Tcl_GetByteArrayFromObj(strObj, &length);
-- 
cgit v0.12