* generic/tclStringObj.c:
* generic/tclParseExpr.c: Revised ObjPrintfVA to take care to copy only whole characters when doing %s formatting. This relieves callers of TclObjPrintf() and TclFormatToErrorInfo() from needing to fix arguments to character boundaries. Tcl_ParseExpr() simplified by taking advantage. [Bug 1547786]
author: dgp <dgp@users.sourceforge.net> 2006-08-28 16:05:31 (GMT)
committer: dgp <dgp@users.sourceforge.net> 2006-08-28 16:05:31 (GMT)
commit: 9571f2381728758877fe7ef50963e3e373aad92f (patch)
tree: d56b5c209ddc88aa2b96deabf31937377f8a7010 /generic
parent: 484ddcb6a498fab5690d6e8d58fb6b75f16195c7 (diff)
download: tcl-9571f2381728758877fe7ef50963e3e373aad92f.zip
tcl-9571f2381728758877fe7ef50963e3e373aad92f.tar.gz
tcl-9571f2381728758877fe7ef50963e3e373aad92f.tar.bz2
2 files changed, 35 insertions, 25 deletions
diff --git a/generic/tclParseExpr.c b/generic/tclParseExpr.c
index 9560630..4a2ab8d 100644
--- a/generic/tclParseExpr.c
+++ b/generic/tclParseExpr.c
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclParseExpr.c,v 1.39 2006/08/23 21:31:55 dgp Exp $
+ * RCS: @(#) $Id: tclParseExpr.c,v 1.40 2006/08/28 16:05:32 dgp Exp $
  */
 
 #define OLD_EXPR_PARSER 0
@@ -2611,18 +2611,15 @@ Tcl_ParseExpr(
 	    TclObjPrintf(NULL, msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"",
 		    ((start - limit) < scratch.string) ? "" : "...",
 		    ((start - limit) < scratch.string)
-		    ? (start - scratch.string)
-		    : (start - Tcl_UtfPrev(start+1-limit+3, scratch.string)),
+		    ? (start - scratch.string) : limit - 3,
 		    ((start - limit) < scratch.string) 
-		    ? scratch.string
-		    : Tcl_UtfPrev(start+1-limit+3, scratch.string),
+		    ? scratch.string : start - limit + 3,
 		    (scanned < limit) ? scanned : limit - 3, start,
 		    (scanned < limit) ? "" : "...",
 		    insertMark ? mark : "",
 		    (start + scanned + limit > scratch.end)
-		    ? scratch.end - (start + scanned)
-		    : Tcl_UtfPrev(start+scanned+limit-3+1, start+scanned)
-		    - (start + scanned), start + scanned,
+		    ? scratch.end - (start + scanned) : limit-3, 
+		    start + scanned,
 		    (start + scanned + limit > scratch.end) ? "" : "..."
 		    );
 	    if (post != NULL) {
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index c0736f3..40ec1bf 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -33,7 +33,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStringObj.c,v 1.56 2006/08/28 14:13:22 dgp Exp $ */
+ * RCS: @(#) $Id: tclStringObj.c,v 1.57 2006/08/28 16:05:32 dgp Exp $ */
 
 #include "tclInt.h"
 #include "tommath.h"
@@ -2392,7 +2392,7 @@ ObjPrintfVA(
     Tcl_IncrRefCount(list);
     while (*p != '\0') {
 	int size = 0, seekingConversion = 1, gotPrecision = 0;
-	int lastNum = -1, numBytes = -1;
+	int lastNum = -1;
 
 	if (*p++ != '%') {
 	    continue;
@@ -2408,27 +2408,40 @@ ObjPrintfVA(
 		seekingConversion = 0;
 		break;
 	    case 's': {
-		char *bytes = va_arg(argList, char *);
+		CONST char *q, *end, *bytes = va_arg(argList, char *);
 		seekingConversion = 0;
-		if (gotPrecision) {
-		    char *end = bytes + lastNum;
-		    char *q = bytes;
-		    while ((q < end) && (*q != '\0')) {
-			q++;
-		    }
-		    numBytes = (int)(q - bytes);
+
+		/*
+		 * The buffer to copy characters from starts at bytes
+		 * and ends at either the first NUL byte, or after
+		 * lastNum bytes, when caller has indicated a limit.  
+		 */
+
+		end = bytes;
+		while ((!gotPrecision || lastNum--) && (*end != '\0')) {
+		    end++;
 		}
-		Tcl_ListObjAppendElement(NULL, list,
-			Tcl_NewStringObj(bytes , numBytes));
 
 		/*
-		 * We took no more than numBytes bytes from the (char *). In
-		 * turn, [format] will take no more than numBytes characters
-		 * from the Tcl_Obj. Since numBytes characters must be no less
-		 * than numBytes bytes, the character limit will have no
-		 * effect and we can just pass it through.
+		 * Within that buffer, we trim both ends if needed so that
+		 * we copy only whole characters, and avoid copying any
+		 * partial multi-byte characters.
 		 */
 
+		q = Tcl_UtfPrev(end, bytes);
+		if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
+		    end = q;
+		}
+
+		q = bytes + TCL_UTF_MAX;
+		while ((bytes < end) && (bytes < q)
+			&& ((*bytes & 0xC0) == 0x80)) {
+		    bytes++;
+		}
+
+		Tcl_ListObjAppendElement(NULL, list,
+			Tcl_NewStringObj(bytes , (int)(end - bytes)));
+
 		break;
 	    }
 	    case 'c':
author	dgp <dgp@users.sourceforge.net>	2006-08-28 16:05:31 (GMT)
committer	dgp <dgp@users.sourceforge.net>	2006-08-28 16:05:31 (GMT)
commit	9571f2381728758877fe7ef50963e3e373aad92f (patch)
tree	d56b5c209ddc88aa2b96deabf31937377f8a7010 /generic
parent	484ddcb6a498fab5690d6e8d58fb6b75f16195c7 (diff)
download	tcl-9571f2381728758877fe7ef50963e3e373aad92f.zip tcl-9571f2381728758877fe7ef50963e3e373aad92f.tar.gz tcl-9571f2381728758877fe7ef50963e3e373aad92f.tar.bz2