From 9571f2381728758877fe7ef50963e3e373aad92f Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 28 Aug 2006 16:05:31 +0000 Subject: * generic/tclStringObj.c: Revised ObjPrintfVA to take care * generic/tclParseExpr.c: to copy only whole characters when doing %s formatting. This relieves callers of TclObjPrintf() and TclFormatToErrorInfo() from needing to fix arguments to character boundaries. Tcl_ParseExpr() simplified by taking advantage. [Bug 1547786] --- ChangeLog | 7 +++++++ generic/tclParseExpr.c | 13 +++++-------- generic/tclStringObj.c | 47 ++++++++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/ChangeLog b/ChangeLog index 456f661..425f857 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2006-08-28 Don Porter + * generic/tclStringObj.c: Revised ObjPrintfVA to take care + * generic/tclParseExpr.c: to copy only whole characters when + doing %s formatting. This relieves callers of TclObjPrintf() and + TclFormatToErrorInfo() from needing to fix arguments to character + boundaries. Tcl_ParseExpr() simplified by taking advantage. + [Bug 1547786] + * generic/tclStringObj.c: Corrected TclFormatObj's failure to count up the number of arguments required by examining the format string. [Bug 1547681] diff --git a/generic/tclParseExpr.c b/generic/tclParseExpr.c index 9560630..4a2ab8d 100644 --- a/generic/tclParseExpr.c +++ b/generic/tclParseExpr.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclParseExpr.c,v 1.39 2006/08/23 21:31:55 dgp Exp $ + * RCS: @(#) $Id: tclParseExpr.c,v 1.40 2006/08/28 16:05:32 dgp Exp $ */ #define OLD_EXPR_PARSER 0 @@ -2611,18 +2611,15 @@ Tcl_ParseExpr( TclObjPrintf(NULL, msg, "\nin expression \"%s%.*s%.*s%s%s%.*s%s\"", ((start - limit) < scratch.string) ? "" : "...", ((start - limit) < scratch.string) - ? (start - scratch.string) - : (start - Tcl_UtfPrev(start+1-limit+3, scratch.string)), + ? (start - scratch.string) : limit - 3, ((start - limit) < scratch.string) - ? scratch.string - : Tcl_UtfPrev(start+1-limit+3, scratch.string), + ? scratch.string : start - limit + 3, (scanned < limit) ? scanned : limit - 3, start, (scanned < limit) ? "" : "...", insertMark ? mark : "", (start + scanned + limit > scratch.end) - ? scratch.end - (start + scanned) - : Tcl_UtfPrev(start+scanned+limit-3+1, start+scanned) - - (start + scanned), start + scanned, + ? scratch.end - (start + scanned) : limit-3, + start + scanned, (start + scanned + limit > scratch.end) ? "" : "..." ); if (post != NULL) { diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c0736f3..40ec1bf 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -33,7 +33,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.56 2006/08/28 14:13:22 dgp Exp $ */ + * RCS: @(#) $Id: tclStringObj.c,v 1.57 2006/08/28 16:05:32 dgp Exp $ */ #include "tclInt.h" #include "tommath.h" @@ -2392,7 +2392,7 @@ ObjPrintfVA( Tcl_IncrRefCount(list); while (*p != '\0') { int size = 0, seekingConversion = 1, gotPrecision = 0; - int lastNum = -1, numBytes = -1; + int lastNum = -1; if (*p++ != '%') { continue; @@ -2408,27 +2408,40 @@ ObjPrintfVA( seekingConversion = 0; break; case 's': { - char *bytes = va_arg(argList, char *); + CONST char *q, *end, *bytes = va_arg(argList, char *); seekingConversion = 0; - if (gotPrecision) { - char *end = bytes + lastNum; - char *q = bytes; - while ((q < end) && (*q != '\0')) { - q++; - } - numBytes = (int)(q - bytes); + + /* + * The buffer to copy characters from starts at bytes + * and ends at either the first NUL byte, or after + * lastNum bytes, when caller has indicated a limit. + */ + + end = bytes; + while ((!gotPrecision || lastNum--) && (*end != '\0')) { + end++; } - Tcl_ListObjAppendElement(NULL, list, - Tcl_NewStringObj(bytes , numBytes)); /* - * We took no more than numBytes bytes from the (char *). In - * turn, [format] will take no more than numBytes characters - * from the Tcl_Obj. Since numBytes characters must be no less - * than numBytes bytes, the character limit will have no - * effect and we can just pass it through. + * Within that buffer, we trim both ends if needed so that + * we copy only whole characters, and avoid copying any + * partial multi-byte characters. */ + q = Tcl_UtfPrev(end, bytes); + if (!Tcl_UtfCharComplete(q, (int)(end - q))) { + end = q; + } + + q = bytes + TCL_UTF_MAX; + while ((bytes < end) && (bytes < q) + && ((*bytes & 0xC0) == 0x80)) { + bytes++; + } + + Tcl_ListObjAppendElement(NULL, list, + Tcl_NewStringObj(bytes , (int)(end - bytes))); + break; } case 'c': -- cgit v0.12