From 9d73340679f7c5a710e47d19fe4cdfc41461afe3 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 9 Sep 2005 15:44:26 +0000 Subject: * generic/tclInt.h: New internal routines TclFormatObj() * generic/tclStringObj.c: and TclAppendFormattedObjs() to offer sprintf()-like means to append to Tcl_Obj. Work in progress toward [RFE 572392]. * generic/tclCmdAH.c: Compiler directive NEW_FORMAT when #define'd directs the [format] command to be implemented in terms of the new TclAppendFormattedObjs() routine. --- ChangeLog | 11 + generic/tclCmdAH.c | 19 +- generic/tclInt.h | 6 +- generic/tclStringObj.c | 600 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 633 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e913820..12e6253 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2005-09-09 Don Porter + + * generic/tclInt.h: New internal routines TclFormatObj() + * generic/tclStringObj.c: and TclAppendFormattedObjs() to offer + sprintf()-like means to append to Tcl_Obj. Work in progress toward + [RFE 572392]. + + * generic/tclCmdAH.c: Compiler directive NEW_FORMAT when #define'd + directs the [format] command to be implemented in terms of the new + TclAppendFormattedObjs() routine. + 2005-09-08 Donal K. Fellows TIP#254 IMPLEMENTATION diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 9e9ab9c..9d84adf 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdAH.c,v 1.66 2005/08/26 13:26:55 dkf Exp $ + * RCS: @(#) $Id: tclCmdAH.c,v 1.67 2005/09/09 15:44:27 dgp Exp $ */ #include "tclInt.h" @@ -1900,6 +1900,7 @@ Tcl_FormatObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { +#ifndef NEW_FORMAT char *format; /* Used to read characters from the format * string. */ int formatLen; /* The length of the format string */ @@ -1932,7 +1933,9 @@ Tcl_FormatObjCmd(dummy, interp, objc, objv) #define WIDE_VALUE 5 #define MAX_FLOAT_SIZE 320 +#endif Tcl_Obj *resultPtr; /* Where result is stored finally. */ +#ifndef NEW_FORMAT char staticBuf[MAX_FLOAT_SIZE + 1]; /* A static buffer to copy the format results * into */ @@ -1974,12 +1977,25 @@ Tcl_FormatObjCmd(dummy, interp, objc, objv) * So, what happens here is to scan the format string one % group at a * time, making many individual calls to sprintf. */ +#endif if (objc < 2) { Tcl_WrongNumArgs(interp, 1, objv, "formatString ?arg arg ...?"); return TCL_ERROR; } +#ifdef NEW_FORMAT + resultPtr = Tcl_NewObj(); + Tcl_IncrRefCount(resultPtr); + if (TclAppendFormattedObjs(interp, resultPtr, Tcl_GetString(objv[1]), + objc-2, objv+2) != TCL_OK) { + Tcl_DecrRefCount(resultPtr); + return TCL_ERROR; + } + Tcl_SetObjResult(interp, resultPtr); + Tcl_DecrRefCount(resultPtr); + return TCL_OK; +#else format = Tcl_GetStringFromObj(objv[1], &formatLen); endPtr = format + formatLen; resultPtr = Tcl_NewObj(); @@ -2381,6 +2397,7 @@ Tcl_FormatObjCmd(dummy, interp, objc, objv) } Tcl_DecrRefCount(resultPtr); return TCL_ERROR; +#endif } /* diff --git a/generic/tclInt.h b/generic/tclInt.h index 4ec4f71..25b4b78 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.248 2005/09/06 14:40:11 dkf Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.249 2005/09/09 15:44:27 dgp Exp $ */ #ifndef _TCLINT @@ -1950,6 +1950,9 @@ MODULE_SCOPE char tclEmptyString; *---------------------------------------------------------------- */ +MODULE_SCOPE int TclAppendFormattedObjs(Tcl_Interp *interp, + Tcl_Obj *appendObj, CONST char *format, + int objc, Tcl_Obj *CONST objv[]); MODULE_SCOPE void TclAppendLimitedToObj(Tcl_Obj *objPtr, CONST char *bytes, int length, int limit, CONST char *ellipsis); @@ -1998,6 +2001,7 @@ MODULE_SCOPE void TclFinalizePreserve(void); MODULE_SCOPE void TclFinalizeSynchronization(void); MODULE_SCOPE void TclFinalizeThreadData(void); MODULE_SCOPE void TclFormatNaN(double value, char* buffer); +MODULE_SCOPE int TclFormatObj TCL_VARARGS(Tcl_Interp *, arg1); MODULE_SCOPE int TclFSFileAttrIndex(Tcl_Obj *pathPtr, CONST char *attributeName, int *indexPtr); MODULE_SCOPE Tcl_Obj * TclGetBgErrorHandler(Tcl_Interp *interp); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index b77620b..453069d 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -33,7 +33,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.38 2005/07/24 22:56:43 dkf Exp $ */ + * RCS: @(#) $Id: tclStringObj.c,v 1.39 2005/09/09 15:44:27 dgp Exp $ */ #include "tclInt.h" @@ -52,6 +52,8 @@ static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr, static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr, CONST char *bytes, int numBytes)); static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr)); +static int FormatObjVA _ANSI_ARGS_((Tcl_Interp *interp, + va_list argList)); static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr, Tcl_Obj *copyPtr)); @@ -1669,6 +1671,602 @@ Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1) } /* + *---------------------------------------------------------------------- + * + * TclAppendFormattedObjs -- + * + * This function appends a list of Tcl_Obj's to a Tcl_Obj according + * to the formatting instructions embedded in the format string. The + * formatting instructions are inspired by sprintf(). Returns TCL_OK + * when successful. If there's an error in the arguments, TCL_ERROR is + * returned, and an error message is written to the interp, if non-NULL. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclAppendFormattedObjs(interp, baseObj, format, objc, objv) + Tcl_Interp *interp; + Tcl_Obj *baseObj; + CONST char *format; + int objc; + Tcl_Obj *CONST objv[]; +{ + CONST char *span = format; + int numBytes = 0; + int objIndex = 0; + int gotXpg = 0, gotSequential = 0; + Tcl_Obj *appendObj = Tcl_NewObj(); + CONST char *msg; + CONST char *mixedXPG = "cannot mix \"%\" and \"%n$\" conversion specifiers"; + CONST char *badIndex[2] = { + "not enough arguments for all format specifiers", + "\"%n$\" argument index out of range" + }; + + if (Tcl_IsShared(baseObj)) { + Tcl_Panic("TclAppendFormattedObjs called with shared object"); + } + + Tcl_IncrRefCount(appendObj); + /* format string is NUL-terminated */ + while (*format != '\0') { + char *end; + int gotMinus, gotHash, gotZero, gotSpace, gotPlus, sawFlag; + int width, gotPrecision, precision, useShort, useWide; + int newXpg, numChars, allocSegment = 0; + Tcl_Obj *segment; + Tcl_UniChar ch; + int step = Tcl_UtfToUniChar(format, &ch); + + format += step; + if (ch != '%') { + numBytes += step; + continue; + } + if (numBytes) { + Tcl_AppendToObj(appendObj, span, numBytes); + numBytes = 0; + } + + /* Saw a % : process the format specifier */ + /* 0. %% : Escape format handling */ + + step = Tcl_UtfToUniChar(format, &ch); + if (ch == '%') { + span = format; + numBytes = step; + format += step; + continue; + } + + /* 1. XPG3 position specifier */ + + newXpg = 0; + if (isdigit(UCHAR(ch))) { + int position = strtoul(format, &end, 10); + if (*end == '$') { + newXpg = 1; + objIndex = position - 1; + format = end + 1; + step = Tcl_UtfToUniChar(format, &ch); + } + } + if (newXpg) { + if (gotSequential) { + msg = mixedXPG; + goto errorMsg; + } + gotXpg = 1; + } else { + if (gotXpg) { + msg = mixedXPG; + goto errorMsg; + } + gotSequential = 1; + } + if ((objIndex < 0) || (objIndex >= objc)) { + msg = badIndex[gotXpg]; + goto errorMsg; + } + + /* 2. Set of flags */ + + gotMinus = gotHash = gotZero = gotSpace = gotPlus = 0; + sawFlag = 1; + do { + switch (ch) { + case '-': + gotMinus = 1; + break; + case '#': + gotHash = 1; + break; + case '0': + gotZero = 1; + break; + case ' ': + gotSpace = 1; + break; + case '+': + gotPlus = 1; + break; + default: + sawFlag = 0; + } + if (sawFlag) { + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + } while (sawFlag); + + /* 3. Minimum field width */ + + width = 0; + if (isdigit(UCHAR(ch))) { + width = strtoul(format, &end, 10); + format = end; + step = Tcl_UtfToUniChar(format, &ch); + } else if (ch == '*') { + if (objIndex >= objc - 1) { + msg = badIndex[gotXpg]; + goto errorMsg; + } + if (Tcl_GetIntFromObj(interp, objv[objIndex], &width) != TCL_OK) { + goto error; + } + if (width < 0) { + width = -width; + gotMinus = 1; + } + objIndex++; + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + + /* 4. Precision */ + + gotPrecision = precision = 0; + if (ch == '.') { + gotPrecision = 1; + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + if (isdigit(UCHAR(ch))) { + precision = strtoul(format, &end, 10); + format = end; + step = Tcl_UtfToUniChar(format, &ch); + } else if (ch == '*') { + if (objIndex >= objc - 1) { + msg = badIndex[gotXpg]; + goto errorMsg; + } + if (Tcl_GetIntFromObj(interp, objv[objIndex], &precision) + != TCL_OK) { + goto error; + } + /* TODO: Check this truncation logic */ + if (precision < 0) { + precision = 0; + } + objIndex++; + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + + /* 5. Length modifier */ + + useShort = useWide = 0; + if (ch == 'h') { + useShort = 1; + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } else if (ch == 'l') { +#ifndef TCL_WIDE_INT_IS_LONG + useWide = 1; +#endif + format += step; + step = Tcl_UtfToUniChar(format, &ch); + } + + format += step; + span = format; + + /* 6. Conversion character */ + segment = objv[objIndex]; + if (ch == 'i') { + ch = 'd'; + } + switch (ch) { + case '\0': + msg = "format string ended in middle of field specifier"; + goto errorMsg; + case 's': { + numChars = Tcl_GetCharLength(segment); + if (gotPrecision && (precision < numChars)) { + segment = Tcl_GetRange(segment, 0, precision - 1); + Tcl_IncrRefCount(segment); + allocSegment = 1; + } + break; + } + case 'c': { + char buf[TCL_UTF_MAX]; + int code, length; + if (Tcl_GetIntFromObj(interp, segment, &code) != TCL_OK) { + goto error; + } + length = Tcl_UniCharToUtf(code, buf); + segment = Tcl_NewStringObj(buf, length); + Tcl_IncrRefCount(segment); + allocSegment = 1; + break; + } + + case 'u': + case 'd': + case 'o': + case 'x': + case 'X': { + short int s; + long l; + Tcl_WideInt w; + int isNegative = 0; + + if (Tcl_GetLongFromObj(NULL, segment, &l) != TCL_OK) { + if (Tcl_GetWideIntFromObj(interp, segment, &w) != TCL_OK) { + goto error; + } + l = Tcl_WideAsLong(w); + } else if (useWide) { + w = Tcl_LongAsWide(l); + } + + if (useShort) { + s = (short int) l; + isNegative = (s < (short int)0); + } else if (useWide) { + isNegative = (w < (Tcl_WideInt)0); + } else { + isNegative = (l < (long)0); + } + + segment = Tcl_NewObj(); + allocSegment = 1; + Tcl_IncrRefCount(segment); + + if (isNegative || gotPlus) { + if (ch == 'd') { + if (isNegative) { + Tcl_AppendToObj(segment, "-", 1); + } else { + Tcl_AppendToObj(segment, "+", 1); + } + } + } + + if (gotHash) { + switch (ch) { + case 'o': + Tcl_AppendToObj(segment, "0", 1); + precision--; + break; + case 'x': + case 'X': + Tcl_AppendToObj(segment, "0x", 2); + break; + } + } + + switch (ch) { + case 'd': { + int length; + Tcl_Obj *pure; + CONST char *bytes; + + if (isNegative) { + if (useShort) { + pure = Tcl_NewIntObj((int)(-s)); + } else if (useWide) { + pure = Tcl_NewWideIntObj(-w); + } else { + pure = Tcl_NewLongObj(-l); + } + } else { + if (useShort) { + pure = Tcl_NewIntObj((int)(s)); + } else if (useWide) { + pure = Tcl_NewWideIntObj(w); + } else { + pure = Tcl_NewLongObj(l); + } + } + Tcl_IncrRefCount(pure); + bytes = Tcl_GetStringFromObj(pure, &length); + /* Handle things like -INT_MIN == INT_MIN */ + if (*bytes == '-') { + length--; bytes++; + } + /* Canonical decimal string reps for integers are composed + * entirely of one-byte encoded characters, so "length" is + * the number of chars */ + if (gotPrecision) { + while (length < precision) { + Tcl_AppendToObj(segment, "0", 1); + length++; + } + gotZero = 0; + } + if (gotZero) { + length += Tcl_GetCharLength(segment); + while (length < width) { + Tcl_AppendToObj(segment, "0", 1); + length++; + } + } + Tcl_AppendToObj(segment, bytes, -1); + Tcl_DecrRefCount(pure); + break; + } + + case 'u': + case 'o': + case 'x': + case 'X': { + Tcl_WideUInt bits; + int length, numDigits = 0, base = 16; + Tcl_Obj *pure; + char *bytes; + + if (ch == 'u') { + base = 10; + } + if (ch == 'o') { + base = 8; + } + if (useShort) { + unsigned short int us = (unsigned short int) s; + bits = (Tcl_WideUInt) us; + while (us) { + numDigits++; + us /= base; + } + } else if (useWide) { + Tcl_WideUInt uw = (Tcl_WideUInt) w; + bits = uw; + while (uw) { + numDigits++; + uw /= base; + } + } else { + unsigned long int ul = (unsigned long int) l; + bits = (Tcl_WideUInt) ul; + while (ul) { + numDigits++; + ul /= base; + } + } + /* Need to be sure zero becomes "0", not "" */ + if ((numDigits == 0) && !((ch == 'o') && gotHash)) { + numDigits = 1; + } + pure = Tcl_NewObj(); + Tcl_SetObjLength(pure, numDigits); + bytes = Tcl_GetString(pure); + length = numDigits; + while (numDigits--) { + int digitOffset = bits % base; + if (digitOffset > 9) { + bytes[numDigits] = 'a' + digitOffset - 10; + } else { + bytes[numDigits] = '0' + digitOffset; + } + bits /= base; + } + if (gotPrecision) { + while (length < precision) { + Tcl_AppendToObj(segment, "0", 1); + length++; + } + gotZero = 0; + } + if (gotZero) { + length += Tcl_GetCharLength(segment); + while (length < width) { + Tcl_AppendToObj(segment, "0", 1); + length++; + } + } + Tcl_AppendObjToObj(segment, pure); + Tcl_DecrRefCount(pure); + break; + } + + } + break; + } + + case 'e': + case 'E': + case 'f': + case 'g': + case 'G': { +#define MAX_FLOAT_SIZE 320 + char spec[2*TCL_INTEGER_SPACE + 9], *p = spec; + double d; + int length = MAX_FLOAT_SIZE; + char *bytes; + + if (Tcl_GetDoubleFromObj(interp, segment, &d) != TCL_OK) { + goto error; + } + *p++ = '%'; + if (gotMinus) { + *p++ = '-'; + } + if (gotHash) { + *p++ = '#'; + } + if (gotZero) { + *p++ = '0'; + } + if (gotSpace) { + *p++ = ' '; + } + if (gotPlus) { + *p++ = '+'; + } + if (width) { + p += sprintf(p, "%d", width); + } + if (gotPrecision) { + *p++ = '.'; + p += sprintf(p, "%d", precision); + length += precision; + } + /* Don't pass length modifiers ! */ + *p++ = ch; + *p = '\0'; + + segment = Tcl_NewObj(); + allocSegment = 1; + Tcl_SetObjLength(segment, length); + bytes = Tcl_GetString(segment); + Tcl_SetObjLength(segment, sprintf(bytes, spec, d)); + break; + } + default: { + char buf[40]; + sprintf(buf, "bad field specifier \"%c\"", ch); + msg = buf; + goto errorMsg; + } + } + + switch (ch) { + case 'E': + case 'G': + case 'X': { + Tcl_SetObjLength(segment, Tcl_UtfToUpper(Tcl_GetString(segment))); + } + } + + numChars = Tcl_GetCharLength(segment); + if (!gotMinus) { + while (numChars < width) { + Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); + numChars++; + } + } + Tcl_AppendObjToObj(appendObj, segment); + if (allocSegment) { + Tcl_DecrRefCount(segment); + } + while (numChars < width) { + Tcl_AppendToObj(appendObj, (gotZero ? "0" : " "), 1); + numChars++; + } + + objIndex += gotSequential; + } + if (numBytes) { + Tcl_AppendToObj(appendObj, span, numBytes); + numBytes = 0; + } + + Tcl_AppendObjToObj(baseObj, appendObj); + Tcl_DecrRefCount(appendObj); + return TCL_OK; + + errorMsg: + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); + } + error: + Tcl_DecrRefCount(appendObj); + return TCL_ERROR; +} + +/* + *--------------------------------------------------------------------------- + * + * FormatObjVA -- + * + * Populate the Unicode internal rep with the Unicode form of its string + * rep. The object must alread have a "String" internal rep. + * + * Results: + * None. + * + * Side effects: + * Reallocates the String internal rep. + * + *--------------------------------------------------------------------------- + */ + +static int +FormatObjVA(interp, argList) + Tcl_Interp *interp; + va_list argList; +{ + int code, objc; + Tcl_Obj **objv, *element, *list = Tcl_NewObj(); + CONST char *format; + Tcl_Obj *objPtr = va_arg(argList, Tcl_Obj *); + + if (objPtr == NULL) { + Tcl_Panic("TclFormatObj: no Tcl_Obj to append to"); + } + + format = va_arg(argList, CONST char *); + if (format == NULL) { + Tcl_Panic("TclFormatObj: no format string argument"); + } + + Tcl_IncrRefCount(list); + element = va_arg(argList, Tcl_Obj *); + while (element != NULL) { + Tcl_ListObjAppendElement(NULL, list, element); + element = va_arg(argList, Tcl_Obj *); + } + Tcl_ListObjGetElements(NULL, list, &objc, &objv); + code = TclAppendFormattedObjs(interp, objPtr, format, objc, objv); + Tcl_DecrRefCount(list); + return code; +} + +/* + *--------------------------------------------------------------------------- + * + * TclFormatObj -- + * + * Results: + * A standard Tcl result. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +int +TclFormatObj TCL_VARARGS_DEF(Tcl_Interp *,arg1) +{ + va_list argList; + int result; + Tcl_Interp *interp = TCL_VARARGS_START(Tcl_Interp *,arg1,argList); + result = FormatObjVA(interp, argList); + va_end(argList); + return result; +} + +/* *--------------------------------------------------------------------------- * * FillUnicodeRep -- -- cgit v0.12