summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorstanton <stanton>1998-11-16 20:45:22 (GMT)
committerstanton <stanton>1998-11-16 20:45:22 (GMT)
commit9146dba483fef4e11e316b7f6503565b7c656ce4 (patch)
tree7753519b63c4a9c573278d25377599c358ab160d
parentc0fd219230e4295c44743ad165a0d5a1c980b7dc (diff)
downloadtcl-9146dba483fef4e11e316b7f6503565b7c656ce4.zip
tcl-9146dba483fef4e11e316b7f6503565b7c656ce4.tar.gz
tcl-9146dba483fef4e11e316b7f6503565b7c656ce4.tar.bz2
moved scan implementation into tclScan.c
changed scan to support unicode
-rw-r--r--generic/tclCmdMZ.c382
-rw-r--r--generic/tclScan.c1008
-rw-r--r--tests/scan.test64
-rw-r--r--win/makefile.vc3
4 files changed, 1076 insertions, 381 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 1671fef..7fa09d6 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.6 1998/11/11 04:54:08 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.7 1998/11/16 20:45:22 stanton Exp $
*/
#include "tclInt.h"
@@ -21,6 +21,22 @@
#include "tclRegexp.h"
/*
+ * Flag values used by Tcl_ScanObjCmd.
+ */
+
+#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
+#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
+#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
+#define SCAN_WIDTH 0x8 /* A width value was supplied. */
+
+#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
+#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
+#define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */
+#define SCAN_XOK 0x80 /* An 'x' is allowed. */
+#define SCAN_PTOK 0x100 /* Decimal point is allowed. */
+#define SCAN_EXPOK 0x200 /* An exponent is allowed. */
+
+/*
* Structure used to hold information about variable traces:
*/
@@ -633,370 +649,6 @@ Tcl_ReturnObjCmd(dummy, interp, objc, objv)
/*
*----------------------------------------------------------------------
*
- * Tcl_ScanObjCmd --
- *
- * This procedure is invoked to process the "scan" Tcl command.
- * See the user documentation for details on what it does.
- *
- * Results:
- * A standard Tcl result.
- *
- * Side effects:
- * See the user documentation.
- *
- *----------------------------------------------------------------------
- */
-
- /* ARGSUSED */
-int
-Tcl_ScanObjCmd(dummy, interp, objc, objv)
- ClientData dummy; /* Not used. */
- Tcl_Interp *interp; /* Current interpreter. */
- int objc; /* Number of arguments. */
- Tcl_Obj *CONST objv[]; /* Argument objects. */
-{
-# define MAX_FIELDS 20
- typedef struct {
- char fmt; /* Format for field. */
- int size; /* How many bytes to allow for
- * field. */
- char *location; /* Where field will be stored. */
- } Field;
- Field fields[MAX_FIELDS]; /* Info about all the fields in the
- * format string. */
- register Field *curField;
- int numFields = 0; /* Number of fields actually
- * specified. */
- int suppress; /* Current field is assignment-
- * suppressed. */
- int totalSize = 0; /* Number of bytes needed to store
- * all results combined. */
- char *results = NULL; /* Where scanned output goes.
- * Malloced; NULL means not allocated
- * yet. */
- Tcl_Obj *varPtr = NULL; /* The vars set by sscanf converted
- * to Tcl_Objects. Initialized to
- * avoid compiler warning. */
- int numScanned; /* sscanf's result. */
- register char *fmt; /* The format specifiers */
- char *src; /* The input to be parsed */
- int i, widthSpecified, fmtLen, srcLen, code, value;
- char unsignedStr[40];
- Tcl_DString srcBuf, fmtBuf;
- Tcl_Encoding encoding;
-
- /*
- * The variables below are used to hold a copy of the format
- * string, so that we can replace format specifiers like "%f"
- * and "%F" with specifiers like "%lf"
- */
-
-# define STATIC_SIZE 5
- char copyBuf[STATIC_SIZE], *fmtCopy;
- register char *dst;
-
- if (objc < 3) {
- Tcl_WrongNumArgs(interp, 1, objv,
- "string format ?varName varName ...?");
- return TCL_ERROR;
- }
-
- encoding = Tcl_GetEncoding(interp, "iso8859-1");
-
- /*
- * This procedure operates in four stages:
- * 1. Scan the format string, collecting information about each field.
- * 2. Allocate an array to hold all of the scanned fields.
- * 3. Call sscanf to do all the dirty work, and have it store the
- * parsed fields in the array.
- * 4. Pick off the fields from the array and assign them to variables.
- */
-
- /*
- * INTL: ISO only.
- *
- * Convert the source and format strings from utf to iso8859-1 so
- * sscanf will work correctly.
- */
-
- code = TCL_OK;
- Tcl_UtfToExternalDString(encoding, Tcl_GetString(objv[1]), -1, &srcBuf);
- Tcl_UtfToExternalDString(encoding, Tcl_GetString(objv[2]), -1, &fmtBuf);
- src = Tcl_DStringValue(&srcBuf);
- srcLen = Tcl_DStringLength(&srcBuf) + 1;
- fmt = Tcl_DStringValue(&fmtBuf);
- fmtLen = (Tcl_DStringLength(&fmtBuf) * 2) + 1;
-
- if (fmtLen < STATIC_SIZE) {
- fmtCopy = copyBuf;
- } else {
- fmtCopy = (char *) ckalloc((unsigned) fmtLen);
- }
- dst = fmtCopy;
- for ( ; *fmt != 0; fmt++) {
- *dst = *fmt;
- dst++;
- if (*fmt != '%') {
- continue;
- }
- fmt++;
- if (*fmt == '%') {
- *dst = *fmt;
- dst++;
- continue;
- }
- if (*fmt == '*') {
- suppress = 1;
- *dst = *fmt;
- dst++;
- fmt++;
- } else {
- suppress = 0;
- }
- widthSpecified = 0;
- while (isdigit(UCHAR(*fmt))) { /* INTL: digit */
- widthSpecified = 1;
- *dst = *fmt;
- dst++;
- fmt++;
- }
- if ((*fmt == 'l') || (*fmt == 'h') || (*fmt == 'L')) {
- fmt++;
- }
- *dst = *fmt;
- dst++;
- if (suppress) {
- continue;
- }
- if (numFields == MAX_FIELDS) {
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "too many fields to scan", (char *) NULL);
- code = TCL_ERROR;
- goto done;
- }
- curField = &fields[numFields];
- numFields++;
- switch (*fmt) {
- case 'd':
- case 'i':
- case 'o':
- case 'x':
- curField->fmt = 'd';
- curField->size = sizeof(int);
- break;
-
- case 'u':
- curField->fmt = 'u';
- curField->size = sizeof(int);
- break;
-
- case 's':
- curField->fmt = 's';
- curField->size = srcLen;
- break;
-
- case 'c':
- if (widthSpecified) {
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "field width may not be specified in %c conversion"
- , (char *) NULL);
- code = TCL_ERROR;
- goto done;
- }
- curField->fmt = 'c';
- curField->size = sizeof(int);
- break;
-
- case 'e':
- case 'f':
- case 'g':
- dst[-1] = 'l';
- dst[0] = 'f';
- dst++;
- curField->fmt = 'f';
- curField->size = sizeof(double);
- break;
-
- case '[':
- curField->fmt = 's';
- curField->size = srcLen;
- do {
- fmt++;
- if (*fmt == 0) {
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "unmatched [ in format string", (char *) NULL);
- code = TCL_ERROR;
- goto done;
- }
- *dst = *fmt;
- dst++;
- } while (*fmt != ']');
- break;
-
- default:
- {
- char buf[50];
-
- sprintf(buf, "bad scan conversion character \"%c\"",*fmt);
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), buf,
- (char *) NULL);
- code = TCL_ERROR;
- goto done;
- }
- }
- curField->size = TCL_ALIGN(curField->size);
- totalSize += curField->size;
- }
- *dst = 0;
-
- if (numFields != (objc - 3)) {
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "different numbers of variable names and field specifiers",
- (char *) NULL);
- code = TCL_ERROR;
- goto done;
- }
-
- /*
- * Step 2:
- */
-
- results = (char *) ckalloc((unsigned) totalSize);
- for (i = 0, totalSize = 0, curField = fields;
- i < numFields; i++, curField++) {
- curField->location = results + totalSize;
- totalSize += curField->size;
- }
-
- /*
- * Fill in the remaining fields with NULL; the only purpose of
- * this is to keep some memory analyzers, like Purify, from
- * complaining.
- */
-
- for ( ; i < MAX_FIELDS; i++, curField++) {
- curField->location = NULL;
- }
-
- /*
- * Step 3:
- */
-
- numScanned = sscanf(src, fmtCopy,
- fields[0].location, fields[1].location, fields[2].location,
- fields[3].location, fields[4].location, fields[5].location,
- fields[6].location, fields[7].location, fields[8].location,
- fields[9].location, fields[10].location, fields[11].location,
- fields[12].location, fields[13].location, fields[14].location,
- fields[15].location, fields[16].location, fields[17].location,
- fields[18].location, fields[19].location);
-
- /*
- * Step 4:
- */
-
- if (numScanned < numFields) {
- numFields = numScanned;
- }
- for (i = 0, curField = fields; i < numFields; i++, curField++) {
- switch (curField->fmt) {
- case 'd':
- varPtr = Tcl_NewIntObj(*((int *)curField->location));
- break;
-
- case 'u':
- /*
- * If value is < 0 then it cannot be stored in a Tcl
- * Integer. Store the unsigned value as a string.
- */
-
- value = (*((int *)curField->location));
- if (value < 0) {
- /*
- * Note: The curField->size was an upper limit to the
- * size of the string. The correct size needs to be
- * re-calculated.
- */
-
- /*
- * INTL: ISO only
- *
- * Convert the scanned string from iso8859-1 to utf.
- */
-
- sprintf(unsignedStr, "%u", value);
- Tcl_DStringSetLength(&srcBuf, 0);
- Tcl_ExternalToUtfDString(encoding, unsignedStr, -1,
- &srcBuf);
- varPtr = Tcl_NewStringObj(Tcl_DStringValue(&srcBuf), -1);
- } else {
- varPtr = Tcl_NewIntObj(value);
- }
- break;
-
- case 'c':
- varPtr = Tcl_NewIntObj(*((unsigned char *)curField->location));
- break;
-
- case 's':
- /*
- * Note: The curField->size was an upper limit to the size of
- * the string. The correct size needs to be re-calculated.
- */
-
- /*
- * INTL: ISO only
- *
- * Convert the scanned string from iso8859-1 to utf.
- */
-
- Tcl_DStringSetLength(&srcBuf, 0);
- Tcl_ExternalToUtfDString(encoding, curField->location, -1,
- &srcBuf);
- varPtr = Tcl_NewStringObj(Tcl_DStringValue(&srcBuf), -1);
- break;
-
- case 'f':
- varPtr = Tcl_NewDoubleObj(*((double *)curField->location));
- break;
-
- default:
- panic("Tcl_ScanObjCmd: unexpected curField->fmt '%c'",
- curField->fmt);
- /*
- * Never reached but tell smart compilers that varPtr
- * can't be used unitialized.
- */
- code = TCL_ERROR;
- goto done;
- }
- if (Tcl_SetObjVar2(interp,
- Tcl_GetString(objv[i+3]), NULL, varPtr, 0) == NULL) {
- Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
- "couldn't set variable \"",
- Tcl_GetString(objv[(i + 3)]), "\"", (char *) NULL);
- code = TCL_ERROR;
- Tcl_DecrRefCount(varPtr);
- goto done;
- }
- }
- Tcl_SetIntObj(Tcl_GetObjResult(interp), numScanned);
-
- done:
- if (results != NULL) {
- ckfree(results);
- }
- if (fmtCopy != copyBuf) {
- ckfree(fmtCopy);
- }
- Tcl_DStringFree(&srcBuf);
- Tcl_DStringFree(&fmtBuf);
- return code;
-}
-
-/*
- *----------------------------------------------------------------------
- *
* Tcl_SourceObjCmd --
*
* This procedure is invoked to process the "source" Tcl command.
diff --git a/generic/tclScan.c b/generic/tclScan.c
new file mode 100644
index 0000000..1b2fb20
--- /dev/null
+++ b/generic/tclScan.c
@@ -0,0 +1,1008 @@
+/*
+ * tclScan.c --
+ *
+ * This file contains the implementation of the "scan" command.
+ *
+ * Copyright (c) 1998 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * RCS: @(#) $Id: tclScan.c,v 1.1.2.1 1998/11/16 20:45:22 stanton Exp $
+ */
+
+#include "tclInt.h"
+
+/*
+ * Flag values used by Tcl_ScanObjCmd.
+ */
+
+#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
+#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
+#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
+#define SCAN_WIDTH 0x8 /* A width value was supplied. */
+
+#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
+#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
+#define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */
+#define SCAN_XOK 0x80 /* An 'x' is allowed. */
+#define SCAN_PTOK 0x100 /* Decimal point is allowed. */
+#define SCAN_EXPOK 0x200 /* An exponent is allowed. */
+
+
+/*
+ * The following structure contains the information associated with
+ * a character set.
+ */
+
+typedef struct CharSet {
+ int exclude; /* 1 if this is an exclusion set. */
+ int nchars;
+ Tcl_UniChar *chars;
+ int nranges;
+ struct Range {
+ Tcl_UniChar start;
+ Tcl_UniChar end;
+ } *ranges;
+} CharSet;
+
+/*
+ * Declarations for functions used only in this file.
+ */
+
+static int CharInSet _ANSI_ARGS_((CharSet *cset, Tcl_UniChar ch));
+static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
+static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
+ int numVars));
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * BuildCharSet --
+ *
+ * This function examines a character set format specification
+ * and builds a CharSet containing the individual characters and
+ * character ranges specified.
+ *
+ * Results:
+ * Returns the next format position.
+ *
+ * Side effects:
+ * Initializes the charset.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static char *
+BuildCharSet(cset, format)
+ CharSet *cset;
+ char *format; /* Points to first char of set. */
+{
+ Tcl_UniChar ch, start;
+ int offset, nranges;
+ char *end;
+
+ memset(cset, 0, sizeof(CharSet));
+
+ offset = Tcl_UtfToUniChar(format, &ch);
+ if (ch == '^') {
+ cset->exclude = 1;
+ format += offset;
+ offset = Tcl_UtfToUniChar(format, &ch);
+ }
+ end = format + offset;
+
+ /*
+ * Find the close bracket so we can overallocate the set.
+ */
+
+ if (ch == ']') {
+ end += Tcl_UtfToUniChar(end, &ch);
+ }
+ nranges = 0;
+ while (ch != ']') {
+ if (ch == '-') {
+ nranges++;
+ }
+ end += Tcl_UtfToUniChar(end, &ch);
+ }
+
+ cset->chars = ckalloc(sizeof(Tcl_UniChar) * (end - format - 1));
+ if (nranges > 0) {
+ cset->ranges = ckalloc(sizeof(struct Range)*nranges);
+ } else {
+ cset->ranges = NULL;
+ }
+
+ /*
+ * Now build the character set.
+ */
+
+ cset->nchars = cset->nranges = 0;
+ format += Tcl_UtfToUniChar(format, &ch);
+ start = ch;
+ if (ch == ']' || ch == '-') {
+ cset->chars[cset->nchars++] = ch;
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+ while (ch != ']') {
+ if (*format == '-') {
+ /*
+ * This may be the first character of a range, so don't add
+ * it yet.
+ */
+
+ start = ch;
+ } else if (ch == '-') {
+ /*
+ * Check to see if this is the last character in the set, in which
+ * case it is not a range and we should add the previous character
+ * as well as the dash.
+ */
+
+ if (*format == ']') {
+ cset->chars[cset->nchars++] = start;
+ cset->chars[cset->nchars++] = ch;
+ } else {
+ format += Tcl_UtfToUniChar(format, &ch);
+ cset->ranges[cset->nranges].start = start;
+ cset->ranges[cset->nranges].end = ch;
+ cset->nranges++;
+ }
+ } else {
+ cset->chars[cset->nchars++] = ch;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+ return format;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * CharInSet --
+ *
+ * Check to see if a character matches the given set.
+ *
+ * Results:
+ * Returns non-zero if the character matches the given set.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+CharInSet(cset, ch)
+ CharSet *cset;
+ Tcl_UniChar ch;
+{
+ int i, match = 0;
+ for (i = 0; i < cset->nchars; i++) {
+ if (cset->chars[i] == ch) {
+ match = 1;
+ break;
+ }
+ }
+ if (!match) {
+ for (i = 0; i < cset->nranges; i++) {
+ if ((cset->ranges[i].start <= ch)
+ || (ch <= cset->ranges[i].end)) {
+ match = 1;
+ break;
+ }
+ }
+ }
+ return (cset->exclude ? !match : match);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ReleaseCharSet --
+ *
+ * Free the storage associated with a character set.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+ReleaseCharSet(cset)
+ CharSet *cset;
+{
+ ckfree((char *)cset->chars);
+ if (cset->ranges) {
+ ckfree((char *)cset->ranges);
+ }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ValidateFormat --
+ *
+ * Parse the format string and verify that it is properly formed
+ * and that there are exactly enough variables on the command line.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * May place an error in the interpreter result.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+ValidateFormat(interp, format, numVars)
+ Tcl_Interp *interp; /* Current interpreter. */
+ char *format; /* The format string. */
+ int numVars; /* The number of variables passed to the
+ * scan command. */
+{
+ int gotXpg, gotSequential, value, i, flags;
+ char *end;
+ Tcl_UniChar ch;
+ int *nassign = (int*)ckalloc(sizeof(int) * numVars);
+ int code = TCL_OK;
+ int objIndex;
+
+ /*
+ * Initialize an array that records the number of times a variable
+ * is assigned to by the format string. We use this to detect if
+ * a variable is multiply assigned or left unassigned.
+ */
+
+ for (i = 0; i < numVars; i++) {
+ nassign[i] = 0;
+ }
+
+ objIndex = gotXpg = gotSequential = 0;
+
+ while (*format != '\0') {
+ format += Tcl_UtfToUniChar(format, &ch);
+
+ flags = 0;
+
+ if (ch != '%') {
+ continue;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ if (ch == '%') {
+ continue;
+ }
+ if (ch == '*') {
+ flags |= SCAN_SUPPRESS;
+ format += Tcl_UtfToUniChar(format, &ch);
+ goto xpgCheckDone;
+ }
+
+ if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
+ /*
+ * Check for an XPG3-style %n$ specification. Note: there
+ * must not be a mixture of XPG3 specs and non-XPG3 specs
+ * in the same format string.
+ */
+
+ value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
+ if (*end != '$') {
+ goto notXpg;
+ }
+ format = end+1;
+ format += Tcl_UtfToUniChar(format, &ch);
+ gotXpg = 1;
+ if (gotSequential) {
+ goto mixedXPG;
+ }
+ objIndex = value - 1;
+ if ((objIndex < 0) || (objIndex >= numVars)) {
+ goto badIndex;
+ }
+ goto xpgCheckDone;
+ }
+
+ notXpg:
+ gotSequential = 1;
+ if (gotXpg) {
+ mixedXPG:
+ Tcl_SetResult(interp,
+ "cannot mix \"%\" and \"%n$\" conversion specifiers",
+ TCL_STATIC);
+ goto error;
+ }
+
+ xpgCheckDone:
+ /*
+ * Parse any width specifier.
+ */
+
+ if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
+ value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
+ flags |= SCAN_WIDTH;
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+
+ /*
+ * Ignore size specifier.
+ */
+
+ if ((ch == 'l') || (ch == 'L') || (ch == 'h')) {
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+
+ if (!(flags & SCAN_SUPPRESS) && objIndex >= numVars) {
+ goto badIndex;
+ }
+
+ /*
+ * Handle the various field types.
+ */
+
+ switch (ch) {
+ case 'n':
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'x':
+ case 'u':
+ case 'f':
+ case 'e':
+ case 'g':
+ case 's':
+ break;
+ case 'c':
+ if (flags & SCAN_WIDTH) {
+ Tcl_SetResult(interp, "field width may not be specified in %c conversion", TCL_STATIC);
+ goto error;
+ }
+ break;
+ case '[':
+ if (*format == '\0') {
+ goto badSet;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ if (ch == '^') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+ if (ch == ']') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+ while (ch != ']') {
+ if (*format == '\0') {
+ goto badSet;
+ }
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+ break;
+ badSet:
+ Tcl_SetResult(interp, "unmatched [ in format string",
+ TCL_STATIC);
+ goto error;
+ default:
+ {
+ char buf[TCL_UTF_MAX+1];
+
+ buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
+ Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
+ "bad scan conversion character \"", buf, "\"", NULL);
+ goto error;
+ }
+ }
+ if (!(flags & SCAN_SUPPRESS)) {
+ nassign[objIndex]++;
+ objIndex++;
+ }
+ }
+
+ /*
+ * Verify that all of the variable were assigned exactly once.
+ */
+
+ for (i = 0; i < numVars; i++) {
+ if (nassign[i] > 1) {
+ Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
+ goto error;
+ } else if (nassign[i] == 0) {
+ Tcl_SetResult(interp, "variable is not assigend by any conversion specifiers", TCL_STATIC);
+ goto error;
+ }
+ }
+
+ ckfree((char *)nassign);
+ return TCL_OK;
+
+ badIndex:
+ if (gotXpg) {
+ Tcl_SetResult(interp, "\"%n$\" argument index out of range",
+ TCL_STATIC);
+ } else {
+ Tcl_SetResult(interp,
+ "different numbers of variable names and field specifiers",
+ TCL_STATIC);
+ }
+
+ error:
+ ckfree((char *)nassign);
+ return TCL_ERROR;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_ScanObjCmd --
+ *
+ * This procedure is invoked to process the "scan" Tcl command.
+ * See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+ /* ARGSUSED */
+int
+Tcl_ScanObjCmd(dummy, interp, objc, objv)
+ ClientData dummy; /* Not used. */
+ Tcl_Interp *interp; /* Current interpreter. */
+ int objc; /* Number of arguments. */
+ Tcl_Obj *CONST objv[]; /* Argument objects. */
+{
+ char *format;
+ int numVars;
+ int objIndex, offset, i, value, result, code;
+ char *string, *end, *baseString;
+ char op;
+ int base;
+ int underflow = 0;
+ size_t width;
+ long (*fn)();
+ Tcl_UniChar ch, sch;
+ Tcl_Obj **objs, *objPtr;
+ int flags;
+ char buf[513]; /* Temporary buffer to hold scanned
+ * number strings before they are
+ * passed to strtoul. */
+
+ if (objc < 3) {
+ Tcl_WrongNumArgs(interp, 1, objv,
+ "string format ?varName varName ...?");
+ return TCL_ERROR;
+ }
+
+ format = Tcl_GetStringFromObj(objv[2], NULL);
+ numVars = objc-3;
+
+ /*
+ * Check for errors in the format string.
+ */
+
+ if (ValidateFormat(interp, format, numVars) == TCL_ERROR) {
+ return TCL_ERROR;
+ }
+
+ /*
+ * Allocate space for the result objects.
+ */
+
+ objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * numVars);
+ for (i = 0; i < numVars; i++) {
+ objs[i] = NULL;
+ }
+
+ string = Tcl_GetStringFromObj(objv[1], NULL);
+ baseString = string;
+
+ /*
+ * Iterate over the format string filling in the result objects until
+ * we reach the end of input, the end of the format string, or there
+ * is a mismatch.
+ */
+
+ objIndex = 0;
+ while (*format != '\0') {
+ format += Tcl_UtfToUniChar(format, &ch);
+
+ flags = 0;
+
+ /*
+ * If we see whitespace in the format, skip whitespace in the string.
+ */
+
+ if (TclUniCharIsSpace(ch)) {
+ offset = Tcl_UtfToUniChar(string, &sch);
+ while (TclUniCharIsSpace(sch)) {
+ if (*string == '\0') {
+ goto done;
+ }
+ string += offset;
+ offset = Tcl_UtfToUniChar(string, &sch);
+ }
+ continue;
+ }
+
+ if (ch != '%') {
+ literal:
+ if (*string == '\0') {
+ underflow = 1;
+ goto done;
+ }
+ string += Tcl_UtfToUniChar(string, &sch);
+ if (ch != sch) {
+ goto done;
+ }
+ continue;
+ }
+
+ format += Tcl_UtfToUniChar(format, &ch);
+ if (ch == '%') {
+ goto literal;
+ }
+
+ /*
+ * Check for assignment suppression ('*') or an XPG3-style
+ * assignment ('%n$').
+ */
+
+ if (ch == '*') {
+ flags |= SCAN_SUPPRESS;
+ format += Tcl_UtfToUniChar(format, &ch);
+ } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
+ value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
+ if (*end == '$') {
+ format = end+1;
+ format += Tcl_UtfToUniChar(format, &ch);
+ objIndex = value - 1;
+ }
+ }
+
+ /*
+ * Parse any width specifier.
+ */
+
+ if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
+ width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
+ format += Tcl_UtfToUniChar(format, &ch);
+ } else {
+ width = 0;
+ }
+
+ /*
+ * Ignore size specifier.
+ */
+
+ if ((ch == 'l') || (ch == 'L') || (ch == 'h')) {
+ format += Tcl_UtfToUniChar(format, &ch);
+ }
+
+ /*
+ * Handle the various field types.
+ */
+
+ switch (ch) {
+ case 'n':
+ if (!(flags & SCAN_SUPPRESS)) {
+ objPtr = Tcl_NewIntObj(string - baseString);
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+ continue;
+
+ case 'd':
+ op = 'i';
+ base = 10;
+ fn = (long (*)())strtol;
+ break;
+ case 'i':
+ op = 'i';
+ base = 0;
+ fn = (long (*)())strtol;
+ break;
+ case 'o':
+ op = 'i';
+ base = 8;
+ fn = (long (*)())strtol;
+ break;
+ case 'x':
+ op = 'i';
+ base = 16;
+ fn = (long (*)())strtol;
+ break;
+ case 'u':
+ op = 'i';
+ base = 10;
+ flags |= SCAN_UNSIGNED;
+ fn = (long (*)())strtoul;
+ break;
+
+ case 'f':
+ case 'e':
+ case 'g':
+ op = 'f';
+ break;
+
+ case 's':
+ op = 's';
+ break;
+
+ case 'c':
+ op = 'c';
+ flags |= SCAN_NOSKIP;
+ break;
+ case '[':
+ op = '[';
+ flags |= SCAN_NOSKIP;
+ break;
+ }
+
+ /*
+ * At this point, we will need additional characters from the
+ * string to proceed.
+ */
+
+ if (*string == '\0') {
+ underflow = 1;
+ goto done;
+ }
+
+ /*
+ * Skip any leading whitespace at the beginning of a field unless
+ * the format suppresses this behavior.
+ */
+
+ if (!(flags & SCAN_NOSKIP)) {
+ while (*string != '\0') {
+ offset = Tcl_UtfToUniChar(string, &sch);
+ if (!TclUniCharIsSpace(sch)) {
+ break;
+ }
+ string += offset;
+ }
+ if (*string == '\0') {
+ underflow = 1;
+ goto done;
+ }
+ }
+
+ /*
+ * Perform the requested scanning operation.
+ */
+
+ switch (op) {
+ case 's':
+ /*
+ * Scan a string up to width characters or whitespace.
+ */
+
+ if (width == 0) {
+ width = (size_t) ~0;
+ }
+ end = string;
+ while (*end != '\0') {
+ offset = Tcl_UtfToUniChar(end, &sch);
+ if (TclUniCharIsSpace(sch)) {
+ break;
+ }
+ end += offset;
+ if (--width == 0) {
+ break;
+ }
+ }
+ if (!(flags & SCAN_SUPPRESS)) {
+ objPtr = Tcl_NewStringObj(string, end-string);
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+ string = end;
+ break;
+
+ case '[': {
+ CharSet cset;
+
+ if (width == 0) {
+ width = (size_t) ~0;
+ }
+ end = string;
+
+ format = BuildCharSet(&cset, format);
+ while (*end != '\0') {
+ offset = Tcl_UtfToUniChar(end, &sch);
+ if (!CharInSet(&cset, sch)) {
+ break;
+ }
+ end += offset;
+ if (--width == 0) {
+ break;
+ }
+ }
+ ReleaseCharSet(&cset);
+
+ if (!(flags & SCAN_SUPPRESS)) {
+ objPtr = Tcl_NewStringObj(string, end-string);
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+ string = end;
+
+ break;
+ }
+ case 'c':
+ /*
+ * Scan a single Unicode character.
+ */
+
+ string += Tcl_UtfToUniChar(string, &sch);
+ if (!(flags & SCAN_SUPPRESS)) {
+ objPtr = Tcl_NewIntObj((int)sch);
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+ break;
+
+ case 'i':
+ /*
+ * Scan an unsigned or signed integer.
+ */
+
+ if ((width == 0) || (width > sizeof(buf) - 1)) {
+ width = sizeof(buf) - 1;
+ }
+ flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
+ for (end = buf; width > 0; width--) {
+ switch (*string) {
+ /*
+ * The 0 digit has special meaning at the beginning of
+ * a number. If we are unsure of the base, it
+ * indicates that we are in base 8 or base 16 (if it is
+ * followed by an 'x').
+ */
+ case '0':
+ if (base == 0) {
+ base = 8;
+ flags |= SCAN_XOK;
+ }
+ if (flags & SCAN_NOZERO) {
+ flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
+ | SCAN_NOZERO);
+ } else {
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK
+ | SCAN_NODIGITS);
+ }
+ goto addToInt;
+
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ if (base == 0) {
+ base = 10;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case '8': case '9':
+ if (base == 0) {
+ base = 10;
+ }
+ if (base <= 8) {
+ break;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case 'A': case 'B': case 'C':
+ case 'D': case 'E': case 'F':
+ case 'a': case 'b': case 'c':
+ case 'd': case 'e': case 'f':
+ if (base <= 10) {
+ break;
+ }
+ flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+ goto addToInt;
+
+ case '+': case '-':
+ if (flags & SCAN_SIGNOK) {
+ flags &= ~SCAN_SIGNOK;
+ goto addToInt;
+ }
+ break;
+
+ case 'x': case 'X':
+ if ((flags & SCAN_XOK) && (end == buf+1)) {
+ base = 16;
+ flags &= ~SCAN_XOK;
+ goto addToInt;
+ }
+ break;
+ }
+
+ /*
+ * We got an illegal character so we are done accumulating.
+ */
+
+ break;
+
+ addToInt:
+ /*
+ * Add the character to the temporary buffer.
+ */
+
+ *end++ = *string++;
+ if (*string == '\0') {
+ break;
+ }
+ }
+
+ /*
+ * Check to see if we need to back up because we only got a
+ * sign or a trailing x after a 0.
+ */
+
+ if (flags & SCAN_NODIGITS) {
+ goto done;
+ } else if (end[-1] == 'x' || end[-1] == 'X') {
+ end--;
+ string--;
+ }
+
+
+ /*
+ * Scan the value from the temporary buffer. If we are
+ * returning a large unsigned value, we have to convert it back
+ * to a string since Tcl only supports signed values.
+ */
+
+ if (!(flags & SCAN_SUPPRESS)) {
+ *end = '\0';
+ value = (int) (*fn)(buf, NULL, base);
+ if ((flags & SCAN_UNSIGNED) && (value < 0)) {
+ sprintf(buf, "%u", value); /* INTL: ISO digit */
+ objPtr = Tcl_NewStringObj(buf, -1);
+ } else {
+ objPtr = Tcl_NewIntObj(value);
+ }
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+
+ break;
+
+ case 'f':
+ /*
+ * Scan a floating point number
+ */
+
+ if ((width == 0) || (width > sizeof(buf) - 1)) {
+ width = sizeof(buf) - 1;
+ }
+ flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
+ for (end = buf; width > 0; width--) {
+ switch (*string) {
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ case '8': case '9':
+ flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
+ goto addToFloat;
+ case '+': case '-':
+ if (flags & SCAN_SIGNOK) {
+ flags &= ~SCAN_SIGNOK;
+ goto addToFloat;
+ }
+ break;
+ case '.':
+ if (flags & SCAN_PTOK) {
+ flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
+ goto addToFloat;
+ }
+ break;
+ case 'e': case 'E':
+ /*
+ * An exponent is not allowed until there has
+ * been at least one digit.
+ */
+
+ if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
+ == SCAN_EXPOK) {
+ flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
+ | SCAN_SIGNOK | SCAN_NODIGITS;
+ goto addToFloat;
+ }
+ break;
+ }
+
+ /*
+ * We got an illegal character so we are done accumulating.
+ */
+
+ break;
+
+ addToFloat:
+ /*
+ * Add the character to the temporary buffer.
+ */
+
+ *end++ = *string++;
+ if (*string == '\0') {
+ break;
+ }
+ }
+
+ /*
+ * Check to see if we need to back up because we saw a
+ * trailing 'e' or sign.
+ */
+
+ if (flags & SCAN_NODIGITS) {
+ if (flags & SCAN_EXPOK) {
+ /*
+ * There were no digits at all so scanning has
+ * failed and we are done.
+ */
+ goto done;
+ }
+
+ /*
+ * We got a bad exponent ('e' and maybe a sign).
+ */
+
+ end--;
+ string--;
+ if (*end != 'e' && *end != 'E') {
+ end--;
+ string--;
+ }
+ }
+
+ /*
+ * Scan the value from the temporary buffer.
+ */
+
+ if (!(flags & SCAN_SUPPRESS)) {
+ double dvalue;
+ *end = '\0';
+ dvalue = strtod(buf, NULL);
+ objPtr = Tcl_NewDoubleObj(dvalue);
+ Tcl_IncrRefCount(objPtr);
+ objs[objIndex++] = objPtr;
+ }
+ break;
+ }
+ }
+
+ done:
+ result = 0;
+ code = TCL_OK;
+
+ for (i = 0; i < numVars; i++) {
+ if (objs[i] != NULL) {
+ result++;
+ if (Tcl_SetObjVar2(interp, Tcl_GetString(objv[i+3]),
+ NULL, objs[i], 0) == NULL) {
+ Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
+ "couldn't set variable \"",
+ Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
+ code = TCL_ERROR;
+ }
+ Tcl_DecrRefCount(objs[i]);
+ }
+ }
+ ckfree((char*) objs);
+ if (code == TCL_OK) {
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(result));
+ }
+ return code;
+}
diff --git a/tests/scan.test b/tests/scan.test
index e343742..bbae718 100644
--- a/tests/scan.test
+++ b/tests/scan.test
@@ -10,10 +10,38 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: scan.test,v 1.1.2.2 1998/09/24 23:59:35 stanton Exp $
+# RCS: @(#) $Id: scan.test,v 1.1.2.3 1998/11/16 20:45:23 stanton Exp $
if {[string compare test [info procs test]] == 1} then {source defs}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%d%1$d} x} msg] $msg
+} {1 {cannot mix "%" and "%n$" conversion specifiers}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%d%1$d} x} msg] $msg
+} {1 {cannot mix "%" and "%n$" conversion specifiers}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%2$d%d} x} msg] $msg
+} {1 {"%n$" argument index out of range}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} %d} msg] $msg
+} {1 {different numbers of variable names and field specifiers}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%10c} a} msg] $msg
+} {1 {field width may not be specified in %c conversion}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%*1$d} a} msg] $msg
+} {1 {bad scan conversion character "$"}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} {%1$d%1$d} a} msg] $msg
+} {1 {variable is assigned by multiple "%n$" conversion specifiers}}
+test scan-1.1 {ValidateFormat} {
+ list [catch {scan {} a x} msg] $msg
+} {1 {variable is not assigend by any conversion specifiers}}
+
+
+
+
test scan-1.1 {integer scanning} {
set a {}; set b {}; set c {}; set d {}
list [scan "-20 1476 \n33 0" "%d %d %d %d" a b c d] $a $b $c $d
@@ -130,6 +158,10 @@ test scan-3.7 {string and character scanning, unicode} {
set a {}; set b {}
list [scan "ab\u00c7cdef" "ab%c%c" a b] $a $b
} "2 199 99"
+test scan-3.7 {string and character scanning, unicode} {
+ set a {}; set b {}
+ list [scan "ab\ufeffdef" "%\[ab\ufeff\]" a] $a
+} "1 ab\ufeff"
test scan-4.1 {error conditions} {
catch {scan a}
@@ -138,34 +170,27 @@ test scan-4.2 {error conditions} {
catch {scan a} msg
set msg
} {wrong # args: should be "scan string format ?varName varName ...?"}
-test scan-4.3 {error conditions} {
- catch {scan "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d" a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20 a21}
-} 1
-test scan-4.4 {error conditions} {
- catch {scan "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21" "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d" a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20 a21} msg
- set msg
-} {too many fields to scan}
test scan-4.5 {error conditions} {
- list [catch {scan a %D} msg] $msg
+ list [catch {scan a %D x} msg] $msg
} {1 {bad scan conversion character "D"}}
test scan-4.6 {error conditions} {
- list [catch {scan a %O} msg] $msg
+ list [catch {scan a %O x} msg] $msg
} {1 {bad scan conversion character "O"}}
test scan-4.7 {error conditions} {
- list [catch {scan a %X} msg] $msg
+ list [catch {scan a %X x} msg] $msg
} {1 {bad scan conversion character "X"}}
test scan-4.8 {error conditions} {
- list [catch {scan a %F} msg] $msg
+ list [catch {scan a %F x} msg] $msg
} {1 {bad scan conversion character "F"}}
test scan-4.9 {error conditions} {
- list [catch {scan a %E} msg] $msg
+ list [catch {scan a %E x} msg] $msg
} {1 {bad scan conversion character "E"}}
test scan-4.10 {error conditions} {
list [catch {scan a "%d %d" a} msg] $msg
} {1 {different numbers of variable names and field specifiers}}
test scan-4.11 {error conditions} {
list [catch {scan a "%d %d" a b c} msg] $msg
-} {1 {different numbers of variable names and field specifiers}}
+} {1 {variable is not assigend by any conversion specifiers}}
test scan-4.12 {error conditions} {
set a {}; set b {}; set c {}; set d {}
list [expr {[scan " a" " a %d %d %d %d" a b c d] <= 0}] $a $b $c $d
@@ -204,7 +229,16 @@ test scan-4.19 {error conditions} {
list [catch {scan 44 %2c a} msg] $msg
} {1 {field width may not be specified in %c conversion}}
test scan-4.20 {error conditions} {
- list [catch {scan abc {%[}} msg] $msg
+ list [catch {scan abc {%[} x} msg] $msg
+} {1 {unmatched [ in format string}}
+test scan-4.20 {error conditions} {
+ list [catch {scan abc {%[^a} x} msg] $msg
+} {1 {unmatched [ in format string}}
+test scan-4.20 {error conditions} {
+ list [catch {scan abc {%[^]a} x} msg] $msg
+} {1 {unmatched [ in format string}}
+test scan-4.20 {error conditions} {
+ list [catch {scan abc {%[]a} x} msg] $msg
} {1 {unmatched [ in format string}}
test scan-5.1 {lots of arguments} {
diff --git a/win/makefile.vc b/win/makefile.vc
index 79c4352..c18b180 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -4,7 +4,7 @@
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# Copyright (c) 1995-1996 Sun Microsystems, Inc.
-# RCS: @(#) $Id: makefile.vc,v 1.1.2.9 1998/11/11 04:08:39 stanton Exp $
+# RCS: @(#) $Id: makefile.vc,v 1.1.2.10 1998/11/16 20:45:23 stanton Exp $
# Does not depend on the presence of any environment variables in
# order to compile tcl; all needed information is derived from
@@ -168,6 +168,7 @@ TCLOBJS = \
$(TMPDIR)\tclProc.obj \
$(TMPDIR)\tclRegexp.obj \
$(TMPDIR)\tclResult.obj \
+ $(TMPDIR)\tclScan.obj \
$(TMPDIR)\tclStringObj.obj \
$(TMPDIR)\tclThread.obj \
$(TMPDIR)\tclTimer.obj \