diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-29 09:41:05 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-03-29 09:41:05 (GMT) |
commit | 554f692cc41dac694d2cb30ec90b94b9f59df484 (patch) | |
tree | 00d19ed085b41bd06adcdfedea3227b1e930a59e | |
parent | 3cd9927e9bf6f937e8cd932675df66c01011c8ed (diff) | |
download | tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.zip tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.tar.gz tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.tar.bz2 |
Change regexp C API to handle indexes > 2G. New TIP upcoming.
-rw-r--r-- | doc/RegExp.3 | 8 | ||||
-rw-r--r-- | generic/regcomp.c | 5 | ||||
-rw-r--r-- | generic/regex.h | 7 | ||||
-rw-r--r-- | generic/regexec.c | 6 | ||||
-rw-r--r-- | generic/tcl.h | 9 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 26 | ||||
-rw-r--r-- | generic/tclInt.decls | 4 | ||||
-rw-r--r-- | generic/tclInt.h | 2 | ||||
-rw-r--r-- | generic/tclIntDecls.h | 4 | ||||
-rw-r--r-- | generic/tclRegexp.c | 12 | ||||
-rw-r--r-- | generic/tclTest.c | 29 |
11 files changed, 53 insertions, 59 deletions
diff --git a/doc/RegExp.3 b/doc/RegExp.3 index aa757bc..5fd8176 100644 --- a/doc/RegExp.3 +++ b/doc/RegExp.3 @@ -337,9 +337,9 @@ defined as follows: .PP .CS typedef struct Tcl_RegExpInfo { - int \fInsubs\fR; + size_t \fInsubs\fR; Tcl_RegExpIndices *\fImatches\fR; - long \fIextendStart\fR; + size_t \fIextendStart\fR; } \fBTcl_RegExpInfo\fR; .CE .PP @@ -355,8 +355,8 @@ follows: .PP .CS typedef struct Tcl_RegExpIndices { - long \fIstart\fR; - long \fIend\fR; + size_t \fIstart\fR; + size_t \fIend\fR; } \fBTcl_RegExpIndices\fR; .CE .PP diff --git a/generic/regcomp.c b/generic/regcomp.c index 47f06c8..9bf862c 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -338,7 +338,6 @@ compile( v->spaceused = 0; re->re_magic = REMAGIC; re->re_info = 0; /* bits get set during parse */ - re->re_csize = sizeof(chr); re->re_guts = NULL; re->re_fns = (void*)(&functions); @@ -2085,8 +2084,8 @@ dump( } fprintf(f, "\n\n\n========= DUMP ==========\n"); - fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - (int) re->re_nsub, re->re_info, re->re_csize, g->ntree); + fprintf(f, "nsub %" TCL_Z_MODIFIER "d, info 0%lo, ntree %d\n", + re->re_nsub, re->re_info, g->ntree); dumpcolors(&g->cmap, f); if (!NULLCNFA(g->search)) { diff --git a/generic/regex.h b/generic/regex.h index 81f98a4..47a8103 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -117,8 +117,8 @@ extern "C" { /* the biggie, a compiled RE (or rather, a front end to same) */ typedef struct { int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ long re_info; /* information about RE */ + size_t re_nsub; /* number of subexpressions */ #define REG_UBACKREF 000001 #define REG_ULOOKAHEAD 000002 #define REG_UBOUNDS 000004 @@ -133,7 +133,6 @@ typedef struct { #define REG_UEMPTYMATCH 004000 #define REG_UIMPOSSIBLE 010000 #define REG_USHORTEST 020000 - int re_csize; /* sizeof(character) */ char *re_endp; /* backward compatibility kludge */ /* the rest is opaque pointers to hidden innards */ char 
*re_guts; /* `char *' is more portable than `void *' */ @@ -142,8 +141,8 @@ typedef struct { /* result reporting (may acquire more fields later) */ typedef struct { - long rm_so; /* start of substring */ - long rm_eo; /* end of substring */ + size_t rm_so; /* start of substring */ + size_t rm_eo; /* end of substring */ } regmatch_t; /* supplementary control and reporting */ diff --git a/generic/regexec.c b/generic/regexec.c index c57f42c..5ad8b17 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -187,10 +187,6 @@ exec( FreeVars(v); return REG_INVARG; } - if (re->re_csize != sizeof(chr)) { - FreeVars(v); - return REG_MIXED; - } /* * Setup. @@ -889,7 +885,7 @@ cbrdissect( MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max)); /* get the backreferenced string */ - if (v->pmatch[n].rm_so == -1) { + if (v->pmatch[n].rm_so == (size_t)-1) { return REG_NOMATCH; } brstring = v->start + v->pmatch[n].rm_so; diff --git a/generic/tcl.h b/generic/tcl.h index a0fd3b3..7ca204a 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -459,19 +459,18 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData); */ typedef struct Tcl_RegExpIndices { - long start; /* Character offset of first character in + size_t start; /* Character offset of first character in * match. */ - long end; /* Character offset of first character after + size_t end; /* Character offset of first character after * the match. */ } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { - int nsubs; /* Number of subexpressions in the compiled + size_t nsubs; /* Number of subexpressions in the compiled * expression. */ Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ - long extendStart; /* The offset at which a subsequent match + size_t extendStart; /* The offset at which a subsequent match * might begin. */ - long reserved; /* Reserved for later use. 
*/ } Tcl_RegExpInfo; /* diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 831c3c4..4d51c73 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -127,9 +127,9 @@ Tcl_RegexpObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - size_t offset; + size_t offset, stringLength, matchLength; int i, indices, match, about, all, doinline, numMatchesSaved; - int cflags, eflags, stringLength, matchLength; + int cflags, eflags; Tcl_RegExp regExpr; Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; Tcl_RegExpInfo info; @@ -309,7 +309,7 @@ Tcl_RegexpObjCmd( if (offset == TCL_INDEX_START) { eflags = 0; - } else if (offset + 1 > (size_t)stringLength + 1) { + } else if (offset + 1 > stringLength + 1) { eflags = TCL_REG_NOTBOL; } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') { eflags = 0; @@ -373,7 +373,7 @@ Tcl_RegexpObjCmd( * area. (Scriptics Bug 4391/SF Bug #219232) */ - if (i <= info.nsubs && info.matches[i].start >= 0) { + if (i <= (int)info.nsubs && info.matches[i].start != TCL_INDEX_NONE) { start = offset + info.matches[i].start; end = offset + info.matches[i].end; @@ -395,7 +395,7 @@ Tcl_RegexpObjCmd( newPtr = Tcl_NewListObj(2, objs); } else { - if (i <= info.nsubs) { + if (i <= (int)info.nsubs) { newPtr = Tcl_GetRange(objPtr, offset + info.matches[i].start, offset + info.matches[i].end - 1); @@ -445,7 +445,7 @@ Tcl_RegexpObjCmd( offset++; } all++; - if (offset + 1 >= (size_t)stringLength + 1) { + if (offset + 1 >= stringLength + 1) { break; } } @@ -783,7 +783,7 @@ Tcl_RegsubObjCmd( args = Tcl_Alloc(sizeof(Tcl_Obj*) * numArgs); memcpy(args, parts, sizeof(Tcl_Obj*) * numParts); - for (idx = 0 ; idx <= info.nsubs ; idx++) { + for (idx = 0 ; idx <= (int)info.nsubs ; idx++) { subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { @@ -807,7 +807,7 @@ Tcl_RegsubObjCmd( */ result = Tcl_EvalObjv(interp, numArgs, args, 0); - for (idx = 0 ; idx <= info.nsubs ; idx++) { 
+ for (idx = 0 ; idx <= (int)info.nsubs ; idx++) { TclDecrRefCount(args[idx + numParts]); } Tcl_Free(args); @@ -887,7 +887,7 @@ Tcl_RegsubObjCmd( wsrc - wfirstChar); } - if (idx <= info.nsubs) { + if (idx <= (int)info.nsubs) { subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { @@ -3734,13 +3734,13 @@ TclNRSwitchObjCmd( TclNewObj(indicesObj); } - for (j=0 ; j<=info.nsubs ; j++) { + for (j=0 ; j<=(int)info.nsubs ; j++) { if (indexVarObj != NULL) { Tcl_Obj *rangeObjAry[2]; - if (info.matches[j].end > 0) { - rangeObjAry[0] = Tcl_NewWideIntObj(info.matches[j].start); - rangeObjAry[1] = Tcl_NewWideIntObj(info.matches[j].end-1); + if (info.matches[j].end + 1 > 1) { + rangeObjAry[0] = TclNewWideIntObjFromSize(info.matches[j].start); + rangeObjAry[1] = TclNewWideIntObjFromSize(info.matches[j].end-1); } else { rangeObjAry[0] = rangeObjAry[1] = Tcl_NewWideIntObj(-1); } diff --git a/generic/tclInt.decls b/generic/tclInt.decls index c0d7696..096f06f 100644 --- a/generic/tclInt.decls +++ b/generic/tclInt.decls @@ -614,8 +614,8 @@ declare 150 { int TclRegAbout(Tcl_Interp *interp, Tcl_RegExp re) } declare 151 { - void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index, int *startPtr, - int *endPtr) + void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index, size_t *startPtr, + size_t *endPtr) } declare 152 { void TclSetLibraryPath(Tcl_Obj *pathPtr) diff --git a/generic/tclInt.h b/generic/tclInt.h index e0de48f..44d0ec6 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4936,7 +4936,7 @@ MODULE_SCOPE Tcl_PackageInitProc Procbodytest_SafeInit; # define TclNewWideIntObjFromSize(value) \ Tcl_NewWideIntObj(TclWideIntFromSize(value)) #else -# define TclWideIntFromSize(value) (value) +# define TclWideIntFromSize(value) ((Tcl_WideInt)(value)) # define TclNewWideIntObjFromSize Tcl_NewWideIntObj #endif diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h index 61249c0..bea4381 100644 --- a/generic/tclIntDecls.h +++ 
b/generic/tclIntDecls.h @@ -337,7 +337,7 @@ EXTERN void TclHandleRelease(TclHandle handle); EXTERN int TclRegAbout(Tcl_Interp *interp, Tcl_RegExp re); /* 151 */ EXTERN void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index, - int *startPtr, int *endPtr); + size_t *startPtr, size_t *endPtr); /* 152 */ EXTERN void TclSetLibraryPath(Tcl_Obj *pathPtr); /* 153 */ @@ -739,7 +739,7 @@ typedef struct TclIntStubs { TclHandle (*tclHandlePreserve) (TclHandle handle); /* 148 */ void (*tclHandleRelease) (TclHandle handle); /* 149 */ int (*tclRegAbout) (Tcl_Interp *interp, Tcl_RegExp re); /* 150 */ - void (*tclRegExpRangeUniChar) (Tcl_RegExp re, size_t index, int *startPtr, int *endPtr); /* 151 */ + void (*tclRegExpRangeUniChar) (Tcl_RegExp re, size_t index, size_t *startPtr, size_t *endPtr); /* 151 */ void (*tclSetLibraryPath) (Tcl_Obj *pathPtr); /* 152 */ Tcl_Obj * (*tclGetLibraryPath) (void); /* 153 */ void (*reserved154)(void); diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 67195bb..f793cb8 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -264,7 +264,7 @@ Tcl_RegExpRange( if (index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; - } else if (regexpPtr->matches[index].rm_so == -1) { + } else if (regexpPtr->matches[index].rm_so == (size_t)-1) { *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { @@ -365,17 +365,17 @@ TclRegExpRangeUniChar( * > 0 means give the range of a matching * subrange, -1 means the range of the * rm_extend field. */ - int *startPtr, /* Store address of first character in + size_t *startPtr, /* Store address of first character in * (sub-)range here. */ - int *endPtr) /* Store address of character just after last + size_t *endPtr) /* Store address of character just after last * in (sub-)range here. 
*/ { TclRegexp *regexpPtr = (TclRegexp *) re; - if ((regexpPtr->flags&REG_EXPECT) && index == TCL_AUTO_LENGTH) { + if ((regexpPtr->flags&REG_EXPECT) && (index == TCL_INDEX_NONE)) { *startPtr = regexpPtr->details.rm_extend.rm_so; *endPtr = regexpPtr->details.rm_extend.rm_eo; - } else if (index > regexpPtr->re.re_nsub) { + } else if (index + 1 > regexpPtr->re.re_nsub + 1) { *startPtr = -1; *endPtr = -1; } else { @@ -677,7 +677,7 @@ TclRegAbout( resultObj = Tcl_NewObj(); Tcl_ListObjAppendElement(NULL, resultObj, - Tcl_NewWideIntObj((Tcl_WideInt) regexpPtr->re.re_nsub)); + TclNewWideIntObjFromSize(regexpPtr->re.re_nsub)); /* * Now append a list of all the bit-flags set for the RE. diff --git a/generic/tclTest.c b/generic/tclTest.c index 4953133..ad66688 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -342,7 +342,7 @@ static int TestreturnObjCmd(void *dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); static void TestregexpXflags(const char *string, - int length, int *cflagsPtr, int *eflagsPtr); + size_t length, int *cflagsPtr, int *eflagsPtr); static int TestsaveresultCmd(void *dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); @@ -3878,12 +3878,12 @@ TestregexpObjCmd( if (objc > 2 && (cflags&REG_EXPECT) && indices) { const char *varName; const char *value; - int start, end; + size_t start, end; char resinfo[TCL_INTEGER_SPACE * 2]; varName = Tcl_GetString(objv[2]); TclRegExpRangeUniChar(regExpr, -1, &start, &end); - sprintf(resinfo, "%d %d", start, end-1); + sprintf(resinfo, "%" TCL_LL_MODIFIER "d %" TCL_LL_MODIFIER "d", TclWideIntFromSize(start), TclWideIntFromSize(end-1)); value = Tcl_SetVar2(interp, varName, NULL, resinfo, 0); if (value == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -3897,7 +3897,7 @@ TestregexpObjCmd( Tcl_RegExpGetInfo(regExpr, &info); varName = Tcl_GetString(objv[2]); - sprintf(resinfo, "%ld", info.extendStart); + sprintf(resinfo, "%" TCL_LL_MODIFIER "d", TclWideIntFromSize(info.extendStart)); value = 
Tcl_SetVar2(interp, varName, NULL, resinfo, 0); if (value == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -3918,7 +3918,7 @@ TestregexpObjCmd( Tcl_RegExpGetInfo(regExpr, &info); for (i = 0; i < objc; i++) { - int start, end; + size_t start, end; Tcl_Obj *newPtr, *varPtr, *valuePtr; varPtr = objv[i]; @@ -3928,9 +3928,9 @@ TestregexpObjCmd( if (ii == -1) { TclRegExpRangeUniChar(regExpr, ii, &start, &end); - } else if (ii > info.nsubs) { - start = -1; - end = -1; + } else if (ii > (int)info.nsubs) { + start = TCL_INDEX_NONE; + end = TCL_INDEX_NONE; } else { start = info.matches[ii].start; end = info.matches[ii].end; @@ -3941,19 +3941,19 @@ TestregexpObjCmd( * instead of the first character after the match. */ - if (end >= 0) { + if (end != TCL_INDEX_NONE) { end--; } - objs[0] = Tcl_NewWideIntObj(start); - objs[1] = Tcl_NewWideIntObj(end); + objs[0] = TclNewWideIntObjFromSize(start); + objs[1] = TclNewWideIntObjFromSize(end); newPtr = Tcl_NewListObj(2, objs); } else { if (ii == -1) { TclRegExpRangeUniChar(regExpr, ii, &start, &end); newPtr = Tcl_GetRange(objPtr, start, end); - } else if (ii > info.nsubs) { + } else if (ii > (int)info.nsubs) { newPtr = Tcl_NewObj(); } else { newPtr = Tcl_GetRange(objPtr, info.matches[ii].start, @@ -3994,11 +3994,12 @@ TestregexpObjCmd( static void TestregexpXflags( const char *string, /* The string of flags. */ - int length, /* The length of the string in bytes. */ + size_t length, /* The length of the string in bytes. */ int *cflagsPtr, /* compile flags word */ int *eflagsPtr) /* exec flags word */ { - int i, cflags, eflags; + size_t i; + int cflags, eflags; cflags = *cflagsPtr; eflags = *eflagsPtr; |