summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-29 09:41:05 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2019-03-29 09:41:05 (GMT)
commit 554f692cc41dac694d2cb30ec90b94b9f59df484 (patch)
tree 00d19ed085b41bd06adcdfedea3227b1e930a59e
parent 3cd9927e9bf6f937e8cd932675df66c01011c8ed (diff)
download tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.zip
tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.tar.gz
tcl-554f692cc41dac694d2cb30ec90b94b9f59df484.tar.bz2
Change regexp C API to handle indexes > 2G. New TIP upcoming.
-rw-r--r-- doc/RegExp.3 8
-rw-r--r-- generic/regcomp.c 5
-rw-r--r-- generic/regex.h 7
-rw-r--r-- generic/regexec.c 6
-rw-r--r-- generic/tcl.h 9
-rw-r--r-- generic/tclCmdMZ.c 26
-rw-r--r-- generic/tclInt.decls 4
-rw-r--r-- generic/tclInt.h 2
-rw-r--r-- generic/tclIntDecls.h 4
-rw-r--r-- generic/tclRegexp.c 12
-rw-r--r-- generic/tclTest.c 29
11 files changed, 53 insertions, 59 deletions
diff --git a/doc/RegExp.3 b/doc/RegExp.3
index aa757bc..5fd8176 100644
--- a/doc/RegExp.3
+++ b/doc/RegExp.3
@@ -337,9 +337,9 @@ defined as follows:
.PP
.CS
typedef struct Tcl_RegExpInfo {
- int \fInsubs\fR;
+ size_t \fInsubs\fR;
Tcl_RegExpIndices *\fImatches\fR;
- long \fIextendStart\fR;
+ size_t \fIextendStart\fR;
} \fBTcl_RegExpInfo\fR;
.CE
.PP
@@ -355,8 +355,8 @@ follows:
.PP
.CS
typedef struct Tcl_RegExpIndices {
- long \fIstart\fR;
- long \fIend\fR;
+ size_t \fIstart\fR;
+ size_t \fIend\fR;
} \fBTcl_RegExpIndices\fR;
.CE
.PP
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 47f06c8..9bf862c 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -338,7 +338,6 @@ compile(
v->spaceused = 0;
re->re_magic = REMAGIC;
re->re_info = 0; /* bits get set during parse */
- re->re_csize = sizeof(chr);
re->re_guts = NULL;
re->re_fns = (void*)(&functions);
@@ -2085,8 +2084,8 @@ dump(
}
fprintf(f, "\n\n\n========= DUMP ==========\n");
- fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
- (int) re->re_nsub, re->re_info, re->re_csize, g->ntree);
+ fprintf(f, "nsub %" TCL_Z_MODIFIER "d, info 0%lo, ntree %d\n",
+ re->re_nsub, re->re_info, g->ntree);
dumpcolors(&g->cmap, f);
if (!NULLCNFA(g->search)) {
diff --git a/generic/regex.h b/generic/regex.h
index 81f98a4..47a8103 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -117,8 +117,8 @@ extern "C" {
/* the biggie, a compiled RE (or rather, a front end to same) */
typedef struct {
int re_magic; /* magic number */
- size_t re_nsub; /* number of subexpressions */
long re_info; /* information about RE */
+ size_t re_nsub; /* number of subexpressions */
#define REG_UBACKREF 000001
#define REG_ULOOKAHEAD 000002
#define REG_UBOUNDS 000004
@@ -133,7 +133,6 @@ typedef struct {
#define REG_UEMPTYMATCH 004000
#define REG_UIMPOSSIBLE 010000
#define REG_USHORTEST 020000
- int re_csize; /* sizeof(character) */
char *re_endp; /* backward compatibility kludge */
/* the rest is opaque pointers to hidden innards */
char *re_guts; /* `char *' is more portable than `void *' */
@@ -142,8 +141,8 @@ typedef struct {
/* result reporting (may acquire more fields later) */
typedef struct {
- long rm_so; /* start of substring */
- long rm_eo; /* end of substring */
+ size_t rm_so; /* start of substring */
+ size_t rm_eo; /* end of substring */
} regmatch_t;
/* supplementary control and reporting */
diff --git a/generic/regexec.c b/generic/regexec.c
index c57f42c..5ad8b17 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -187,10 +187,6 @@ exec(
FreeVars(v);
return REG_INVARG;
}
- if (re->re_csize != sizeof(chr)) {
- FreeVars(v);
- return REG_MIXED;
- }
/*
* Setup.
@@ -889,7 +885,7 @@ cbrdissect(
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
/* get the backreferenced string */
- if (v->pmatch[n].rm_so == -1) {
+ if (v->pmatch[n].rm_so == (size_t)-1) {
return REG_NOMATCH;
}
brstring = v->start + v->pmatch[n].rm_so;
diff --git a/generic/tcl.h b/generic/tcl.h
index a0fd3b3..7ca204a 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -459,19 +459,18 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData);
*/
typedef struct Tcl_RegExpIndices {
- long start; /* Character offset of first character in
+ size_t start; /* Character offset of first character in
* match. */
- long end; /* Character offset of first character after
+ size_t end; /* Character offset of first character after
* the match. */
} Tcl_RegExpIndices;
typedef struct Tcl_RegExpInfo {
- int nsubs; /* Number of subexpressions in the compiled
+ size_t nsubs; /* Number of subexpressions in the compiled
* expression. */
Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */
- long extendStart; /* The offset at which a subsequent match
+ size_t extendStart; /* The offset at which a subsequent match
* might begin. */
- long reserved; /* Reserved for later use. */
} Tcl_RegExpInfo;
/*
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 831c3c4..4d51c73 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -127,9 +127,9 @@ Tcl_RegexpObjCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- size_t offset;
+ size_t offset, stringLength, matchLength;
int i, indices, match, about, all, doinline, numMatchesSaved;
- int cflags, eflags, stringLength, matchLength;
+ int cflags, eflags;
Tcl_RegExp regExpr;
Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL;
Tcl_RegExpInfo info;
@@ -309,7 +309,7 @@ Tcl_RegexpObjCmd(
if (offset == TCL_INDEX_START) {
eflags = 0;
- } else if (offset + 1 > (size_t)stringLength + 1) {
+ } else if (offset + 1 > stringLength + 1) {
eflags = TCL_REG_NOTBOL;
} else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') {
eflags = 0;
@@ -373,7 +373,7 @@ Tcl_RegexpObjCmd(
* area. (Scriptics Bug 4391/SF Bug #219232)
*/
- if (i <= info.nsubs && info.matches[i].start >= 0) {
+ if (i <= (int)info.nsubs && info.matches[i].start != TCL_INDEX_NONE) {
start = offset + info.matches[i].start;
end = offset + info.matches[i].end;
@@ -395,7 +395,7 @@ Tcl_RegexpObjCmd(
newPtr = Tcl_NewListObj(2, objs);
} else {
- if (i <= info.nsubs) {
+ if (i <= (int)info.nsubs) {
newPtr = Tcl_GetRange(objPtr,
offset + info.matches[i].start,
offset + info.matches[i].end - 1);
@@ -445,7 +445,7 @@ Tcl_RegexpObjCmd(
offset++;
}
all++;
- if (offset + 1 >= (size_t)stringLength + 1) {
+ if (offset + 1 >= stringLength + 1) {
break;
}
}
@@ -783,7 +783,7 @@ Tcl_RegsubObjCmd(
args = Tcl_Alloc(sizeof(Tcl_Obj*) * numArgs);
memcpy(args, parts, sizeof(Tcl_Obj*) * numParts);
- for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ for (idx = 0 ; idx <= (int)info.nsubs ; idx++) {
subStart = info.matches[idx].start;
subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
@@ -807,7 +807,7 @@ Tcl_RegsubObjCmd(
*/
result = Tcl_EvalObjv(interp, numArgs, args, 0);
- for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ for (idx = 0 ; idx <= (int)info.nsubs ; idx++) {
TclDecrRefCount(args[idx + numParts]);
}
Tcl_Free(args);
@@ -887,7 +887,7 @@ Tcl_RegsubObjCmd(
wsrc - wfirstChar);
}
- if (idx <= info.nsubs) {
+ if (idx <= (int)info.nsubs) {
subStart = info.matches[idx].start;
subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
@@ -3734,13 +3734,13 @@ TclNRSwitchObjCmd(
TclNewObj(indicesObj);
}
- for (j=0 ; j<=info.nsubs ; j++) {
+ for (j=0 ; j<=(int)info.nsubs ; j++) {
if (indexVarObj != NULL) {
Tcl_Obj *rangeObjAry[2];
- if (info.matches[j].end > 0) {
- rangeObjAry[0] = Tcl_NewWideIntObj(info.matches[j].start);
- rangeObjAry[1] = Tcl_NewWideIntObj(info.matches[j].end-1);
+ if (info.matches[j].end + 1 > 1) {
+ rangeObjAry[0] = TclNewWideIntObjFromSize(info.matches[j].start);
+ rangeObjAry[1] = TclNewWideIntObjFromSize(info.matches[j].end-1);
} else {
rangeObjAry[0] = rangeObjAry[1] = Tcl_NewWideIntObj(-1);
}
diff --git a/generic/tclInt.decls b/generic/tclInt.decls
index c0d7696..096f06f 100644
--- a/generic/tclInt.decls
+++ b/generic/tclInt.decls
@@ -614,8 +614,8 @@ declare 150 {
int TclRegAbout(Tcl_Interp *interp, Tcl_RegExp re)
}
declare 151 {
- void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index, int *startPtr,
- int *endPtr)
+ void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index, size_t *startPtr,
+ size_t *endPtr)
}
declare 152 {
void TclSetLibraryPath(Tcl_Obj *pathPtr)
diff --git a/generic/tclInt.h b/generic/tclInt.h
index e0de48f..44d0ec6 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -4936,7 +4936,7 @@ MODULE_SCOPE Tcl_PackageInitProc Procbodytest_SafeInit;
# define TclNewWideIntObjFromSize(value) \
Tcl_NewWideIntObj(TclWideIntFromSize(value))
#else
-# define TclWideIntFromSize(value) (value)
+# define TclWideIntFromSize(value) ((Tcl_WideInt)(value))
# define TclNewWideIntObjFromSize Tcl_NewWideIntObj
#endif
diff --git a/generic/tclIntDecls.h b/generic/tclIntDecls.h
index 61249c0..bea4381 100644
--- a/generic/tclIntDecls.h
+++ b/generic/tclIntDecls.h
@@ -337,7 +337,7 @@ EXTERN void TclHandleRelease(TclHandle handle);
EXTERN int TclRegAbout(Tcl_Interp *interp, Tcl_RegExp re);
/* 151 */
EXTERN void TclRegExpRangeUniChar(Tcl_RegExp re, size_t index,
- int *startPtr, int *endPtr);
+ size_t *startPtr, size_t *endPtr);
/* 152 */
EXTERN void TclSetLibraryPath(Tcl_Obj *pathPtr);
/* 153 */
@@ -739,7 +739,7 @@ typedef struct TclIntStubs {
TclHandle (*tclHandlePreserve) (TclHandle handle); /* 148 */
void (*tclHandleRelease) (TclHandle handle); /* 149 */
int (*tclRegAbout) (Tcl_Interp *interp, Tcl_RegExp re); /* 150 */
- void (*tclRegExpRangeUniChar) (Tcl_RegExp re, size_t index, int *startPtr, int *endPtr); /* 151 */
+ void (*tclRegExpRangeUniChar) (Tcl_RegExp re, size_t index, size_t *startPtr, size_t *endPtr); /* 151 */
void (*tclSetLibraryPath) (Tcl_Obj *pathPtr); /* 152 */
Tcl_Obj * (*tclGetLibraryPath) (void); /* 153 */
void (*reserved154)(void);
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 67195bb..f793cb8 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -264,7 +264,7 @@ Tcl_RegExpRange(
if (index > regexpPtr->re.re_nsub) {
*startPtr = *endPtr = NULL;
- } else if (regexpPtr->matches[index].rm_so == -1) {
+ } else if (regexpPtr->matches[index].rm_so == (size_t)-1) {
*startPtr = *endPtr = NULL;
} else {
if (regexpPtr->objPtr) {
@@ -365,17 +365,17 @@ TclRegExpRangeUniChar(
* > 0 means give the range of a matching
* subrange, -1 means the range of the
* rm_extend field. */
- int *startPtr, /* Store address of first character in
+ size_t *startPtr, /* Store address of first character in
* (sub-)range here. */
- int *endPtr) /* Store address of character just after last
+ size_t *endPtr) /* Store address of character just after last
* in (sub-)range here. */
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- if ((regexpPtr->flags&REG_EXPECT) && index == TCL_AUTO_LENGTH) {
+ if ((regexpPtr->flags&REG_EXPECT) && (index == TCL_INDEX_NONE)) {
*startPtr = regexpPtr->details.rm_extend.rm_so;
*endPtr = regexpPtr->details.rm_extend.rm_eo;
- } else if (index > regexpPtr->re.re_nsub) {
+ } else if (index + 1 > regexpPtr->re.re_nsub + 1) {
*startPtr = -1;
*endPtr = -1;
} else {
@@ -677,7 +677,7 @@ TclRegAbout(
resultObj = Tcl_NewObj();
Tcl_ListObjAppendElement(NULL, resultObj,
- Tcl_NewWideIntObj((Tcl_WideInt) regexpPtr->re.re_nsub));
+ TclNewWideIntObjFromSize(regexpPtr->re.re_nsub));
/*
* Now append a list of all the bit-flags set for the RE.
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 4953133..ad66688 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -342,7 +342,7 @@ static int TestreturnObjCmd(void *dummy,
Tcl_Interp *interp, int objc,
Tcl_Obj *const objv[]);
static void TestregexpXflags(const char *string,
- int length, int *cflagsPtr, int *eflagsPtr);
+ size_t length, int *cflagsPtr, int *eflagsPtr);
static int TestsaveresultCmd(void *dummy,
Tcl_Interp *interp, int objc,
Tcl_Obj *const objv[]);
@@ -3878,12 +3878,12 @@ TestregexpObjCmd(
if (objc > 2 && (cflags&REG_EXPECT) && indices) {
const char *varName;
const char *value;
- int start, end;
+ size_t start, end;
char resinfo[TCL_INTEGER_SPACE * 2];
varName = Tcl_GetString(objv[2]);
TclRegExpRangeUniChar(regExpr, -1, &start, &end);
- sprintf(resinfo, "%d %d", start, end-1);
+ sprintf(resinfo, "%" TCL_LL_MODIFIER "d %" TCL_LL_MODIFIER "d", TclWideIntFromSize(start), TclWideIntFromSize(end-1));
value = Tcl_SetVar2(interp, varName, NULL, resinfo, 0);
if (value == NULL) {
Tcl_AppendResult(interp, "couldn't set variable \"",
@@ -3897,7 +3897,7 @@ TestregexpObjCmd(
Tcl_RegExpGetInfo(regExpr, &info);
varName = Tcl_GetString(objv[2]);
- sprintf(resinfo, "%ld", info.extendStart);
+ sprintf(resinfo, "%" TCL_LL_MODIFIER "d", TclWideIntFromSize(info.extendStart));
value = Tcl_SetVar2(interp, varName, NULL, resinfo, 0);
if (value == NULL) {
Tcl_AppendResult(interp, "couldn't set variable \"",
@@ -3918,7 +3918,7 @@ TestregexpObjCmd(
Tcl_RegExpGetInfo(regExpr, &info);
for (i = 0; i < objc; i++) {
- int start, end;
+ size_t start, end;
Tcl_Obj *newPtr, *varPtr, *valuePtr;
varPtr = objv[i];
@@ -3928,9 +3928,9 @@ TestregexpObjCmd(
if (ii == -1) {
TclRegExpRangeUniChar(regExpr, ii, &start, &end);
- } else if (ii > info.nsubs) {
- start = -1;
- end = -1;
+ } else if (ii > (int)info.nsubs) {
+ start = TCL_INDEX_NONE;
+ end = TCL_INDEX_NONE;
} else {
start = info.matches[ii].start;
end = info.matches[ii].end;
@@ -3941,19 +3941,19 @@ TestregexpObjCmd(
* instead of the first character after the match.
*/
- if (end >= 0) {
+ if (end != TCL_INDEX_NONE) {
end--;
}
- objs[0] = Tcl_NewWideIntObj(start);
- objs[1] = Tcl_NewWideIntObj(end);
+ objs[0] = TclNewWideIntObjFromSize(start);
+ objs[1] = TclNewWideIntObjFromSize(end);
newPtr = Tcl_NewListObj(2, objs);
} else {
if (ii == -1) {
TclRegExpRangeUniChar(regExpr, ii, &start, &end);
newPtr = Tcl_GetRange(objPtr, start, end);
- } else if (ii > info.nsubs) {
+ } else if (ii > (int)info.nsubs) {
newPtr = Tcl_NewObj();
} else {
newPtr = Tcl_GetRange(objPtr, info.matches[ii].start,
@@ -3994,11 +3994,12 @@ TestregexpObjCmd(
static void
TestregexpXflags(
const char *string, /* The string of flags. */
- int length, /* The length of the string in bytes. */
+ size_t length, /* The length of the string in bytes. */
int *cflagsPtr, /* compile flags word */
int *eflagsPtr) /* exec flags word */
{
- int i, cflags, eflags;
+ size_t i;
+ int cflags, eflags;
cflags = *cflagsPtr;
eflags = *eflagsPtr;