summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorstanton <stanton>1999-06-10 04:28:49 (GMT)
committerstanton <stanton>1999-06-10 04:28:49 (GMT)
commit81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc (patch)
treed6b1bb3e195e2ca1b7f57a6a2987c276d5013ffa /generic
parentd94b3c0f6b26564b83b3767980271dd332314d06 (diff)
downloadtcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.zip
tcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.tar.gz
tcl-81e8aa10e3e8c90c32e241356ce9bc68b5e4b3dc.tar.bz2
* generic/tclUnicodeObj.c: Lots of cleanup and simplification.
Fixed several memory bugs. Added TclAppendUnicodeToObj. * generic/tclInt.h: Added declarations for various Unicode string functions. * generic/tclRegexp.c: * generic/tclCmdMZ.c: Changed to use new Unicode string interfaces for better performance. * generic/tclRegexp.h: * generic/tclRegexp.c: * generic/tcl.h: * generic/tcl.decls: Added Tcl_RegExpMatchObj and Tcl_RegExpGetInfo calls to access lower level regexp API. These features are needed by Expect. This is a preliminary implementation pending final review and cleanup. * generic/tclCmdMZ.c: * tests/string.test: Fixed bug where string map failed on null strings.
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls10
-rw-r--r--generic/tcl.h42
-rw-r--r--generic/tclCmdMZ.c178
-rw-r--r--generic/tclDecls.h19
-rw-r--r--generic/tclInt.h7
-rw-r--r--generic/tclRegexp.c142
-rw-r--r--generic/tclRegexp.h12
-rw-r--r--generic/tclStubInit.c4
-rw-r--r--generic/tclUnicodeObj.c214
9 files changed, 435 insertions, 193 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 68d50db..8447520 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.14 1999/05/25 01:00:24 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.15 1999/06/10 04:28:49 stanton Exp $
library tcl
@@ -1277,6 +1277,14 @@ declare 374 generic {
declare 375 generic {
int Tcl_UniCharIsPunct(int ch)
}
+declare 376 generic {
+ int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_RegExp regexp, \
+ Tcl_Obj *objPtr, int offset, int nmatches, int flags)
+}
+declare 377 generic {
+ void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
+}
+
##############################################################################
diff --git a/generic/tcl.h b/generic/tcl.h
index 829f0b7..2c7ea05 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tcl.h,v 1.44 1999/05/22 01:20:11 stanton Exp $
+ * RCS: @(#) $Id: tcl.h,v 1.45 1999/06/10 04:28:49 stanton Exp $
*/
#ifndef _TCL
@@ -373,6 +373,46 @@ typedef struct Tcl_Trace_ *Tcl_Trace;
typedef struct Tcl_Var_ *Tcl_Var;
/*
+ * Flag values passed to Tcl_GetRegExpFromObj.
+ */
+
+#define TCL_REG_BASIC 000000 /* BREs (convenience) */
+#define TCL_REG_EXTENDED 000001 /* EREs */
+#define TCL_REG_ADVF 000002 /* advanced features in EREs */
+#define TCL_REG_ADVANCED 000003 /* AREs (which are also EREs) */
+#define TCL_REG_QUOTE 000004 /* no special characters, none */
+#define TCL_REG_NOCASE 000010 /* ignore case */
+#define TCL_REG_NOSUB 000020 /* don't care about subexpressions */
+#define TCL_REG_EXPANDED 000040 /* expanded format, white space &
+ * comments */
+#define TCL_REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
+#define TCL_REG_NLANCH 000200 /* ^ matches after \n, $ before */
+#define TCL_REG_NEWLINE 000300 /* newlines are line terminators */
+#define TCL_REG_CANMATCH 001000 /* report details on partial/limited
+ * matches */
+
+/*
+ * Structures filled in by Tcl_RegExpInfo. Note that all offset values are
+ * relative to the start of the match string, not the beginning of the
+ * entire string.
+ */
+
+typedef struct Tcl_RegExpIndices {
+ long start; /* character offset of first character in match */
+ long end; /* character offset of first character after the
+ * match. */
+} Tcl_RegExpIndices;
+
+typedef struct Tcl_RegExpInfo {
+ int nsubs; /* number of subexpressions in the
+ * compiled expression*/
+ Tcl_RegExpIndices *matches; /* array of nsubs match offset
+ * pairs */
+ long extendStart; /* The offset at which a subsequent
+ * match might begin. */
+} Tcl_RegExpInfo;
+
+/*
* Picky compilers complain if this typdef doesn't appear before the
* struct's reference in tclDecls.h.
*/
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ebea22b..4f20815 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.14 1999/06/10 04:28:50 stanton Exp $
*/
#include "tclInt.h"
@@ -126,12 +126,11 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, result, indices, stringLength, wLen, match, about;
+ int i, indices, match, about;
int cflags, eflags;
Tcl_RegExp regExpr;
- char *string;
- Tcl_DString stringBuffer, valueBuffer;
- Tcl_UniChar *wStart;
+ Tcl_Obj *objPtr;
+ Tcl_RegExpInfo info;
static char *options[] = {
"-indices", "-nocase", "-about", "-expanded",
"-line", "-linestop", "-lineanchor",
@@ -209,6 +208,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
if (regExpr == NULL) {
return TCL_ERROR;
}
+ objPtr = objv[1];
if (about) {
if (TclRegAbout(interp, regExpr) < 0) {
@@ -217,27 +217,21 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
return TCL_OK;
}
- result = TCL_OK;
- string = Tcl_GetStringFromObj(objv[1], &stringLength);
+ match = Tcl_RegExpMatchObj(interp, regExpr, objPtr, 0 /* offset */,
+ objc-2 /* nmatches */, eflags);
- Tcl_DStringInit(&valueBuffer);
-
- Tcl_DStringInit(&stringBuffer);
- wStart = Tcl_UtfToUniCharDString(string, stringLength, &stringBuffer);
- wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
- match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, objc-2, eflags);
if (match < 0) {
- result = TCL_ERROR;
- goto done;
+ return TCL_ERROR;
}
+
if (match == 0) {
/*
- * Set the interpreter's object result to an integer object w/ value 0.
+ * Set the interpreter's object result to an integer object w/
+ * value 0.
*/
Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
- goto done;
+ return TCL_OK;
}
/*
@@ -248,37 +242,51 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
objc -= 2;
objv += 2;
+ Tcl_RegExpGetInfo(regExpr, &info);
for (i = 0; i < objc; i++) {
- char *varName, *value;
- int start, end;
+ Tcl_Obj *varPtr, *valuePtr, *newPtr;
- varName = Tcl_GetString(objv[i]);
+ varPtr = objv[i];
+ if (indices) {
+ int start, end;
+ Tcl_Obj *objs[2];
+
+ if (i <= info.nsubs) {
+ start = info.matches[i].start;
+ end = info.matches[i].end;
- TclRegExpRangeUniChar(regExpr, i, &start, &end);
- if (start < 0) {
- if (indices) {
- value = Tcl_SetVar(interp, varName, "-1 -1", 0);
+ /*
+ * Adjust index so it refers to the last character in the
+ * match instead of the first character after the match.
+ */
+
+ if (end >= 0) {
+ end--;
+ }
} else {
- value = Tcl_SetVar(interp, varName, "", 0);
+ start = -1;
+ end = -1;
}
+
+ objs[0] = Tcl_NewLongObj(start);
+ objs[1] = Tcl_NewLongObj(end);
+
+ newPtr = Tcl_NewListObj(2, objs);
} else {
- if (indices) {
- char info[TCL_INTEGER_SPACE * 2];
-
- sprintf(info, "%d %d", start, end - 1);
- value = Tcl_SetVar(interp, varName, info, 0);
+ if (i <= info.nsubs) {
+ newPtr = TclGetRangeFromObj(objPtr, info.matches[i].start,
+ info.matches[i].end - 1);
} else {
- value = Tcl_UniCharToUtfDString(wStart + start, end - start,
- &valueBuffer);
- value = Tcl_SetVar(interp, varName, value, 0);
- Tcl_DStringSetLength(&valueBuffer, 0);
+ newPtr = Tcl_NewObj();
+
}
}
- if (value == NULL) {
+ valuePtr = Tcl_ObjSetVar2(interp, varPtr, NULL, newPtr, 0);
+ if (valuePtr == NULL) {
+ Tcl_DecrRefCount(newPtr);
Tcl_AppendResult(interp, "couldn't set variable \"",
- varName, "\"", (char *) NULL);
- result = TCL_ERROR;
- goto done;
+ Tcl_GetString(varPtr), "\"", (char *) NULL);
+ return TCL_ERROR;
}
}
@@ -287,11 +295,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
*/
Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
-
- done:
- Tcl_DStringFree(&stringBuffer);
- Tcl_DStringFree(&valueBuffer);
- return result;
+ return TCL_OK;
}
/*
@@ -319,11 +323,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, result, flags, all, stringLength, numMatches;
+ int i, result, cflags, all, wlen, numMatches, offset;
Tcl_RegExp regExpr;
- Tcl_DString resultBuffer, stringBuffer;
- CONST Tcl_UniChar *w, *wStart, *wEnd;
- char *string, *subspec, *varname;
+ Tcl_Obj *resultPtr, *varPtr, *objPtr;
+ Tcl_UniChar *wstring;
+ char *subspec;
+
static char *options[] = {
"-all", "-nocase", "--", NULL
};
@@ -331,7 +336,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
REGSUB_ALL, REGSUB_NOCASE, REGSUB_LAST
};
- flags = 0;
+ cflags = REG_ADVANCED;
all = 0;
for (i = 1; i < objc; i++) {
@@ -352,7 +357,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
break;
}
case REGSUB_NOCASE: {
- flags |= REG_ICASE;
+ cflags |= REG_ICASE;
break;
}
case REGSUB_LAST: {
@@ -369,17 +374,21 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
}
objv += i;
- regExpr = Tcl_GetRegExpFromObj(interp, objv[0], flags | REG_ADVANCED);
+
+ regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
if (regExpr == NULL) {
return TCL_ERROR;
}
result = TCL_OK;
- string = Tcl_GetStringFromObj(objv[1], &stringLength);
- subspec = Tcl_GetString(objv[2]);
- varname = Tcl_GetString(objv[3]);
+ resultPtr = Tcl_NewObj();
+ Tcl_IncrRefCount(resultPtr);
- Tcl_DStringInit(&resultBuffer);
+ objPtr = objv[1];
+ wlen = TclGetUnicodeLengthFromObj(objPtr);
+ wstring = TclGetUnicodeFromObj(objPtr);
+ subspec = Tcl_GetString(objv[2]);
+ varPtr = objv[3];
/*
* The following loop is to handle multiple matches within the
@@ -388,23 +397,22 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* then the loop body only gets executed once.
*/
- Tcl_DStringInit(&stringBuffer);
- wStart = Tcl_UtfToUniCharDString(string, stringLength, &stringBuffer);
- wEnd = wStart + Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
numMatches = 0;
- for (w = wStart; w < wEnd; ) {
+ offset = 0;
+ for (offset = 0; offset < wlen; ) {
int start, end, subStart, subEnd, match;
char *src, *firstChar;
char c;
+ Tcl_RegExpInfo info;
/*
* The flags argument is set if string is part of a larger string,
* so that "^" won't match.
*/
- match = TclRegExpExecUniChar(interp, regExpr, w, wEnd - w, 10,
- ((w > wStart) ? REG_NOTBOL : 0));
+ match = Tcl_RegExpMatchObj(interp, regExpr, objPtr, offset,
+ 10 /* matches */, ((offset > 0) ? REG_NOTBOL : 0));
+
if (match < 0) {
result = TCL_ERROR;
goto done;
@@ -419,9 +427,11 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* result variable.
*/
- TclRegExpRangeUniChar(regExpr, 0, &start, &end);
- Tcl_UniCharToUtfDString(w, start, &resultBuffer);
-
+ Tcl_RegExpGetInfo(regExpr, &info);
+ start = info.matches[0].start;
+ end = info.matches[0].end;
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, start);
+
/*
* Append the subSpec argument to the variable, making appropriate
* substitutions. This code is a bit hairy because of the backslash
@@ -441,9 +451,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
if ((c >= '0') && (c <= '9')) {
index = c - '0';
} else if ((c == '\\') || (c == '&')) {
- Tcl_DStringAppend(&resultBuffer, firstChar,
- src - firstChar);
- Tcl_DStringAppend(&resultBuffer, &c, 1);
+ Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
+ Tcl_AppendToObj(resultPtr, &c, 1);
firstChar = src + 2;
src++;
continue;
@@ -454,12 +463,13 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
continue;
}
if (firstChar != src) {
- Tcl_DStringAppend(&resultBuffer, firstChar, src - firstChar);
+ Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
}
- TclRegExpRangeUniChar(regExpr, index, &subStart, &subEnd);
+ subStart = info.matches[index].start;
+ subEnd = info.matches[index].end;
if ((subStart >= 0) && (subEnd >= 0)) {
- Tcl_UniCharToUtfDString(w + subStart, subEnd - subStart,
- &resultBuffer);
+ TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart,
+ subEnd - subStart);
}
if (*src == '\\') {
src++;
@@ -467,7 +477,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
firstChar = src + 1;
}
if (firstChar != src) {
- Tcl_DStringAppend(&resultBuffer, firstChar, src - firstChar);
+ Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
}
if (end == 0) {
/*
@@ -475,10 +485,10 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* in order to prevent infinite loops.
*/
- Tcl_UniCharToUtfDString(w, 1, &resultBuffer);
- w++;
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ offset++;
}
- w += end;
+ offset += end;
if (!all) {
break;
}
@@ -489,13 +499,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* result variable.
*/
- if ((w < wEnd) || (numMatches == 0)) {
- Tcl_UniCharToUtfDString(w, wEnd - w, &resultBuffer);
+ if ((offset < wlen) || (numMatches == 0)) {
+ TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
}
- if (Tcl_SetVar(interp, varname, Tcl_DStringValue(&resultBuffer),
- 0) == NULL) {
- Tcl_AppendResult(interp, "couldn't set variable \"", varname, "\"",
- (char *) NULL);
+ if (Tcl_ObjSetVar2(interp, varPtr, NULL, resultPtr, 0) == NULL) {
+ Tcl_AppendResult(interp, "couldn't set variable \"",
+ Tcl_GetString(varPtr), "\"", (char *) NULL);
result = TCL_ERROR;
} else {
/*
@@ -507,8 +516,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
}
done:
- Tcl_DStringFree(&stringBuffer);
- Tcl_DStringFree(&resultBuffer);
+ Tcl_DecrRefCount(resultPtr);
return result;
}
@@ -1496,7 +1504,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
} else {
uselen = length2;
}
- if ((uselen <= length1) &&
+ if ((uselen > 0) && (uselen <= length1) &&
(str_comp_fn(string2, string1, uselen) == 0)) {
/*
* Adjust len to be full length of matched string
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 9eaf421..2a7ac93 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.14 1999/05/25 01:00:25 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.15 1999/06/10 04:28:50 stanton Exp $
*/
#ifndef _TCLDECLS
@@ -1168,6 +1168,13 @@ EXTERN int Tcl_UniCharIsGraph _ANSI_ARGS_((int ch));
EXTERN int Tcl_UniCharIsPrint _ANSI_ARGS_((int ch));
/* 375 */
EXTERN int Tcl_UniCharIsPunct _ANSI_ARGS_((int ch));
+/* 376 */
+EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp,
+ Tcl_RegExp regexp, Tcl_Obj * objPtr,
+ int offset, int nmatches, int flags));
+/* 377 */
+EXTERN void Tcl_RegExpGetInfo _ANSI_ARGS_((Tcl_RegExp regexp,
+ Tcl_RegExpInfo * infoPtr));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1611,6 +1618,8 @@ typedef struct TclStubs {
int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */
int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */
int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
+ int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */
+ void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */
} TclStubs;
#ifdef __cplusplus
@@ -3151,6 +3160,14 @@ extern TclStubs *tclStubsPtr;
#define Tcl_UniCharIsPunct \
(tclStubsPtr->tcl_UniCharIsPunct) /* 375 */
#endif
+#ifndef Tcl_RegExpMatchObj
+#define Tcl_RegExpMatchObj \
+ (tclStubsPtr->tcl_RegExpMatchObj) /* 376 */
+#endif
+#ifndef Tcl_RegExpGetInfo
+#define Tcl_RegExpGetInfo \
+ (tclStubsPtr->tcl_RegExpGetInfo) /* 377 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 506c953..d30d439 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclInt.h,v 1.31 1999/06/08 23:30:24 hershey Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.32 1999/06/10 04:28:51 stanton Exp $
*/
#ifndef _TCLINT
@@ -1546,6 +1546,9 @@ EXTERN void TclAllocateFreeObjects _ANSI_ARGS_((void));
EXTERN Tcl_Obj * TclAppendObjToUnicodeObj _ANSI_ARGS_((
register Tcl_Obj *targetObjPtr,
register Tcl_Obj *srcObjPtr));
+EXTERN void TclAppendUnicodeToObj _ANSI_ARGS_((
+ register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
+ int length));
EXTERN int TclArraySet _ANSI_ARGS_((Tcl_Interp *interp,
Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj));
EXTERN int TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp,
@@ -1692,6 +1695,8 @@ EXTERN Var * TclLookupVar _ANSI_ARGS_((Tcl_Interp *interp,
EXTERN int TclMathInProgress _ANSI_ARGS_((void));
EXTERN int TclNeedSpace _ANSI_ARGS_((char *start, char *end));
EXTERN Tcl_Obj * TclNewProcBodyObj _ANSI_ARGS_((Proc *procPtr));
+EXTERN Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
+ int numChars));
EXTERN int TclObjCommandComplete _ANSI_ARGS_((Tcl_Obj *cmdPtr));
EXTERN int TclObjInterpProc _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, int objc,
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 2318780..7590c8d 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.6 1999/06/02 01:53:31 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $
*/
#include "tclInt.h"
@@ -177,33 +177,21 @@ Tcl_RegExpExec(interp, re, string, start)
* this identifies beginning of larger
* string, so that "^" won't match. */
{
- int result, numChars;
- Tcl_DString stringBuffer;
- Tcl_UniChar *uniString;
-
- TclRegexp *regexpPtr = (TclRegexp *) re;
+ int flags;
/*
- * Remember the UTF-8 string so Tcl_RegExpRange() can convert the
- * matches from character to byte offsets.
+ * If the starting point is offset from the beginning of the buffer,
+ * then we need to tell the regexp engine not to match "^".
*/
- regexpPtr->string = string;
-
- Tcl_DStringInit(&stringBuffer);
- uniString = Tcl_UtfToUniCharDString(string, -1, &stringBuffer);
- numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
- /*
- * Perform the regexp match.
- */
-
- result = TclRegExpExecUniChar(interp, re, uniString, numChars, -1,
- ((string > start) ? REG_NOTBOL : 0));
-
- Tcl_DStringFree(&stringBuffer);
+ if (string > start) {
+ flags = REG_NOTBOL;
+ } else {
+ flags = 0;
+ }
- return result;
+ return Tcl_RegExpMatchObj(interp, re, Tcl_NewStringObj(string, -1),
+ 0 /* offset */, -1 /* nmatches */, flags);
}
/*
@@ -238,16 +226,16 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
* in (sub-) range here. */
{
TclRegexp *regexpPtr = (TclRegexp *) re;
+ char *string;
if ((size_t) index > regexpPtr->re.re_nsub) {
*startPtr = *endPtr = NULL;
} else if (regexpPtr->matches[index].rm_so < 0) {
*startPtr = *endPtr = NULL;
} else {
- *startPtr = Tcl_UtfAtIndex(regexpPtr->string,
- regexpPtr->matches[index].rm_so);
- *endPtr = Tcl_UtfAtIndex(regexpPtr->string,
- regexpPtr->matches[index].rm_eo);
+ string = Tcl_GetString(regexpPtr->objPtr);
+ *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
+ *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
}
}
@@ -290,8 +278,9 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
size_t last = regexpPtr->re.re_nsub + 1;
size_t nm = last;
- if (nmatches >= 0 && (size_t) nmatches < nm)
+ if (nmatches >= 0 && (size_t) nmatches < nm) {
nm = (size_t) nmatches;
+ }
status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
&regexpPtr->details, nm, regexpPtr->matches, flags);
@@ -398,6 +387,100 @@ Tcl_RegExpMatch(interp, string, pattern)
/*
*----------------------------------------------------------------------
*
+ * Tcl_RegExpMatchObj --
+ *
+ * Match a precompiled regexp against the given object.
+ *
+ * Results:
+ * If an error occurs during the matching operation then -1
+ * is returned and the interp's result contains an error message.
+ * Otherwise the return value is 1 if "string" matches "pattern"
+ * and 0 otherwise.
+ *
+ * Side effects:
+ * Converts the object to a Unicode object.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags)
+ Tcl_Interp *interp; /* Interpreter to use for error reporting. */
+ Tcl_RegExp re; /* Compiled regular expression; must have
+ * been returned by previous call to
+ * Tcl_GetRegExpFromObj. */
+ Tcl_Obj *objPtr; /* String against which to match re. */
+ int offset; /* Character index that marks where matching
+ * should begin. */
+ int nmatches; /* How many subexpression matches (counting
+ * the whole match as subexpression 0) are
+ * of interest. -1 means all of them. */
+ int flags; /* Regular expression execution flags. */
+{
+ TclRegexp *regexpPtr = (TclRegexp *) re;
+ Tcl_Obj *oldPtr = regexpPtr->objPtr;
+ Tcl_UniChar *udata;
+ int length;
+
+ /*
+ * Bump the refcount before we do anything in case the object
+ * was newly created.
+ */
+
+ Tcl_IncrRefCount(objPtr);
+
+ udata = TclGetUnicodeFromObj(objPtr);
+ length = TclGetUnicodeLengthFromObj(objPtr);
+
+ /*
+ * Save the target object so we can extract strings from it later.
+ */
+
+ regexpPtr->objPtr = objPtr;
+ if (oldPtr) {
+ Tcl_DecrRefCount(oldPtr);
+ }
+
+ if (offset > length) {
+ offset = length;
+ }
+ udata += offset;
+ length -= offset;
+
+ return TclRegExpExecUniChar(interp, re, udata, length, nmatches, flags);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_RegExpGetInfo --
+ *
+ * Retrieve information about the current match.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_RegExpGetInfo(regexp, infoPtr)
+ Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
+ Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
+{
+ TclRegexp *regexpPtr = (TclRegexp *) regexp;
+
+ infoPtr->nsubs = regexpPtr->re.re_nsub;
+ infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
+ infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TclRegExpMatchObj --
*
* See if a string matches a regular expression pattern object.
@@ -790,6 +873,9 @@ CompileRegexp(interp, string, length, flags)
*/
regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
+ regexpPtr->objPtr = NULL;
+ regexpPtr->details.rm_extend.rm_so = -1;
+ regexpPtr->details.rm_extend.rm_eo = -1;
/*
* Get the up-to-date string representation and map to unicode.
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 7a6fb2a..5cee78e 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.h,v 1.8 1999/06/02 01:53:31 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.9 1999/06/10 04:28:51 stanton Exp $
*/
#ifndef _TCLREGEXP
@@ -33,10 +33,10 @@ typedef struct TclRegexp {
int flags; /* Regexp compile flags. */
regex_t re; /* Compiled re, includes number of
* subexpressions. */
- CONST char *string; /* Last string matched with this regexp
- * (UTF-8), so Tcl_RegExpRange() can convert
- * the matches from character indices to UTF-8
- * byte offsets. */
+ Tcl_Obj *objPtr; /* Last object match with this regexp, so
+ * Tcl_RegExpRange() can convert the matches
+ * from character indices to UTF-8 byte
+ * offsets. */
regmatch_t *matches; /* Array of indices into the Tcl_UniChar
* representation of the last string matched
* with this regexp to indicate the location
@@ -53,8 +53,6 @@ typedef struct TclRegexp {
EXTERN int TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp,
Tcl_RegExp re));
-EXTERN VOID TclRegXflags _ANSI_ARGS_((char *string, int length,
- int *cflagsPtr, int *eflagsPtr));
EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
Tcl_RegExp re, CONST Tcl_UniChar *uniString,
int numChars, int nmatches, int flags));
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 1bcfb63..9a3b4f3 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.16 1999/05/25 01:00:27 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.17 1999/06/10 04:28:51 stanton Exp $
*/
#include "tclInt.h"
@@ -756,6 +756,8 @@ TclStubs tclStubs = {
Tcl_UniCharIsGraph, /* 373 */
Tcl_UniCharIsPrint, /* 374 */
Tcl_UniCharIsPunct, /* 375 */
+ Tcl_RegExpMatchObj, /* 376 */
+ Tcl_RegExpGetInfo, /* 377 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
index 3a4709b..315644d 100644
--- a/generic/tclUnicodeObj.c
+++ b/generic/tclUnicodeObj.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUnicodeObj.c,v 1.4 1999/06/09 17:06:57 hershey Exp $
+ * RCS: @(#) $Id: tclUnicodeObj.c,v 1.5 1999/06/10 04:28:51 stanton Exp $
*/
#include <math.h>
@@ -20,23 +20,19 @@
* Prototypes for local procedures defined in this file:
*/
+static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr,
+ Tcl_UniChar *unichars, int numNewChars));
static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
Tcl_Obj *copyPtr));
static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr));
-static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_Obj *objPtr));
-
-static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void TclAppendUniCharStrToObj _ANSI_ARGS_((
- register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
- int numChars));
-static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
- int numChars));
static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
int numChars));
static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
char *src, int numBytes, int numChars));
+static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_Obj *objPtr));
/*
* The following object type represents a Unicode string. A Unicode string
@@ -68,13 +64,13 @@ typedef struct Unicode {
int numChars; /* The number of chars in the unicode
* string. */
size_t allocated; /* The amount of space actually allocated. */
- unsigned char chars[4]; /* The array of chars. The actual size of
+ Tcl_UniChar chars[2]; /* The array of chars. The actual size of
* this field depends on the 'allocated' field
* above. */
} Unicode;
#define UNICODE_SIZE(len) \
- ((unsigned) (sizeof(Unicode) - 4 + (len)))
+ ((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len)))
#define GET_UNICODE(objPtr) \
((Unicode *) (objPtr)->internalRep.otherValuePtr)
#define SET_UNICODE(objPtr, unicodePtr) \
@@ -104,7 +100,6 @@ Tcl_UniChar *
TclGetUnicodeFromObj(objPtr)
Tcl_Obj *objPtr; /* The object to find the unicode string for. */
{
- Tcl_UniChar *unicharPtr;
Unicode *unicodePtr;
int numBytes;
char *src;
@@ -124,9 +119,15 @@ TclGetUnicodeFromObj(objPtr)
src = Tcl_GetStringFromObj(objPtr, &numBytes);
SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars);
+
+ /*
+ * We need to fetch the pointer again because we have just
+ * reallocated the structure to make room for the Unicode data.
+ */
+
+ unicodePtr = GET_UNICODE(objPtr);
}
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- return unicharPtr;
+ return unicodePtr->chars;
}
/*
@@ -185,7 +186,7 @@ TclGetUniCharFromObj(objPtr, index)
Tcl_Obj *objPtr; /* The Unicode object. */
int index; /* Get the index'th character. */
{
- Tcl_UniChar *unicharPtr, unichar;
+ Tcl_UniChar unichar;
Unicode *unicodePtr;
int length;
@@ -206,8 +207,7 @@ TclGetUniCharFromObj(objPtr, index)
str = Tcl_GetStringFromObj(objPtr, &length);
Tcl_UtfToUniChar(&str[index], &unichar);
} else {
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- unichar = unicharPtr[index];
+ unichar = unicodePtr->chars[index];
}
return unichar;
}
@@ -217,11 +217,11 @@ TclGetUniCharFromObj(objPtr, index)
*
* TclGetRangeFromObj --
*
- * Create a Tcl Object that contains the chars between first and
- * last of the object indicated by "objPtr". If the object is not
- * already a Unicode object, an attempt will be made to convert it
- * to one. The first and last indices are assumed to be in the
- * appropriate range.
+ * Create a Tcl Object that contains the chars between first and last
+ * of the object indicated by "objPtr". If the object is not already
+ * a Unicode object, an attempt will be made to convert it to one.
+ * The first and last indices are assumed to be in the appropriate
+ * range.
*
* Results:
* Returns a new Tcl Object of either "string" or "unicode" type,
@@ -241,7 +241,6 @@ TclGetRangeFromObj(objPtr, first, last)
int last; /* Last index of the range. */
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
- Tcl_UniChar *unicharPtr;
Unicode *unicodePtr;
int length;
@@ -250,8 +249,7 @@ TclGetRangeFromObj(objPtr, first, last)
length = objPtr->length;
if (unicodePtr->numChars != length) {
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1);
+ newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1);
} else {
int length;
char *str;
@@ -273,7 +271,7 @@ TclGetRangeFromObj(objPtr, first, last)
*
* TclAppendObjToUnicodeObj --
*
- * This procedure appends the contest of "srcObjPtr" to the Unicode
+ * This procedure appends the contents of "srcObjPtr" to the Unicode
* object "destPtr".
*
* Results:
@@ -367,7 +365,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
} else {
unicodePtr = GET_UNICODE(srcObjPtr);
numChars = unicodePtr->numChars;
- unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars;
+ unicharSrcStr = unicodePtr->chars;
}
} else {
utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
@@ -383,7 +381,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
* Append the unichar src string to the result object.
*/
- TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
+ AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
Tcl_DStringFree(&dsPtr);
return resultObjPtr;
}
@@ -391,7 +389,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
/*
*----------------------------------------------------------------------
*
- * TclAppendUniCharStrToObj --
+ * AppendUniCharStrToObj --
*
* This procedure appends the contents of "srcObjPtr" to the
* Unicode object "objPtr".
@@ -406,31 +404,25 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
*----------------------------------------------------------------------
*/
-void
-TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
+static void
+AppendUniCharStrToObj(objPtr, unichars, numNewChars)
register Tcl_Obj *objPtr; /* Points to the object to append to. */
Tcl_UniChar *unichars; /* The unicode string to append to the
* object. */
int numNewChars; /* Number of chars in "unichars". */
{
Unicode *unicodePtr;
- int usedBytes, numNewBytes, totalNumBytes, totalNumChars;
-
- /*
- * Invalidate the StringRep.
- */
-
- Tcl_InvalidateStringRep(objPtr);
+ int numChars;
+ size_t numBytes;
+ SetUnicodeFromAny(NULL, objPtr);
unicodePtr = GET_UNICODE(objPtr);
- usedBytes = unicodePtr->numChars * sizeof(Tcl_UniChar);
- totalNumChars = numNewChars + unicodePtr->numChars;
- totalNumBytes = totalNumChars * sizeof(Tcl_UniChar);
- numNewBytes = numNewChars * sizeof(Tcl_UniChar);
+ numChars = numNewChars + unicodePtr->numChars;
+ numBytes = (numChars + 1) * sizeof(Tcl_UniChar);
- if (unicodePtr->allocated <= totalNumBytes) {
- int allocatedBytes = totalNumBytes * 2;
+ if (unicodePtr->allocated < numBytes) {
+ int allocatedBytes = numBytes * 2;
/*
* There isn't currently enough space in the Unicode
@@ -439,15 +431,101 @@ TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
* having to reallocate again.
*/
- unicodePtr = (Unicode *) ckrealloc(unicodePtr,
+ unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr,
UNICODE_SIZE(allocatedBytes));
unicodePtr->allocated = allocatedBytes;
unicodePtr = SET_UNICODE(objPtr, unicodePtr);
}
- memcpy((VOID *) (unicodePtr->chars + usedBytes),
- (VOID *) unichars, (size_t) numNewBytes);
- *((Tcl_UniChar *)unicodePtr->chars + totalNumChars) = 0;
- unicodePtr->numChars = totalNumChars;
+ memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars),
+ (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar));
+ unicodePtr->chars[numChars] = 0;
+ unicodePtr->numChars = numChars;
+
+ /*
+ * Invalidate the StringRep.
+ */
+
+ Tcl_InvalidateStringRep(objPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclAppendUnicodeToObj --
+ *
+ * This procedure appends a Unicode string to an object in the
+ * most efficient manner possible.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Invalidates the string rep and creates a new Unicode string.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclAppendUnicodeToObj(objPtr, unichars, length)
+ register Tcl_Obj *objPtr; /* Points to the object to append to. */
+ Tcl_UniChar *unichars; /* The unicode string to append to the
+ * object. */
+ int length; /* Number of chars in "unichars". */
+{
+ Unicode *unicodePtr;
+ int numChars, i;
+ size_t newSize;
+ char *src;
+ Tcl_UniChar *dst;
+
+ if (Tcl_IsShared(objPtr)) {
+ panic("TclAppendUnicodeToObj called with shared object");
+ }
+
+ SetUnicodeFromAny(NULL, objPtr);
+ unicodePtr = GET_UNICODE(objPtr);
+
+ /*
+ * Make the buffer big enough for the result.
+ */
+
+ numChars = unicodePtr->numChars + length;
+ newSize = (numChars + 1) * sizeof(Tcl_UniChar);
+
+ if (newSize > unicodePtr->allocated) {
+ int allocated = newSize * 2;
+
+ unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr,
+ UNICODE_SIZE(allocated));
+
+ if (unicodePtr->allocated == 0) {
+ /*
+ * If the original string was not in Unicode form, add it to the
+ * beginning of the buffer.
+ */
+
+ src = objPtr->bytes;
+ dst = unicodePtr->chars;
+ for (i = 0; i < unicodePtr->numChars; i++) {
+ src += Tcl_UtfToUniChar(src, dst++);
+ }
+ }
+ unicodePtr->allocated = allocated;
+ }
+
+ /*
+ * Copy the new string onto the end of the old string, then add the
+ * trailing null.
+ */
+
+ memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars,
+ length * sizeof(Tcl_UniChar));
+ unicodePtr->numChars = numChars;
+ unicodePtr->chars[numChars] = 0;
+
+ SET_UNICODE(objPtr, unicodePtr);
+
+ Tcl_InvalidateStringRep(objPtr);
}
/*
@@ -497,7 +575,7 @@ TclNewUnicodeObj(unichars, numChars)
unicodePtr->numChars = numChars;
unicodePtr->allocated = allocated;
memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes);
- *((Tcl_UniChar *)unicodePtr->chars + numChars) = 0;
+ unicodePtr->chars[numChars] = 0;
SET_UNICODE(objPtr, unicodePtr);
return objPtr;
}
@@ -572,12 +650,10 @@ DupUnicodeInternalRep(srcPtr, copyPtr)
*/
if (AllSingleByteChars(srcPtr)) {
- copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
+ copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+ copyUnicodePtr->allocated = 0;
} else {
int allocated = srcUnicodePtr->allocated;
- Tcl_UniChar *unichars;
-
- unichars = (Tcl_UniChar *)srcUnicodePtr->chars;
copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
@@ -624,7 +700,7 @@ UpdateStringOfUnicode(objPtr)
Unicode *unicodePtr;
unicodePtr = GET_UNICODE(objPtr);
- src = (Tcl_UniChar *) unicodePtr->chars;
+ src = unicodePtr->chars;
length = unicodePtr->numChars * sizeof(Tcl_UniChar);
/*
@@ -672,16 +748,20 @@ SetOptUnicodeFromAny(objPtr, numChars)
{
Tcl_ObjType *typePtr;
Unicode *unicodePtr;
-
- unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = 0;
typePtr = objPtr->typePtr;
if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
(*typePtr->freeIntRepProc)(objPtr);
}
objPtr->typePtr = &tclUnicodeType;
+
+ /*
+ * Allocate enough space for the basic Unicode structure.
+ */
+
+ unicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+ unicodePtr->numChars = numChars;
+ unicodePtr->allocated = 0;
SET_UNICODE(objPtr, unicodePtr);
}
@@ -719,7 +799,7 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length));
srcEnd = src + numBytes;
- for (dst = (Tcl_UniChar *) unicodePtr->chars; src < srcEnd; dst++) {
+ for (dst = unicodePtr->chars; src < srcEnd; dst++) {
src += Tcl_UtfToUniChar(src, dst);
}
*dst = 0;
@@ -747,10 +827,10 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
*
* Side effects:
* A Unicode object is stored as the internal rep of objPtr. The Unicode
- * ojbect is opitmized for the case where each UTF char in a string is only
- * one byte. In this case, we store the value of numChars, but we don't copy
- * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if
- * all chars are 1 byte long.
+ * object is opitmized for the case where each UTF char in a string is
+ * only one byte. In this case, we store the value of numChars, but we
+ * don't copy the bytes to the unicodeObj->chars. Before accessing
+ * obj->chars, check if all chars are 1 byte long.
*
*---------------------------------------------------------------------------
*/
@@ -760,12 +840,10 @@ SetUnicodeFromAny(interp, objPtr)
Tcl_Interp *interp; /* Not used. */
Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
{
- Tcl_ObjType *typePtr;
int numBytes, numChars;
char *src;
- typePtr = objPtr->typePtr;
- if (typePtr != &tclUnicodeType) {
+ if (objPtr->typePtr != &tclUnicodeType) {
src = Tcl_GetStringFromObj(objPtr, &numBytes);
numChars = Tcl_NumUtfChars(src, numBytes);