10 files changed, 439 insertions, 194 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 68d50db..8447520 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 # 
-# RCS: @(#) $Id: tcl.decls,v 1.14 1999/05/25 01:00:24 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.15 1999/06/10 04:28:49 stanton Exp $
 
 library tcl
 
@@ -1277,6 +1277,14 @@ declare 374 generic {
 declare 375 generic {
     int Tcl_UniCharIsPunct(int ch)
 }
+declare 376 generic {
+    int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_RegExp regexp, \
+	    Tcl_Obj *objPtr, int offset, int nmatches, int flags)
+}
+declare 377 generic {
+    void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
+}
+
 
 ##############################################################################
 
diff --git a/generic/tcl.h b/generic/tcl.h
index 829f0b7..2c7ea05 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -12,7 +12,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tcl.h,v 1.44 1999/05/22 01:20:11 stanton Exp $
+ * RCS: @(#) $Id: tcl.h,v 1.45 1999/06/10 04:28:49 stanton Exp $
  */
 
 #ifndef _TCL
@@ -373,6 +373,46 @@ typedef struct Tcl_Trace_ *Tcl_Trace;
 typedef struct Tcl_Var_ *Tcl_Var;
 
 /*
+ * Flag values passed to Tcl_GetRegExpFromObj.
+ */
+
+#define	TCL_REG_BASIC		000000	/* BREs (convenience) */
+#define	TCL_REG_EXTENDED	000001	/* EREs */
+#define	TCL_REG_ADVF		000002	/* advanced features in EREs */
+#define	TCL_REG_ADVANCED	000003	/* AREs (which are also EREs) */
+#define	TCL_REG_QUOTE		000004	/* no special characters, none */
+#define	TCL_REG_NOCASE		000010	/* ignore case */
+#define	TCL_REG_NOSUB		000020	/* don't care about subexpressions */
+#define	TCL_REG_EXPANDED	000040	/* expanded format, white space &
+					 * comments */
+#define	TCL_REG_NLSTOP		000100  /* \n doesn't match . or [^ ] */
+#define	TCL_REG_NLANCH		000200  /* ^ matches after \n, $ before */
+#define	TCL_REG_NEWLINE		000300  /* newlines are line terminators */
+#define	TCL_REG_CANMATCH	001000  /* report details on partial/limited
+					 * matches */
+
+/*
+ * Structures filled in by Tcl_RegExpInfo.  Note that all offset values are
+ * relative to the start of the match string, not the beginning of the
+ * entire string.
+ */
+
+typedef struct Tcl_RegExpIndices {
+	long start;	/* character offset of first character in match */
+	long end;	/* character offset of first character after the
+			 * match. */
+} Tcl_RegExpIndices;
+
+typedef struct Tcl_RegExpInfo {
+	int nsubs;			/* number of subexpressions in the
+					 * compiled expression*/
+	Tcl_RegExpIndices *matches;	/* array of nsubs match offset
+					 * pairs */
+	long extendStart;		/* The offset at which a subsequent
+					 * match might begin. */
+} Tcl_RegExpInfo;
+
+/*
  * Picky compilers complain if this typdef doesn't appear before the
  * struct's reference in tclDecls.h.
  */
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ebea22b..4f20815 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.14 1999/06/10 04:28:50 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -126,12 +126,11 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     int objc;				/* Number of arguments. */
     Tcl_Obj *CONST objv[];		/* Argument objects. */
 {
-    int i, result, indices, stringLength, wLen, match, about;
+    int i, indices, match, about;
     int cflags, eflags;
     Tcl_RegExp regExpr;
-    char *string;
-    Tcl_DString stringBuffer, valueBuffer;
-    Tcl_UniChar *wStart;
+    Tcl_Obj *objPtr;
+    Tcl_RegExpInfo info;
     static char *options[] = {
 	"-indices",	"-nocase",	"-about",	"-expanded",
 	"-line",	"-linestop",	"-lineanchor",
@@ -209,6 +208,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     if (regExpr == NULL) {
 	return TCL_ERROR;
     }
+    objPtr = objv[1];
 
     if (about) {
 	if (TclRegAbout(interp, regExpr) < 0) {
@@ -217,27 +217,21 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
 	return TCL_OK;
     }
 
-    result = TCL_OK;
-    string = Tcl_GetStringFromObj(objv[1], &stringLength);
+    match = Tcl_RegExpMatchObj(interp, regExpr, objPtr, 0 /* offset */,
+	    objc-2 /* nmatches */, eflags);
 
-    Tcl_DStringInit(&valueBuffer);
-    
-    Tcl_DStringInit(&stringBuffer);
-    wStart = Tcl_UtfToUniCharDString(string, stringLength, &stringBuffer);
-    wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
-    match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, objc-2, eflags);
     if (match < 0) {
-	result = TCL_ERROR;
-	goto done;
+	return TCL_ERROR;
     }
+
     if (match == 0) {
 	/*
-	 * Set the interpreter's object result to an integer object w/ value 0. 
+	 * Set the interpreter's object result to an integer object w/
+	 * value 0.
 	 */
 	
 	Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
-	goto done;
+	return TCL_OK;
     }
 
     /*
@@ -248,37 +242,51 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
     objc -= 2;
     objv += 2;
 
+    Tcl_RegExpGetInfo(regExpr, &info);
     for (i = 0; i < objc; i++) {
-	char *varName, *value;
-	int start, end;
+	Tcl_Obj *varPtr, *valuePtr, *newPtr;
 	
-	varName = Tcl_GetString(objv[i]);
+	varPtr = objv[i];
+	if (indices) {
+	    int start, end;
+	    Tcl_Obj *objs[2];
+
+	    if (i <= info.nsubs) {
+		start = info.matches[i].start;
+		end = info.matches[i].end;
 
-	TclRegExpRangeUniChar(regExpr, i, &start, &end);
-	if (start < 0) {
-	    if (indices) {
-		value = Tcl_SetVar(interp, varName, "-1 -1", 0);
+		/*
+		 * Adjust index so it refers to the last character in the
+		 * match instead of the first character after the match.
+		 */
+
+		if (end >= 0) {
+		    end--;
+		}
 	    } else {
-		value = Tcl_SetVar(interp, varName, "", 0);
+		start = -1;
+		end = -1;
 	    }
+
+	    objs[0] = Tcl_NewLongObj(start);
+	    objs[1] = Tcl_NewLongObj(end);
+
+	    newPtr = Tcl_NewListObj(2, objs);
 	} else {
-	    if (indices) {
-		char info[TCL_INTEGER_SPACE * 2];
-		
-		sprintf(info, "%d %d", start, end - 1);
-		value = Tcl_SetVar(interp, varName, info, 0);
+	    if (i <= info.nsubs) {
+		newPtr = TclGetRangeFromObj(objPtr, info.matches[i].start,
+			info.matches[i].end - 1);
 	    } else {
-		value = Tcl_UniCharToUtfDString(wStart + start, end - start,
-			&valueBuffer);
-		value = Tcl_SetVar(interp, varName, value, 0);
-		Tcl_DStringSetLength(&valueBuffer, 0);
+		newPtr = Tcl_NewObj();
+		
 	    }
 	}
-	if (value == NULL) {
+	valuePtr = Tcl_ObjSetVar2(interp, varPtr, NULL, newPtr, 0);
+	if (valuePtr == NULL) {
+	    Tcl_DecrRefCount(newPtr);
 	    Tcl_AppendResult(interp, "couldn't set variable \"",
-		    varName, "\"", (char *) NULL);
-	    result = TCL_ERROR;
-	    goto done;
+		    Tcl_GetString(varPtr), "\"", (char *) NULL);
+	    return TCL_ERROR;
 	}
     }
 
@@ -287,11 +295,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
      */
 	
     Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
-
-    done:
-    Tcl_DStringFree(&stringBuffer);
-    Tcl_DStringFree(&valueBuffer);
-    return result;
+    return TCL_OK;
 }
 
 /*
@@ -319,11 +323,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
     int objc;				/* Number of arguments. */
     Tcl_Obj *CONST objv[];		/* Argument objects. */
 {
-    int i, result, flags, all, stringLength, numMatches;
+    int i, result, cflags, all, wlen, numMatches, offset;
     Tcl_RegExp regExpr;
-    Tcl_DString resultBuffer, stringBuffer;
-    CONST Tcl_UniChar *w, *wStart, *wEnd;
-    char *string, *subspec, *varname;
+    Tcl_Obj *resultPtr, *varPtr, *objPtr;
+    Tcl_UniChar *wstring;
+    char *subspec;
+
     static char *options[] = {
 	"-all",		"-nocase",	"--",		NULL
     };
@@ -331,7 +336,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	REGSUB_ALL,	REGSUB_NOCASE,	REGSUB_LAST
     };
 
-    flags = 0;
+    cflags = REG_ADVANCED;
     all = 0;
 
     for (i = 1; i < objc; i++) {
@@ -352,7 +357,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 		break;
 	    }
 	    case REGSUB_NOCASE: {
-		flags |= REG_ICASE;
+		cflags |= REG_ICASE;
 		break;
 	    }
 	    case REGSUB_LAST: {
@@ -369,17 +374,21 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
     }
 
     objv += i;
-    regExpr = Tcl_GetRegExpFromObj(interp, objv[0], flags | REG_ADVANCED);
+
+    regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
     if (regExpr == NULL) {
 	return TCL_ERROR;
     }
 
     result = TCL_OK;
-    string = Tcl_GetStringFromObj(objv[1], &stringLength);
-    subspec = Tcl_GetString(objv[2]);
-    varname = Tcl_GetString(objv[3]);
+    resultPtr = Tcl_NewObj();
+    Tcl_IncrRefCount(resultPtr);
 
-    Tcl_DStringInit(&resultBuffer);
+    objPtr = objv[1];
+    wlen = TclGetUnicodeLengthFromObj(objPtr);
+    wstring = TclGetUnicodeFromObj(objPtr);
+    subspec = Tcl_GetString(objv[2]);
+    varPtr = objv[3];
 
     /*
      * The following loop is to handle multiple matches within the
@@ -388,23 +397,22 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
      * then the loop body only gets executed once.
      */
 
-    Tcl_DStringInit(&stringBuffer);
-    wStart = Tcl_UtfToUniCharDString(string, stringLength, &stringBuffer);
-    wEnd = wStart + Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
     numMatches = 0;
-    for (w = wStart; w < wEnd; ) {
+    offset = 0;
+    for (offset = 0; offset < wlen; ) {
 	int start, end, subStart, subEnd, match;
 	char *src, *firstChar;
 	char c;
+	Tcl_RegExpInfo info;
 
 	/*
 	 * The flags argument is set if string is part of a larger string,
 	 * so that "^" won't match.
 	 */
 
-	match = TclRegExpExecUniChar(interp, regExpr, w, wEnd - w, 10,
-		((w > wStart) ? REG_NOTBOL : 0));
+	match = Tcl_RegExpMatchObj(interp, regExpr, objPtr, offset,
+		10 /* matches */, ((offset > 0) ? REG_NOTBOL : 0));
+
 	if (match < 0) {
 	    result = TCL_ERROR;
 	    goto done;
@@ -419,9 +427,11 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	 * result variable.
 	 */
 
-	TclRegExpRangeUniChar(regExpr, 0, &start, &end);
-	Tcl_UniCharToUtfDString(w, start, &resultBuffer);
-    
+	Tcl_RegExpGetInfo(regExpr, &info);
+	start = info.matches[0].start;
+	end = info.matches[0].end;
+	TclAppendUnicodeToObj(resultPtr, wstring + offset, start);
+
 	/*
 	 * Append the subSpec argument to the variable, making appropriate
 	 * substitutions.  This code is a bit hairy because of the backslash
@@ -441,9 +451,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 		if ((c >= '0') && (c <= '9')) {
 		    index = c - '0';
 		} else if ((c == '\\') || (c == '&')) {
-		    Tcl_DStringAppend(&resultBuffer, firstChar,
-			    src - firstChar);
-		    Tcl_DStringAppend(&resultBuffer, &c, 1);
+		    Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
+		    Tcl_AppendToObj(resultPtr, &c, 1);
 		    firstChar = src + 2;
 		    src++;
 		    continue;
@@ -454,12 +463,13 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 		continue;
 	    }
 	    if (firstChar != src) {
-		Tcl_DStringAppend(&resultBuffer, firstChar, src - firstChar);
+		Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
 	    }
-	    TclRegExpRangeUniChar(regExpr, index, &subStart, &subEnd);
+	    subStart = info.matches[index].start;
+	    subEnd = info.matches[index].end;
 	    if ((subStart >= 0) && (subEnd >= 0)) {
-		Tcl_UniCharToUtfDString(w + subStart, subEnd - subStart,
-			&resultBuffer);
+		TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart,
+			subEnd - subStart);
 	    }
 	    if (*src == '\\') {
 		src++;
@@ -467,7 +477,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	    firstChar = src + 1;
 	}
 	if (firstChar != src) {
-	    Tcl_DStringAppend(&resultBuffer, firstChar, src - firstChar);
+	    Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
 	}
 	if (end == 0) {
 	    /*
@@ -475,10 +485,10 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
 	     * in order to prevent infinite loops.
 	     */
 
-	    Tcl_UniCharToUtfDString(w, 1, &resultBuffer);
-	    w++;
+	    TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
+	    offset++;
 	}
-	w += end;
+	offset += end;
 	if (!all) {
 	    break;
 	}
@@ -489,13 +499,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
      * result variable.
      */
 
-    if ((w < wEnd) || (numMatches == 0)) {
-	Tcl_UniCharToUtfDString(w, wEnd - w, &resultBuffer);
+    if ((offset < wlen) || (numMatches == 0)) {
+	TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
     }
-    if (Tcl_SetVar(interp, varname, Tcl_DStringValue(&resultBuffer),
-	    0) == NULL) {
-	Tcl_AppendResult(interp, "couldn't set variable \"", varname, "\"",
-		(char *) NULL);
+    if (Tcl_ObjSetVar2(interp, varPtr, NULL, resultPtr, 0) == NULL) {
+	Tcl_AppendResult(interp, "couldn't set variable \"",
+		Tcl_GetString(varPtr), "\"", (char *) NULL);
 	result = TCL_ERROR;
     } else {
 	/*
@@ -507,8 +516,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
     }
 
     done:
-    Tcl_DStringFree(&stringBuffer);
-    Tcl_DStringFree(&resultBuffer);
+    Tcl_DecrRefCount(resultPtr);
     return result;
 }
 
@@ -1496,7 +1504,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		    } else {
 			uselen = length2;
 		    }
-		    if ((uselen <= length1) &&
+		    if ((uselen > 0) && (uselen <= length1) &&
 			(str_comp_fn(string2, string1, uselen) == 0)) {
 			/*
 			 * Adjust len to be full length of matched string
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 9eaf421..2a7ac93 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclDecls.h,v 1.14 1999/05/25 01:00:25 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.15 1999/06/10 04:28:50 stanton Exp $
  */
 
 #ifndef _TCLDECLS
@@ -1168,6 +1168,13 @@ EXTERN int		Tcl_UniCharIsGraph _ANSI_ARGS_((int ch));
 EXTERN int		Tcl_UniCharIsPrint _ANSI_ARGS_((int ch));
 /* 375 */
 EXTERN int		Tcl_UniCharIsPunct _ANSI_ARGS_((int ch));
+/* 376 */
+EXTERN int		Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp, 
+				Tcl_RegExp regexp, Tcl_Obj * objPtr, 
+				int offset, int nmatches, int flags));
+/* 377 */
+EXTERN void		Tcl_RegExpGetInfo _ANSI_ARGS_((Tcl_RegExp regexp, 
+				Tcl_RegExpInfo * infoPtr));
 
 typedef struct TclStubHooks {
     struct TclPlatStubs *tclPlatStubs;
@@ -1611,6 +1618,8 @@ typedef struct TclStubs {
     int (*tcl_UniCharIsGraph) _ANSI_ARGS_((int ch)); /* 373 */
     int (*tcl_UniCharIsPrint) _ANSI_ARGS_((int ch)); /* 374 */
     int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
+    int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */
+    void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */
 } TclStubs;
 
 #ifdef __cplusplus
@@ -3151,6 +3160,14 @@ extern TclStubs *tclStubsPtr;
 #define Tcl_UniCharIsPunct \
 	(tclStubsPtr->tcl_UniCharIsPunct) /* 375 */
 #endif
+#ifndef Tcl_RegExpMatchObj
+#define Tcl_RegExpMatchObj \
+	(tclStubsPtr->tcl_RegExpMatchObj) /* 376 */
+#endif
+#ifndef Tcl_RegExpGetInfo
+#define Tcl_RegExpGetInfo \
+	(tclStubsPtr->tcl_RegExpGetInfo) /* 377 */
+#endif
 
 #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
 
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 506c953..d30d439 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclInt.h,v 1.31 1999/06/08 23:30:24 hershey Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.32 1999/06/10 04:28:51 stanton Exp $
  */
 
 #ifndef _TCLINT
@@ -1546,6 +1546,9 @@ EXTERN void		TclAllocateFreeObjects _ANSI_ARGS_((void));
 EXTERN Tcl_Obj *	TclAppendObjToUnicodeObj _ANSI_ARGS_((
 	    		    register Tcl_Obj *targetObjPtr,
 	    		    register Tcl_Obj *srcObjPtr));
+EXTERN void		TclAppendUnicodeToObj _ANSI_ARGS_((
+	    		    register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
+			    int length));
 EXTERN int		TclArraySet _ANSI_ARGS_((Tcl_Interp *interp,
 			    Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj));
 EXTERN int		TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp,
@@ -1692,6 +1695,8 @@ EXTERN Var *		TclLookupVar _ANSI_ARGS_((Tcl_Interp *interp,
 EXTERN int		TclMathInProgress _ANSI_ARGS_((void));
 EXTERN int		TclNeedSpace _ANSI_ARGS_((char *start, char *end));
 EXTERN Tcl_Obj *	TclNewProcBodyObj _ANSI_ARGS_((Proc *procPtr));
+EXTERN Tcl_Obj *	TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
+			    int numChars));
 EXTERN int		TclObjCommandComplete _ANSI_ARGS_((Tcl_Obj *cmdPtr));
 EXTERN int		TclObjInterpProc _ANSI_ARGS_((ClientData clientData,
 			    Tcl_Interp *interp, int objc,
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 2318780..7590c8d 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.c,v 1.6 1999/06/02 01:53:31 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -177,33 +177,21 @@ Tcl_RegExpExec(interp, re, string, start)
 				 * this identifies beginning of larger
 				 * string, so that "^" won't match. */
 {
-    int result, numChars;
-    Tcl_DString stringBuffer;
-    Tcl_UniChar *uniString;
-
-    TclRegexp *regexpPtr = (TclRegexp *) re;
+    int flags;
 
     /*
-     * Remember the UTF-8 string so Tcl_RegExpRange() can convert the
-     * matches from character to byte offsets.
+     * If the starting point is offset from the beginning of the buffer,
+     * then we need to tell the regexp engine not to match "^".
      */
 
-    regexpPtr->string = string;
-
-    Tcl_DStringInit(&stringBuffer);
-    uniString = Tcl_UtfToUniCharDString(string, -1, &stringBuffer);
-    numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
-    /*
-     * Perform the regexp match.
-     */
-
-    result = TclRegExpExecUniChar(interp, re, uniString, numChars, -1,
-	    ((string > start) ? REG_NOTBOL : 0));
-
-    Tcl_DStringFree(&stringBuffer);
+    if (string > start) {
+	flags = REG_NOTBOL;
+    } else {
+	flags = 0;
+    }
 
-    return result;
+    return Tcl_RegExpMatchObj(interp, re, Tcl_NewStringObj(string, -1),
+	    0 /* offset */, -1 /* nmatches */, flags);
 }
 
 /*
@@ -238,16 +226,16 @@ Tcl_RegExpRange(re, index, startPtr, endPtr)
 				 * in (sub-) range here. */
 {
     TclRegexp *regexpPtr = (TclRegexp *) re;
+    char *string;
 
     if ((size_t) index > regexpPtr->re.re_nsub) {
 	*startPtr = *endPtr = NULL;
     } else if (regexpPtr->matches[index].rm_so < 0) {
 	*startPtr = *endPtr = NULL;
     } else {
-	*startPtr = Tcl_UtfAtIndex(regexpPtr->string,
-		regexpPtr->matches[index].rm_so);
-	*endPtr = Tcl_UtfAtIndex(regexpPtr->string,
-		regexpPtr->matches[index].rm_eo);
+	string = Tcl_GetString(regexpPtr->objPtr);
+	*startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
+	*endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
     }
 }
 
@@ -290,8 +278,9 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
     size_t last = regexpPtr->re.re_nsub + 1;
     size_t nm = last;
 
-    if (nmatches >= 0 && (size_t) nmatches < nm)
+    if (nmatches >= 0 && (size_t) nmatches < nm) {
 	nm = (size_t) nmatches;
+    }
 
     status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
 	    &regexpPtr->details, nm, regexpPtr->matches, flags);
@@ -398,6 +387,100 @@ Tcl_RegExpMatch(interp, string, pattern)
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_RegExpMatchObj --
+ *
+ *	Match a precompiled regexp against the given object.
+ *
+ * Results:
+ *	If an error occurs during the matching operation then -1
+ *	is returned and the interp's result contains an error message.
+ *	Otherwise the return value is 1 if "string" matches "pattern"
+ *	and 0 otherwise.
+ *
+ * Side effects:
+ *	Converts the object to a Unicode object.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags)
+    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */
+    Tcl_RegExp re;		/* Compiled regular expression;  must have
+				 * been returned by previous call to
+				 * Tcl_GetRegExpFromObj. */
+    Tcl_Obj *objPtr;		/* String against which to match re. */
+    int offset;			/* Character index that marks where matching
+				 * should begin. */
+    int nmatches;		/* How many subexpression matches (counting
+				 * the whole match as subexpression 0) are
+				 * of interest.  -1 means all of them. */
+    int flags;			/* Regular expression execution flags. */
+{
+    TclRegexp *regexpPtr = (TclRegexp *) re;
+    Tcl_Obj *oldPtr = regexpPtr->objPtr;
+    Tcl_UniChar *udata;
+    int length;
+
+    /*
+     * Bump the refcount before we do anything in case the object
+     * was newly created.
+     */
+
+    Tcl_IncrRefCount(objPtr);
+
+    udata = TclGetUnicodeFromObj(objPtr);
+    length = TclGetUnicodeLengthFromObj(objPtr);
+
+    /*
+     * Save the target object so we can extract strings from it later.
+     */
+
+    regexpPtr->objPtr = objPtr;
+    if (oldPtr) {
+	Tcl_DecrRefCount(oldPtr);
+    }
+
+    if (offset > length) {
+	offset = length;
+    }
+    udata += offset;
+    length -= offset;
+    
+    return TclRegExpExecUniChar(interp, re, udata, length, nmatches, flags);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_RegExpGetInfo --
+ *
+ *	Retrieve information about the current match.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_RegExpGetInfo(regexp, infoPtr)
+    Tcl_RegExp regexp;		/* Pattern from which to get subexpressions. */
+    Tcl_RegExpInfo *infoPtr;	/* Match information is stored here.  */
+{
+    TclRegexp *regexpPtr = (TclRegexp *) regexp;
+
+    infoPtr->nsubs = regexpPtr->re.re_nsub;
+    infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
+    infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * TclRegExpMatchObj --
  *
  *	See if a string matches a regular expression pattern object.
@@ -790,6 +873,9 @@ CompileRegexp(interp, string, length, flags)
      */
     
     regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
+    regexpPtr->objPtr = NULL;
+    regexpPtr->details.rm_extend.rm_so = -1;
+    regexpPtr->details.rm_extend.rm_eo = -1;
 
     /*
      * Get the up-to-date string representation and map to unicode.
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 7a6fb2a..5cee78e 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -10,7 +10,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclRegexp.h,v 1.8 1999/06/02 01:53:31 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.9 1999/06/10 04:28:51 stanton Exp $
  */
 
 #ifndef _TCLREGEXP
@@ -33,10 +33,10 @@ typedef struct TclRegexp {
     int flags;			/* Regexp compile flags. */
     regex_t re;			/* Compiled re, includes number of
 				 * subexpressions. */
-    CONST char *string;		/* Last string matched with this regexp
-				 * (UTF-8), so Tcl_RegExpRange() can convert
-				 * the matches from character indices to UTF-8
-				 * byte offsets. */
+    Tcl_Obj *objPtr;		/* Last object match with this regexp, so
+				 * Tcl_RegExpRange() can convert the matches
+				 * from character indices to UTF-8 byte
+				 * offsets. */
     regmatch_t *matches;	/* Array of indices into the Tcl_UniChar
 				 * representation of the last string matched
 				 * with this regexp to indicate the location
@@ -53,8 +53,6 @@ typedef struct TclRegexp {
 
 EXTERN int		TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp,
 			    Tcl_RegExp re));
-EXTERN VOID		TclRegXflags _ANSI_ARGS_((char *string, int length,
-			    int *cflagsPtr, int *eflagsPtr));
 EXTERN int		TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
 			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,
 			    int numChars, int nmatches, int flags));
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 1bcfb63..9a3b4f3 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStubInit.c,v 1.16 1999/05/25 01:00:27 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.17 1999/06/10 04:28:51 stanton Exp $
  */
 
 #include "tclInt.h"
@@ -756,6 +756,8 @@ TclStubs tclStubs = {
     Tcl_UniCharIsGraph, /* 373 */
     Tcl_UniCharIsPrint, /* 374 */
     Tcl_UniCharIsPunct, /* 375 */
+    Tcl_RegExpMatchObj, /* 376 */
+    Tcl_RegExpGetInfo, /* 377 */
 };
 
 /* !END!: Do not edit above this line. */
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
index 3a4709b..315644d 100644
--- a/generic/tclUnicodeObj.c
+++ b/generic/tclUnicodeObj.c
@@ -9,7 +9,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclUnicodeObj.c,v 1.4 1999/06/09 17:06:57 hershey Exp $
+ * RCS: @(#) $Id: tclUnicodeObj.c,v 1.5 1999/06/10 04:28:51 stanton Exp $
  */
 
 #include <math.h>
@@ -20,23 +20,19 @@
  * Prototypes for local procedures defined in this file:
  */
 
+static int		AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void		AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr,
+			    Tcl_UniChar *unichars, int numNewChars));	
 static void		DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
 			    Tcl_Obj *copyPtr));
 static void		FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
 static void		UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr));
-static int		SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
-			    Tcl_Obj *objPtr));
-
-static int		AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void		TclAppendUniCharStrToObj _ANSI_ARGS_((
-	    		    register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
-			    int numChars));
-static Tcl_Obj *	TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
-			    int numChars));
 static void		SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
 			    int numChars));
 static void		SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
 			    char *src, int numBytes, int numChars));
+static int		SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
+			    Tcl_Obj *objPtr));
 
 /*
  * The following object type represents a Unicode string.  A Unicode string
@@ -68,13 +64,13 @@ typedef struct Unicode {
     int numChars;		/* The number of chars in the unicode
 				 * string. */
     size_t allocated;		/* The amount of space actually allocated. */
-    unsigned char chars[4];	/* The array of chars.  The actual size of
+    Tcl_UniChar chars[2];	/* The array of chars.  The actual size of
 				 * this field depends on the 'allocated' field
 				 * above. */
 } Unicode;
 
 #define UNICODE_SIZE(len)	\
-		((unsigned) (sizeof(Unicode) - 4 + (len)))
+		((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len)))
 #define GET_UNICODE(objPtr) \
 		((Unicode *) (objPtr)->internalRep.otherValuePtr)
 #define SET_UNICODE(objPtr, unicodePtr) \
@@ -104,7 +100,6 @@ Tcl_UniChar *
 TclGetUnicodeFromObj(objPtr)
     Tcl_Obj *objPtr;	/* The object to find the unicode string for. */
 {
-    Tcl_UniChar *unicharPtr;
     Unicode *unicodePtr;
     int numBytes;
     char *src;
@@ -124,9 +119,15 @@ TclGetUnicodeFromObj(objPtr)
 	
 	src = Tcl_GetStringFromObj(objPtr, &numBytes);
 	SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars);
+
+	/*
+	 * We need to fetch the pointer again because we have just
+	 * reallocated the structure to make room for the Unicode data.
+	 */
+	
+	unicodePtr = GET_UNICODE(objPtr);
     }
-    unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
-    return unicharPtr;
+    return unicodePtr->chars;
 }
 
 /*
@@ -185,7 +186,7 @@ TclGetUniCharFromObj(objPtr, index)
     Tcl_Obj *objPtr;		/* The Unicode object. */
     int index;			/* Get the index'th character. */
 {
-    Tcl_UniChar *unicharPtr, unichar;
+    Tcl_UniChar unichar;
     Unicode *unicodePtr;
     int length;
     
@@ -206,8 +207,7 @@ TclGetUniCharFromObj(objPtr, index)
 	str = Tcl_GetStringFromObj(objPtr, &length);
 	Tcl_UtfToUniChar(&str[index], &unichar);	
     } else {
-	unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
-	unichar = unicharPtr[index];
+	unichar = unicodePtr->chars[index];
     }
     return unichar;
 }
@@ -217,11 +217,11 @@ TclGetUniCharFromObj(objPtr, index)
  *
  * TclGetRangeFromObj --
  *
- *	Create a Tcl Object that contains the chars between first and
- *	last of the object indicated by "objPtr".  If the object is not
- *	already a Unicode object, an attempt will be made to convert it
- *	to one.  The first and last indices are assumed to be in the
- *	appropriate range.
+ *	Create a Tcl Object that contains the chars between first and last
+ *	of the object indicated by "objPtr".  If the object is not already
+ *	a Unicode object, an attempt will be made to convert it to one.
+ *	The first and last indices are assumed to be in the appropriate
+ *	range.
  *
  * Results:
  *	Returns a new Tcl Object of either "string" or "unicode" type,
@@ -241,7 +241,6 @@ TclGetRangeFromObj(objPtr, first, last)
     int last;			/* Last index of the range. */
 {
     Tcl_Obj *newObjPtr;		/* The Tcl object to find the range of. */
-    Tcl_UniChar *unicharPtr;
     Unicode *unicodePtr;
     int length;
     
@@ -250,8 +249,7 @@ TclGetRangeFromObj(objPtr, first, last)
     length = objPtr->length;
     
     if (unicodePtr->numChars != length) {
-	unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
-	newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1);
+	newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1);
     } else {
 	int length;
 	char *str;
@@ -273,7 +271,7 @@ TclGetRangeFromObj(objPtr, first, last)
  *
  * TclAppendObjToUnicodeObj --
  *
- *	This procedure appends the contest of "srcObjPtr" to the Unicode
+ *	This procedure appends the contents of "srcObjPtr" to the Unicode
  *	object "destPtr".
  *
  * Results:
@@ -367,7 +365,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
 	} else {
 	    unicodePtr = GET_UNICODE(srcObjPtr);
 	    numChars = unicodePtr->numChars;
-	    unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars;
+	    unicharSrcStr = unicodePtr->chars;
 	}
     } else {
 	utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
@@ -383,7 +381,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
      * Append the unichar src string to the result object.
      */
 
-    TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
+    AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
     Tcl_DStringFree(&dsPtr);
     return resultObjPtr;
 }
@@ -391,7 +389,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
 /*
  *----------------------------------------------------------------------
  *
- * TclAppendUniCharStrToObj --
+ * AppendUniCharStrToObj --
  *
  *	This procedure appends the contents of "srcObjPtr" to the
  *	Unicode object "objPtr".
@@ -406,31 +404,25 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
  *----------------------------------------------------------------------
  */
 
-void
-TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
+static void
+AppendUniCharStrToObj(objPtr, unichars, numNewChars)
     register Tcl_Obj *objPtr;	/* Points to the object to append to. */
     Tcl_UniChar *unichars;	/* The unicode string to append to the
 			         * object. */
     int numNewChars;		/* Number of chars in "unichars". */
 {
     Unicode *unicodePtr;
-    int usedBytes, numNewBytes, totalNumBytes, totalNumChars;
-
-    /*
-     * Invalidate the StringRep.
-     */
-
-    Tcl_InvalidateStringRep(objPtr);
+    int numChars;
+    size_t numBytes;
 
+    SetUnicodeFromAny(NULL, objPtr);
     unicodePtr = GET_UNICODE(objPtr);
     
-    usedBytes = unicodePtr->numChars * sizeof(Tcl_UniChar);
-    totalNumChars = numNewChars + unicodePtr->numChars;
-    totalNumBytes = totalNumChars * sizeof(Tcl_UniChar);
-    numNewBytes = numNewChars * sizeof(Tcl_UniChar);
+    numChars = numNewChars + unicodePtr->numChars;
+    numBytes = (numChars + 1) * sizeof(Tcl_UniChar);
     
-    if (unicodePtr->allocated <= totalNumBytes) {
-	int allocatedBytes = totalNumBytes * 2;
+    if (unicodePtr->allocated < numBytes) {
+	int allocatedBytes = numBytes * 2;
     
 	/*
 	 * There isn't currently enough space in the Unicode
@@ -439,15 +431,101 @@ TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
 	 * having to reallocate again.
 	 */
 
-	unicodePtr = (Unicode *) ckrealloc(unicodePtr,
+	unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr,
 		UNICODE_SIZE(allocatedBytes));
 	unicodePtr->allocated = allocatedBytes;	
 	unicodePtr = SET_UNICODE(objPtr, unicodePtr);
     }
-    memcpy((VOID *) (unicodePtr->chars + usedBytes),
-	    (VOID *) unichars, (size_t) numNewBytes);
-    *((Tcl_UniChar *)unicodePtr->chars + totalNumChars) = 0;
-    unicodePtr->numChars = totalNumChars;
+    memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars),
+	    (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar));
+    unicodePtr->chars[numChars] = 0;
+    unicodePtr->numChars = numChars;
+
+    /*
+     * Invalidate the StringRep.
+     */
+
+    Tcl_InvalidateStringRep(objPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclAppendUnicodeToObj --
+ *
+ *	This procedure appends a Unicode string to an object in the
+ *	most efficient manner possible.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Invalidates the string rep and creates a new Unicode string.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclAppendUnicodeToObj(objPtr, unichars, length)
+    register Tcl_Obj *objPtr;	/* Points to the object to append to. */
+    Tcl_UniChar *unichars;	/* The unicode string to append to the
+			         * object. */
+    int length;			/* Number of chars in "unichars". */
+{
+    Unicode *unicodePtr;
+    int numChars, i;
+    size_t newSize;
+    char *src;
+    Tcl_UniChar *dst;
+
+    if (Tcl_IsShared(objPtr)) {
+	panic("TclAppendUnicodeToObj called with shared object");
+    }
+
+    SetUnicodeFromAny(NULL, objPtr);
+    unicodePtr = GET_UNICODE(objPtr);
+    
+    /*
+     * Make the buffer big enough for the result.
+     */
+
+    numChars = unicodePtr->numChars + length;
+    newSize = (numChars + 1) * sizeof(Tcl_UniChar);
+
+    if (newSize > unicodePtr->allocated) {
+	int allocated = newSize * 2;
+    
+	unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr,
+		UNICODE_SIZE(allocated));
+
+	if (unicodePtr->allocated == 0) {
+	    /*
+	     * If the original string was not in Unicode form, add it to the
+	     * beginning of the buffer.
+	     */
+
+	    src = objPtr->bytes;
+	    dst = unicodePtr->chars;
+	    for (i = 0; i < unicodePtr->numChars; i++) {
+		src += Tcl_UtfToUniChar(src, dst++);
+	    }
+	}
+	unicodePtr->allocated = allocated;
+    }
+
+    /*
+     * Copy the new string onto the end of the old string, then add the
+     * trailing null.
+     */
+
+    memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars,
+	    length * sizeof(Tcl_UniChar));
+    unicodePtr->numChars = numChars;
+    unicodePtr->chars[numChars] = 0;
+
+    SET_UNICODE(objPtr, unicodePtr);
+
+    Tcl_InvalidateStringRep(objPtr);
 }
 
 /*
@@ -497,7 +575,7 @@ TclNewUnicodeObj(unichars, numChars)
     unicodePtr->numChars = numChars;
     unicodePtr->allocated = allocated;
     memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes);
-    *((Tcl_UniChar *)unicodePtr->chars + numChars) = 0;
+    unicodePtr->chars[numChars] = 0;
     SET_UNICODE(objPtr, unicodePtr);
     return objPtr;
 }
@@ -572,12 +650,10 @@ DupUnicodeInternalRep(srcPtr, copyPtr)
      */
     
     if (AllSingleByteChars(srcPtr)) {
-	copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
+	copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+	copyUnicodePtr->allocated = 0;
     } else {
 	int allocated = srcUnicodePtr->allocated;
-	Tcl_UniChar *unichars;
-
-	unichars = (Tcl_UniChar *)srcUnicodePtr->chars;
 
 	copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
 
@@ -624,7 +700,7 @@ UpdateStringOfUnicode(objPtr)
     Unicode *unicodePtr;
 
     unicodePtr = GET_UNICODE(objPtr);
-    src = (Tcl_UniChar *) unicodePtr->chars;
+    src = unicodePtr->chars;
     length = unicodePtr->numChars * sizeof(Tcl_UniChar);
 
     /*
@@ -672,16 +748,20 @@ SetOptUnicodeFromAny(objPtr, numChars)
 {
     Tcl_ObjType *typePtr;
     Unicode *unicodePtr;
-    
-    unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
-    unicodePtr->numChars = numChars;
-    unicodePtr->allocated = 0;
 
     typePtr = objPtr->typePtr;
     if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
 	(*typePtr->freeIntRepProc)(objPtr);
     }
     objPtr->typePtr = &tclUnicodeType;
+
+    /*
+     * Allocate enough space for the basic Unicode structure.
+     */
+
+    unicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+    unicodePtr->numChars = numChars;
+    unicodePtr->allocated = 0;
     SET_UNICODE(objPtr, unicodePtr);
 }
 
@@ -719,7 +799,7 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
     unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length));
     srcEnd = src + numBytes;
 	
-    for (dst = (Tcl_UniChar *) unicodePtr->chars; src < srcEnd; dst++) {
+    for (dst = unicodePtr->chars; src < srcEnd; dst++) {
 	src += Tcl_UtfToUniChar(src, dst);
     }
     *dst = 0;
@@ -747,10 +827,10 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
  *
  * Side effects:
  *	A Unicode object is stored as the internal rep of objPtr.  The Unicode
- * ojbect is opitmized for the case where each UTF char in a string is only
- * one byte.  In this case, we store the value of numChars, but we don't copy
- * the bytes to the unicodeObj->chars.  Before accessing obj->chars, check if
- * all chars are 1 byte long.
+ *	object is opitmized for the case where each UTF char in a string is
+ *	only one byte.  In this case, we store the value of numChars, but we
+ *	don't copy the bytes to the unicodeObj->chars.  Before accessing
+ *	obj->chars, check if all chars are 1 byte long.
  *
  *---------------------------------------------------------------------------
  */
@@ -760,12 +840,10 @@ SetUnicodeFromAny(interp, objPtr)
     Tcl_Interp *interp;		/* Not used. */
     Tcl_Obj *objPtr;		/* The object to convert to type Unicode. */
 {
-    Tcl_ObjType *typePtr;
     int numBytes, numChars;
     char *src;
     
-    typePtr = objPtr->typePtr;
-    if (typePtr != &tclUnicodeType) {
+    if (objPtr->typePtr != &tclUnicodeType) {
 	src = Tcl_GetStringFromObj(objPtr, &numBytes);
 
 	numChars = Tcl_NumUtfChars(src, numBytes);
diff --git a/tests/string.test b/tests/string.test
index 235dba8..5200b48 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -11,7 +11,7 @@
 # See the file "license.terms" for information on usage and redistribution
 # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 #
-# RCS: @(#) $Id: string.test,v 1.12 1999/06/08 02:59:28 hershey Exp $
+# RCS: @(#) $Id: string.test,v 1.13 1999/06/10 04:28:52 stanton Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     source [file join [pwd] [file dirname [info script]] defs.tcl]
@@ -648,6 +648,9 @@ test string-10.12 {string map, unicode} {
 test string-10.13 {string map, -nocase unicode} {
     string map -nocase [list \374 ue UE \334] "a\374ueUE\000EU"
 } aue\334\334\0EU
+test string-10.14 {string map, -nocase null arguments} {
+    string map -nocase {{} abc} foo
+} foo
 
 test string-11.1 {string match, too few args} {
     list [catch {string match a} msg] $msg