diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-22 21:28:16 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-22 21:28:16 (GMT) |
commit | 0a2c1e30b152c1fcbd3180aadaf6b27039f07421 (patch) | |
tree | ed2ffe7437994ef55aa62151723fc6c9a27f50fa | |
parent | d6deb4d6d99f3ea3b0f50de0fdf0f06903f41956 (diff) | |
download | tcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.zip tcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.tar.gz tcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.tar.bz2 |
Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the next step.
-rw-r--r-- | generic/tclCmdMZ.c | 272 | ||||
-rw-r--r-- | generic/tclInt.h | 2 | ||||
-rw-r--r-- | generic/tclUtf.c | 14 |
3 files changed, 284 insertions, 4 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 0da143e..0e624c6 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2547,6 +2547,146 @@ StringStartCmd( /* *---------------------------------------------------------------------- * + * StringPrevCharCmd -- + * + * This procedure is invoked to process the "string prevchar" Tcl + * command. See the user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringPrevCharCmd( + TCL_UNUSED(ClientData), + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int ch; + const char *p, *string; + int cur, index, length, numChars; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index >= numChars) { + index = numChars - 1; + } + cur = 0; + if (index > 0) { + p = Tcl_UtfAtIndex(string, index); + + TclUtfToUCS4(p, &ch); + for (cur = index; cur >= 0; cur--) { + int delta = 0; + const char *next; + + if (!Tcl_UniCharIsWordChar(ch)) { + break; + } + + next = Tcl_UtfPrev(p, string); + do { + next += delta; + delta = TclUtfToUCS4(next, &ch); + } while (next + delta < p); + p = next; + } + if (cur != index) { + cur += 1; + } + } + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringPrevWordCmd -- + * + * This procedure is invoked to process the "string prevword" Tcl + * command. See the user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringPrevWordCmd( + TCL_UNUSED(ClientData), + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int ch; + const char *p, *string; + int cur, index, length, numChars; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index >= numChars) { + index = numChars - 1; + } + cur = 0; + if (index > 0) { + p = Tcl_UtfAtIndex(string, index); + + TclUtfToUCS4(p, &ch); + for (cur = index; cur >= 0; cur--) { + int delta = 0; + const char *next; + + if (!Tcl_UniCharIsWordChar(ch)) { + break; + } + + next = Tcl_UtfPrev(p, string); + do { + next += delta; + delta = TclUtfToUCS4(next, &ch); + } while (next + delta < p); + p = next; + } + if (cur != index) { + cur += 1; + } + } + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * StringEndCmd -- * * This procedure is invoked to process the "string wordend" Tcl command. @@ -2605,6 +2745,130 @@ StringEndCmd( return TCL_OK; } + +/* + *---------------------------------------------------------------------- + * + * StringNextCharCmd -- + * + * This procedure is invoked to process the "string nextchar" Tcl command. + * See the user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringNextCharCmd( + TCL_UNUSED(ClientData), + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int ch; + const char *p, *end, *string; + int cur, index, length, numChars; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index < 0) { + index = 0; + } + if (index < numChars) { + p = Tcl_UtfAtIndex(string, index); + end = string+length; + for (cur = index; p < end; cur++) { + p += TclUtfToUCS4(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; + } + } + if (cur == index) { + cur++; + } + } else { + cur = numChars; + } + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); + return TCL_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * StringNextWordCmd -- + * + * This procedure is invoked to process the "string nextword" Tcl command. + * See the user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringNextWordCmd( + TCL_UNUSED(ClientData), + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int ch; + const char *p, *end, *string; + int cur, index, length, numChars; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index < 0) { + index = 0; + } + if (index < numChars) { + p = Tcl_UtfAtIndex(string, index); + end = string+length; + for (cur = index; p < end; cur++) { + p += TclUtfToUCS4(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; + } + } + if (cur == index) { + cur++; + } + } else { + cur = numChars; + } + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); + return TCL_OK; +} + /* *---------------------------------------------------------------------- * @@ -3321,10 +3585,10 @@ TclInitStringCmd( {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, {"match", StringMatchCmd, TclCompileStringMatchCmd, NULL, NULL, 0}, - {"nextchar", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, - {"nextword", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, - {"prevchar", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, - {"prevword", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"nextchar", StringNextCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"nextword", StringNextWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"prevchar", StringPrevCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"prevword", StringPrevWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"range", StringRangeCmd, TclCompileStringRangeCmd, NULL, NULL, 0}, {"repeat", StringReptCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"replace", StringRplcCmd, TclCompileStringReplaceCmd, NULL, NULL, 0}, diff --git a/generic/tclInt.h b/generic/tclInt.h index 1b95754..ef7411a 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3252,8 +3252,10 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar +# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) #else MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); + MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr); #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index fd6ec1b..db2fc02 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2634,6 +2634,20 @@ TclUtfToUCS4( /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ return Tcl_UtfToUniChar(src, ucs4Ptr); } + +int +TclUniCharToUCS4( + const Tcl_UniChar *src, /* The Tcl_UniChar string. */ + int *ucs4Ptr) /* Filled with the UCS4 codepoint represented + * by the Tcl_UniChar string. */ +{ + if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { + *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000; + return 2; + } + *ucs4Ptr = src[0]; + return 1; +} #endif /* |