summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-22 21:28:16 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-22 21:28:16 (GMT)
commit0a2c1e30b152c1fcbd3180aadaf6b27039f07421 (patch)
treeed2ffe7437994ef55aa62151723fc6c9a27f50fa
parentd6deb4d6d99f3ea3b0f50de0fdf0f06903f41956 (diff)
downloadtcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.zip
tcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.tar.gz
tcl-0a2c1e30b152c1fcbd3180aadaf6b27039f07421.tar.bz2
Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the next step.
-rw-r--r--generic/tclCmdMZ.c272
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclUtf.c14
3 files changed, 284 insertions, 4 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 0da143e..0e624c6 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2547,6 +2547,146 @@ StringStartCmd(
/*
*----------------------------------------------------------------------
*
+ * StringPrevCharCmd --
+ *
+ * This procedure is invoked to process the "string prevchar" Tcl
+ * command. See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringPrevCharCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int ch;
+ const char *p, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[1], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ string = TclGetStringFromObj(objv[1], &length);
+ if (index >= numChars) {
+ index = numChars - 1;
+ }
+ cur = 0;
+ if (index > 0) {
+ p = Tcl_UtfAtIndex(string, index);
+
+ TclUtfToUCS4(p, &ch);
+ for (cur = index; cur >= 0; cur--) {
+ int delta = 0;
+ const char *next;
+
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
+ }
+
+ next = Tcl_UtfPrev(p, string);
+ do {
+ next += delta;
+ delta = TclUtfToUCS4(next, &ch);
+ } while (next + delta < p);
+ p = next;
+ }
+ if (cur != index) {
+ cur += 1;
+ }
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringPrevWordCmd --
+ *
+ * This procedure is invoked to process the "string prevword" Tcl
+ * command. See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringPrevWordCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int ch;
+ const char *p, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[1], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ string = TclGetStringFromObj(objv[1], &length);
+ if (index >= numChars) {
+ index = numChars - 1;
+ }
+ cur = 0;
+ if (index > 0) {
+ p = Tcl_UtfAtIndex(string, index);
+
+ TclUtfToUCS4(p, &ch);
+ for (cur = index; cur >= 0; cur--) {
+ int delta = 0;
+ const char *next;
+
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
+ }
+
+ next = Tcl_UtfPrev(p, string);
+ do {
+ next += delta;
+ delta = TclUtfToUCS4(next, &ch);
+ } while (next + delta < p);
+ p = next;
+ }
+ if (cur != index) {
+ cur += 1;
+ }
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* StringEndCmd --
*
* This procedure is invoked to process the "string wordend" Tcl command.
@@ -2605,6 +2745,130 @@ StringEndCmd(
return TCL_OK;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringNextCharCmd --
+ *
+ * This procedure is invoked to process the "string nextchar" Tcl command.
+ * See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringNextCharCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int ch;
+ const char *p, *end, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[1], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ string = TclGetStringFromObj(objv[1], &length);
+ if (index < 0) {
+ index = 0;
+ }
+ if (index < numChars) {
+ p = Tcl_UtfAtIndex(string, index);
+ end = string+length;
+ for (cur = index; p < end; cur++) {
+ p += TclUtfToUCS4(p, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
+ }
+ }
+ if (cur == index) {
+ cur++;
+ }
+ } else {
+ cur = numChars;
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
+ return TCL_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringNextWordCmd --
+ *
+ * This procedure is invoked to process the "string nextword" Tcl command.
+ * See the user documentation for details on what it does.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringNextWordCmd(
+ TCL_UNUSED(ClientData),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int ch;
+ const char *p, *end, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[1], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ string = TclGetStringFromObj(objv[1], &length);
+ if (index < 0) {
+ index = 0;
+ }
+ if (index < numChars) {
+ p = Tcl_UtfAtIndex(string, index);
+ end = string+length;
+ for (cur = index; p < end; cur++) {
+ p += TclUtfToUCS4(p, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
+ }
+ }
+ if (cur == index) {
+ cur++;
+ }
+ } else {
+ cur = numChars;
+ }
+ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
+ return TCL_OK;
+}
+
/*
*----------------------------------------------------------------------
*
@@ -3321,10 +3585,10 @@ TclInitStringCmd(
{"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0},
{"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0},
{"match", StringMatchCmd, TclCompileStringMatchCmd, NULL, NULL, 0},
- {"nextchar", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
- {"nextword", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
- {"prevchar", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
- {"prevword", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"nextchar", StringNextCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"nextword", StringNextWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"prevchar", StringPrevCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
+ {"prevword", StringPrevWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
{"range", StringRangeCmd, TclCompileStringRangeCmd, NULL, NULL, 0},
{"repeat", StringReptCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0},
{"replace", StringRplcCmd, TclCompileStringReplaceCmd, NULL, NULL, 0},
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 1b95754..ef7411a 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,8 +3252,10 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
+# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
#else
MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fd6ec1b..db2fc02 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2634,6 +2634,20 @@ TclUtfToUCS4(
/* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
return Tcl_UtfToUniChar(src, ucs4Ptr);
}
+
+int
+TclUniCharToUCS4(
+ const Tcl_UniChar *src, /* The Tcl_UniChar string. */
+ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
+ * by the Tcl_UniChar string. */
+{
+ if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ return 2;
+ }
+ *ucs4Ptr = src[0];
+ return 1;
+}
#endif
/*