summaryrefslogtreecommitdiffstats
path: root/doc/regsub.n
diff options
context:
space:
mode:
Diffstat (limited to 'doc/regsub.n')
-rw-r--r--doc/regsub.n74
1 files changed, 74 insertions, 0 deletions
diff --git a/doc/regsub.n b/doc/regsub.n
index a5b79de..29c118a 100644
--- a/doc/regsub.n
+++ b/doc/regsub.n
@@ -68,6 +68,33 @@ and
sequences are handled for each substitution using the information
from the corresponding match.
.TP
+\fB\-command\fR
+.VS 8.7
+Changes the handling of \fIsubSpec\fR so that it is not treated
+as a template for a substitution string and the substrings
+.QW &
+and
+.QW \e\fIn\fR
+no longer have special meaning. Instead \fIsubSpec\fR must be a
+command prefix, that is, a non-empty list. The substring of \fIstring\fR
+that matches \fIexp\fR, and then each substring that matches each
+capturing sub-RE within \fIexp\fR, are appended as additional elements
+to that list. (The items appended to the list are much like what
+\fBregexp\fR \fB\-inline\fR would return.) The completed list is then
+evaluated as a Tcl command, and the result of that command is the
+substitution string. Any error or exception from command evaluation
+becomes an error or exception from the \fBregsub\fR command.
+.RS
+.PP
+If \fB\-all\fR is not also given, the command callback will be invoked at most
+once (exactly when the regular expression matches). If \fB\-all\fR is given,
+the command callback will be invoked for each matched location, in sequence.
+The exact location indices that matched are not made available to the script.
+.PP
+See \fBEXAMPLES\fR below for illustrative cases.
+.RE
+.VE 8.7
+.TP
\fB\-expanded\fR
.
Enables use of the expanded regular expression syntax where
@@ -183,6 +210,53 @@ set substitution {[format \e\e\e\eu%04x [scan "\e\e&" %c]]}
set quoted [subst [string map {\en {\e\eu000a}} \e
[\fBregsub\fR -all $RE $string $substitution]]]
.CE
+.PP
+.VS 8.7
+The above operation can be done using \fBregsub \-command\fR instead, which is
+often faster. (A full pre-computed \fBstring map\fR would be faster still, but
+the cost of computing the map for a transformation as complex as this can be
+quite large.)
+.PP
+.CS
+# This RE is just a character class for everything "bad"
+set RE {[][{};#\e\e\e$\es\eu0080-\euffff]}
+
+# This encodes what the RE described above matches
+proc encodeChar {ch} {
+ # newline is handled specially since backslash-newline is a
+ # special sequence.
+ if {$ch eq "\en"} {
+ return "\e\eu000a"
+ }
+ # No point in writing this as a one-liner
+ scan $ch %c charNumber
+ format "\e\eu%04x" $charNumber
+}
+
+set quoted [\fBregsub\fR -all -command $RE $string encodeChar]
+.CE
+.PP
+This example decodes a URL-encoded string using \fBregsub \-command\fR, a
+lambda term and the \fBapply\fR command:
+.PP
+.CS
+# Match one of the sequences in a URL-encoded string that needs
+# fixing, converting + to space and %XX to the right character
+# (e.g., %7e becomes ~)
+set RE {(\e+)|%([0-9A-Fa-f]{2})}
+
+# Note that -command uses a command prefix, not a command name
+set decoded [\fBregsub\fR -all -command $RE $string {apply {{- p h} {
+ # + is a special case; handle directly
+ if {$p eq "+"} {
+ return " "
+ }
+ # convert hex to a char
+ scan $h %x charNumber
+ format %c $charNumber
+}}}]
+.CE
+.VE 8.7
.SH "SEE ALSO"
regexp(n), re_syntax(n), subst(n), string(n)
.SH KEYWORDS