summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2016-04-08 12:26:19 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2016-04-08 12:26:19 (GMT)
commit: a46470f9963dd69ba76c63ef2fcb438022ae69bb (patch)
tree: 9da76e5e599eb2cf7dff5b5148d47992912683a2
parent: b6e7e4e4283f3809245d72b487eb5cc65cc6e95b (diff)
parent: 78b187fe71dba0f3710be978050e08fde81efd85 (diff)
download: tcl-a46470f9963dd69ba76c63ef2fcb438022ae69bb.zip
tcl-a46470f9963dd69ba76c63ef2fcb438022ae69bb.tar.gz
tcl-a46470f9963dd69ba76c63ef2fcb438022ae69bb.tar.bz2
Fix [8663689908d3304a74fee525cd04aa4162e86391|8663689908d3]: regexp \w missing characters
-rw-r--r--  generic/regc_lex.c  19
-rw-r--r--  tests/utf.test  2
2 files changed, 17 insertions, 4 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 16e3ae9..affcb48 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -256,20 +256,33 @@ static const chr brbacks[] = { /* \s within brackets */
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']')
};
+
+#define PUNCT_CONN \
+ CHR('_'), \
+ 0x203f /* UNDERTIE */, \
+ 0x2040 /* CHARACTER TIE */,\
+ 0x2054 /* INVERTED UNDERTIE */,\
+ 0xfe33 /* PRESENTATION FORM FOR VERTICAL LOW LINE */, \
+ 0xfe34 /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE */, \
+ 0xfe4d /* DASHED LOW LINE */, \
+ 0xfe4e /* CENTRELINE LOW LINE */, \
+ 0xfe4f /* WAVY LOW LINE */, \
+ 0xff3f /* FULLWIDTH LOW LINE */
+
static const chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
+ CHR(':'), CHR(']'), PUNCT_CONN, CHR(']')
};
static const chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
+ CHR(':'), CHR(']'), PUNCT_CONN, CHR(']')
};
static const chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_')
+ CHR(':'), CHR(']'), PUNCT_CONN
};
/*
diff --git a/tests/utf.test b/tests/utf.test
index ceb1af7..a03dd6c 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -302,7 +302,7 @@ test utf-21.1 {TclUniCharIsAlnum} {
} {1}
test utf-21.2 {unicode alnum char in regc_locale.c} {
# this returns 1 with Unicode 7 compliance
- list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
+ list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f]
} {1 1}
test utf-21.3 {unicode print char in regc_locale.c} {
# this returns 1 with Unicode 7 compliance