summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- generic/regcomp.c 2
-rw-r--r-- tests/utf.test 34
-rw-r--r-- tools/uniParse.tcl 10
4 files changed, 32 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index de50354..8e372eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2010-10-18 Jan Nijtmans <nijtmans@users.sf.net>
+
+ * tools/uniParse.tcl: [Bug 3085863]: tclUniData 9 years old
+ Ignore non-BMP characters and fix comment about UnicodeData.txt file.
+ * generic/regcomp.c: fix comment
+ * tests/utf.test: Add some Unicode 6 testcases
+
2010-10-17 Alexandre Ferrieux <ferrieux@users.sourceforge.net>
* doc/info.n: Document [info errorstack] faithfully.
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 9753ca4..d7ae05e 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -2131,7 +2131,7 @@ stdump(
/*
- stid - identify a subtree node for dumping
- ^ static char *stid(struct subre *, char *, size_t);
+ ^ static const char *stid(struct subre *, char *, size_t);
*/
static const char * /* points to buf or constant string */
stid(
diff --git a/tests/utf.test b/tests/utf.test
index 575a5cd..3a45d13 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -8,7 +8,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: utf.test,v 1.14 2007/05/02 01:37:28 kennykb Exp $
+# RCS: @(#) $Id: utf.test,v 1.15 2010/10/18 21:47:36 nijtmans Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -276,12 +276,12 @@ test utf-20.1 {TclUniCharNcmp} {
} {}
test utf-21.1 {TclUniCharIsAlnum} {
- # this returns 1 with Unicode 3 compliance
- string is alnum \u1040\u021f
+ # this returns 1 with Unicode 6 compliance
+ string is alnum \u1040\u021f\u0220
} {1}
test utf-21.2 {unicode alnum char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
+ # this returns 1 with Unicode 6 compliance
+ list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
} {1 1}
test utf-22.1 {TclUniCharIsWordChar} {
@@ -292,30 +292,30 @@ test utf-22.2 {TclUniCharIsWordChar} {
} 10
test utf-23.1 {TclUniCharIsAlpha} {
- # this returns 1 with Unicode 3 compliance
- string is alpha \u021f
+ # this returns 1 with Unicode 6 compliance
+ string is alpha \u021f\u0220
} {1}
test utf-23.2 {unicode alpha char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- regexp {^[[:alpha:]]+$} \u021f
+ # this returns 1 with Unicode 6 compliance
+ regexp {^[[:alpha:]]+$} \u021f\u0220
} {1}
test utf-24.1 {TclUniCharIsDigit} {
- # this returns 1 with Unicode 3 compliance
- string is digit \u1040
+ # this returns 1 with Unicode 6 compliance
+ string is digit \u1040\uabf0
} {1}
test utf-24.2 {unicode digit char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
+ # this returns 1 with Unicode 6 compliance
+ list [regexp {^[[:digit:]]+$} \u1040\uabf0] [regexp {^\d+$} \u1040\uabf0]
} {1 1}
test utf-24.3 {TclUniCharIsSpace} {
- # this returns 1 with Unicode 3 compliance
- string is space \u1680
+ # this returns 1 with Unicode 6 compliance
+ string is space \u1680\u180e
} {1}
test utf-24.4 {unicode space char in regc_locale.c} {
- # this returns 1 with Unicode 3 compliance
- list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
+ # this returns 1 with Unicode 6 compliance
+ list [regexp {^[[:space:]]+$} \u1680\u180e] [regexp {^\s+$} \u1680\u180e]
} {1 1}
testConstraint teststringobj [llength [info commands teststringobj]]
diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl
index a7f4237..0ec0848 100644
--- a/tools/uniParse.tcl
+++ b/tools/uniParse.tcl
@@ -4,12 +4,12 @@
# corresponding tclUniData.c file with compressed character
# data tables. The input to this program should be the latest
# UnicodeData file from:
-# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
+# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
#
# Copyright (c) 1998-1999 by Scriptics Corporation.
# All rights reserved.
#
-# RCS: @(#) $Id: uniParse.tcl,v 1.8 2010/10/15 15:25:52 nijtmans Exp $
+# RCS: @(#) $Id: uniParse.tcl,v 1.9 2010/10/18 21:47:36 nijtmans Exp $
namespace eval uni {
@@ -116,7 +116,11 @@ proc uni::buildTables {data} {
set items [split $line \;]
- scan [lindex $items 0] %4x index
+ scan [lindex $items 0] %x index
+ if {$index > 0xFFFF} then {
+ # Ignore non-BMP characters, as long as Tcl doesn't support them
+ continue
+ }
set index [format 0x%0.4x $index]
set gIndex [getGroup [getValue $items $index]]