summary | refs | log | tree | commit | diff | stats
path: root/Demo/scripts
diff options
context:
space:
mode:
author: Skip Montanaro <skip@pobox.com> 2003-01-01 20:26:47 (GMT)
committer: Skip Montanaro <skip@pobox.com> 2003-01-01 20:26:47 (GMT)
commit: f2c47114e75e26d9535f24b5ce15e199b2e6496e (patch)
tree: e572a901978c205116bd651952a01dd669ff99a4 /Demo/scripts
parent: decc6a47df823a988845d3753a4cfb7a85b80828 (diff)
download: cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.zip
          cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.tar.gz
          cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.tar.bz2
Search for Unicode character names using regular expressions.
Diffstat (limited to 'Demo/scripts')
-rw-r--r-- Demo/scripts/find-uname.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/Demo/scripts/find-uname.py b/Demo/scripts/find-uname.py
new file mode 100644
index 0000000..b76b9f0
--- /dev/null
+++ b/Demo/scripts/find-uname.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+"""
+For each argument on the command line, look for it in the set of all Unicode
+names. Arguments are treated as case-insensitive regular expressions, e.g.:
+
+    % find-uname 'small letter a$' 'horizontal line'
+    *** small letter a$ matches ***
+    LATIN SMALL LETTER A (97)
+    COMBINING LATIN SMALL LETTER A (867)
+    CYRILLIC SMALL LETTER A (1072)
+    PARENTHESIZED LATIN SMALL LETTER A (9372)
+    CIRCLED LATIN SMALL LETTER A (9424)
+    FULLWIDTH LATIN SMALL LETTER A (65345)
+    *** horizontal line matches ***
+    HORIZONTAL LINE EXTENSION (9135)
+"""
+
+import unicodedata
+import sys
+import re
+
+def main(args):  # args: regex strings; prints each pattern's matching names
+    unicode_names= []  # (code point, character name) pairs
+    for ix in range(sys.maxunicode+1):
+        try:
+            unicode_names.append( (ix, unicodedata.name(unichr(ix))) )
+        except ValueError: # no name for the character
+            pass
+    for arg in args:
+        pat = re.compile(arg, re.I)  # re.I: match case-insensitively
+        matches = [(x,y) for (x,y) in unicode_names
+                       if pat.search(y) is not None]
+        if matches:  # patterns with no match print nothing
+            print "***", arg, "matches", "***"
+            for (x,y) in matches:
+                print "%s (%d)" % (y,x)
+
+if __name__ == "__main__":  # run only when executed as a script
+    main(sys.argv[1:])  # drop argv[0] (the script path); the rest are patterns