diff options
author | Skip Montanaro <skip@pobox.com> | 2003-01-01 20:26:47 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2003-01-01 20:26:47 (GMT) |
commit | f2c47114e75e26d9535f24b5ce15e199b2e6496e (patch) | |
tree | e572a901978c205116bd651952a01dd669ff99a4 /Demo/scripts | |
parent | decc6a47df823a988845d3753a4cfb7a85b80828 (diff) | |
download | cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.zip cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.tar.gz cpython-f2c47114e75e26d9535f24b5ce15e199b2e6496e.tar.bz2 |
Search for Unicode character names using regular expressions.
Diffstat (limited to 'Demo/scripts')
-rw-r--r-- | Demo/scripts/find-uname.py | 40 |
1 files changed, 40 insertions, 0 deletions
#!/usr/bin/env python
# File: Demo/scripts/find-uname.py

"""
For each argument on the command line, look for it in the set of all Unicode
names.  Arguments are treated as case-insensitive regular expressions, e.g.:

    % find-uname 'small letter a$' 'horizontal line'
    *** small letter a$ matches ***
    LATIN SMALL LETTER A (97)
    COMBINING LATIN SMALL LETTER A (867)
    CYRILLIC SMALL LETTER A (1072)
    PARENTHESIZED LATIN SMALL LETTER A (9372)
    CIRCLED LATIN SMALL LETTER A (9424)
    FULLWIDTH LATIN SMALL LETTER A (65345)
    *** horizontal line matches ***
    HORIZONTAL LINE EXTENSION (9135)
"""

import unicodedata
import sys
import re

try:
    _chr = unichr  # Python 2: chr() only covers 0..255 there
except NameError:
    _chr = chr  # Python 3: chr() already yields Unicode characters


def _named_characters(code_points):
    """Return (code point, name) pairs for the code points that have a name.

    Code points without a Unicode name are skipped, as are values that
    cannot be converted to a character at all (the conversion raises
    ValueError for them).
    """
    named = []
    for code_point in code_points:
        try:
            # Passing a default avoids raising (and catching) ValueError for
            # every unnamed code point, which is the common case.
            name = unicodedata.name(_chr(code_point), None)
        except ValueError:  # code point outside the representable range
            continue
        if name is not None:
            named.append((code_point, name))
    return named


def main(args, code_points=None):
    """Print the Unicode character names matched by each regex in *args*.

    For every pattern that matches at least one name, a header line
    ``*** <pattern> matches ***`` is printed, followed by one
    ``<NAME> (<code point>)`` line per match in code point order.
    Patterns that match nothing produce no output.

    args: iterable of regular expression strings; they are compiled with
        re.I and applied with ``search``.
    code_points: optional iterable of integer code points to look at.
        Defaults to every code point from 0 through sys.maxunicode.

    Raises re.error if one of the patterns is not a valid regular expression.
    """
    # Compile everything first so that a bad pattern fails before the
    # comparatively slow scan over all code points.
    patterns = [(arg, re.compile(arg, re.I)) for arg in args]
    if not patterns:
        return  # nothing to search for; skip the scan entirely

    if code_points is None:
        code_points = range(sys.maxunicode + 1)
    unicode_names = _named_characters(code_points)

    for arg, pat in patterns:
        matches = [(code_point, name) for (code_point, name) in unicode_names
                   if pat.search(name) is not None]
        if matches:
            # Single-argument print(...) yields the same text on Python 2
            # and Python 3.
            print("*** %s matches ***" % arg)
            for (code_point, name) in matches:
                print("%s (%d)" % (name, code_point))


if __name__ == "__main__":
    main(sys.argv[1:])