diff options
Diffstat (limited to 'Tools/scripts/parseentities.py')
-rwxr-xr-x | Tools/scripts/parseentities.py | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/Tools/scripts/parseentities.py b/Tools/scripts/parseentities.py
index 0229d3a..4a44fb4 100755
--- a/Tools/scripts/parseentities.py
+++ b/Tools/scripts/parseentities.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """ Utility for parsing HTML entity definitions available from:
 
       http://www.w3.org/ as e.g.
@@ -13,8 +13,9 @@
 
 """
 import re,sys
+import TextTools
 
-entityRE = re.compile(r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
+entityRE = re.compile('<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
 
 def parse(text,pos=0,endpos=None):
 
@@ -34,31 +35,30 @@ def parse(text,pos=0,endpos=None):
 def writefile(f,defs):
 
     f.write("entitydefs = {\n")
-    items = sorted(defs.items())
-    for name, (charcode,comment) in items:
+    items = defs.items()
+    items.sort()
+    for name,(charcode,comment) in items:
         if charcode[:2] == '&#':
             code = int(charcode[2:-1])
             if code < 256:
-                charcode = r"'\%o'" % code
+                charcode = "'\%o'" % code
             else:
                 charcode = repr(charcode)
         else:
             charcode = repr(charcode)
-        comment = ' '.join(comment.split())
+        comment = TextTools.collapse(comment)
         f.write(" '%s':\t%s, \t# %s\n" % (name,charcode,comment))
     f.write('\n}\n')
 
 if __name__ == '__main__':
     if len(sys.argv) > 1:
-        with open(sys.argv[1]) as infile:
-            text = infile.read()
+        infile = open(sys.argv[1])
     else:
-        text = sys.stdin.read()
-
-    defs = parse(text)
-
+        infile = sys.stdin
     if len(sys.argv) > 2:
-        with open(sys.argv[2],'w') as outfile:
-            writefile(outfile, defs)
+        outfile = open(sys.argv[2],'w')
     else:
-        writefile(sys.stdout, defs)
+        outfile = sys.stdout
+    text = infile.read()
+    defs = parse(text)
+    writefile(outfile,defs)