diff options
author | Barry Warsaw <barry@python.org> | 2002-06-07 15:48:52 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-06-07 15:48:52 (GMT) |
commit | aef8371acb087411cde39d2e278ee8f779308740 (patch) | |
tree | 7035441a3aaf0f3a38071a664107be84b73ad939 /Tools/world | |
parent | 9e9d4f8ed8d467d0558251f43c5decc754712d53 (diff) | |
download | cpython-aef8371acb087411cde39d2e278ee8f779308740.zip cpython-aef8371acb087411cde39d2e278ee8f779308740.tar.gz cpython-aef8371acb087411cde39d2e278ee8f779308740.tar.bz2 |
Added the seven new top-level domains, and reworded the nameorgs output.
Not sure the new wording is better in all cases.
parse(): Fixed a bug in the output; the dict is referred to in the
code as `countries', not `country'. Also exempted the bare string "U.S."
from case normalization (alongside "(U.S.)"), since the Virgin Islands
name no longer wraps it in parentheses.
main(): Fixed the argument parsing to agree with the docstring, i.e.
--outputdict instead of --output.
In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>
We also don't need to wrap the import of re with a try/except.
Other style fixes:
- untabification
- revert to the <> inequality operator (instead of !=) everywhere, consistently
Diffstat (limited to 'Tools/world')
-rwxr-xr-x | Tools/world/world | 223 |
1 file changed, 110 insertions, 113 deletions
diff --git a/Tools/world/world b/Tools/world/world index 3dc83b1..1c2d980 100755 --- a/Tools/world/world +++ b/Tools/world/world @@ -3,7 +3,7 @@ """world -- Print mappings between country names and DNS country codes. Contact: Barry Warsaw -Email: bwarsaw@python.org +Email: barry@python.org Version: %(__version__)s This script will take a list of Internet addresses and print out where in the @@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms: host.domain.xx -- any Internet host or network name somebody@where.xx -- an Internet email address -If no match is found, the address is interpreted as a regular expression [*] -and a reverse lookup is attempted. This script will search the country names -and print a list of matching entries. You can force reverse mappings with the +If no match is found, the address is interpreted as a regular expression and a +reverse lookup is attempted. This script will search the country names and +print a list of matching entries. You can force reverse mappings with the `-r' flag (see below). For example: @@ -34,10 +34,6 @@ For example: tz: Tanzania, United Republic of gb: United Kingdom - - [*] Note that regular expressions must conform to Python 1.5's re.py module - syntax. The comparison is done with the search() method. - Country codes are maintained by the RIPE Network Coordination Centre, in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The authoritative source of country code mappings is: @@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...] When used in conjunction with the `-p' option, output is in the form of a Python dictionary, and country names are normalized w.r.t. capitalization. This makes it appropriate for cutting and - pasting back into this file. + pasting back into this file. Output is always to standard out. --reverse -r @@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...] -h --help Print this message. 
- """ __version__ = '$Revision$' import sys import getopt -try: - import re -except ImportError: - print sys.argv[0], 'requires Python 1.5' - sys.exit(1) +import re PROGRAM = sys.argv[0] @@ -110,22 +101,18 @@ def usage(code, msg=''): def resolve(rawaddr): parts = rawaddr.split('.') if not len(parts): - # no top level domain found, bounce it to the next step - return rawaddr + # no top level domain found, bounce it to the next step + return rawaddr addr = parts[-1] if nameorgs.has_key(addr): - if nameorgs[addr][0].lower() in 'aeiou': - ana = 'an' - else: - ana = 'a' - print rawaddr, 'is from', ana, nameorgs[addr], 'organization' - return None + print rawaddr, 'is in the', nameorgs[addr], 'top level domain' + return None elif countries.has_key(addr): - print rawaddr, 'originated from', countries[addr] - return None + print rawaddr, 'originated from', countries[addr] + return None else: - # Not resolved, bounce it to the next step - return rawaddr + # Not resolved, bounce it to the next step + return rawaddr @@ -133,82 +120,83 @@ def reverse(regexp): matches = [] cre = re.compile(regexp, re.IGNORECASE) for code, country in all.items(): - mo = cre.search(country) - if mo: - matches.append(code) + mo = cre.search(country) + if mo: + matches.append(code) # print results if not matches: - # not resolved, bounce it to the next step - return regexp + # not resolved, bounce it to the next step + return regexp if len(matches) == 1: - code = matches[0] - print regexp, "matches code `%s', %s" % (code, all[code]) + code = matches[0] + print regexp, "matches code `%s', %s" % (code, all[code]) else: - print regexp, 'matches %d countries:' % len(matches) - for code in matches: - print " %s: %s" % (code, all[code]) + print regexp, 'matches %d countries:' % len(matches) + for code in matches: + print " %s: %s" % (code, all[code]) return None def parse(file, normalize): try: - fp = open(file) + fp = open(file) except IOError, (err, msg): - print msg, ':', file + print msg, ':', file 
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}') scanning = 0 if normalize: - print 'country = {' + print 'countries = {' while 1: - line = fp.readline() - if line == '': - break # EOF - if scanning: - mo = cre.match(line) - if not mo: - line = line.strip() - if not line: - continue - elif line[0] == '-': - break - else: - print 'Could not parse line:', line - continue - country, code = mo.group(1, 2) - if normalize: - words = country.split() - for i in range(len(words)): - w = words[i] - # XXX special cases - if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'): - words[i] = w.lower() - elif w == 'THE' and i != 1: - words[i] = w.lower() - elif len(w) > 3 and w[1] == "'": - words[i] = w[0:3].upper() + w[3:].lower() - elif w == '(U.S.)': - pass - elif w[0] == '(' and w != '(local': - words[i] = '(' + w[1:].capitalize() - elif w.find('-') != -1: - words[i] = '-'.join([s.capitalize() for s in w.split('-')]) - else: - words[i] = w.capitalize() - code = code.lower() - country = ' '.join(words) - print ' "%s": "%s",' % (code, country) - else: - print code, country - - elif line[0] == '-': - scanning = 1 + line = fp.readline() + if line == '': + break # EOF + if scanning: + mo = cre.match(line) + if not mo: + line = line.strip() + if not line: + continue + elif line[0] == '-': + break + else: + print 'Could not parse line:', line + continue + country, code = mo.group(1, 2) + if normalize: + words = country.split() + for i in range(len(words)): + w = words[i] + # XXX special cases + if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'): + words[i] = w.lower() + elif w == 'THE' and i <> 1: + words[i] = w.lower() + elif len(w) > 3 and w[1] == "'": + words[i] = w[0:3].upper() + w[3:].lower() + elif w in ('(U.S.)', 'U.S.'): + pass + elif w[0] == '(' and w <> '(local': + words[i] = '(' + w[1:].capitalize() + elif w.find('-') <> -1: + words[i] = '-'.join( + [s.capitalize() for s in w.split('-')]) + else: + words[i] = w.capitalize() + code = code.lower() + 
country = ' '.join(words) + print ' "%s": "%s",' % (code, country) + else: + print code, country + + elif line[0] == '-': + scanning = 1 if normalize: - print ' }' + print ' }' def main(): @@ -228,53 +216,62 @@ def main(): usage(1, msg) for opt, arg in opts: - if opt in ('-h', '--help'): - help = 1 - elif opt in ('-d', '--dump'): - dump = 1 - elif opt in ('-p', '--parse'): - parsefile = arg - elif opt in ('-o', '--output'): - normalize = 1 - elif opt in ('-r', '--reverse'): - forcerev = 1 + if opt in ('-h', '--help'): + help = 1 + elif opt in ('-d', '--dump'): + dump = 1 + elif opt in ('-p', '--parse'): + parsefile = arg + elif opt in ('-o', '--outputdict'): + normalize = 1 + elif opt in ('-r', '--reverse'): + forcerev = 1 if help: - usage(status) + usage(status) if dump: - print 'Non-geographic domains:' - codes = nameorgs.keys() - codes.sort() - for code in codes: - print ' %4s:' % code, nameorgs[code] - - print '\nCountry coded domains:' - codes = countries.keys() - codes.sort() - for code in codes: - print ' %2s:' % code, countries[code] + print 'Non-geographic domains:' + codes = nameorgs.keys() + codes.sort() + for code in codes: + print ' %4s:' % code, nameorgs[code] + + print '\nCountry coded domains:' + codes = countries.keys() + codes.sort() + for code in codes: + print ' %2s:' % code, countries[code] elif parsefile: - parse(parsefile, normalize) + parse(parsefile, normalize) else: - if not forcerev: - args = filter(None, map(resolve, args)) - args = filter(None, map(reverse, args)) - for arg in args: - print 'Where in the world is %s?' % arg + if not forcerev: + args = filter(None, map(resolve, args)) + args = filter(None, map(reverse, args)) + for arg in args: + print 'Where in the world is %s?' 
% arg # The mappings nameorgs = { + # New top level domains as described by ICANN + # http://www.icann.org/tlds/ + "aero": "air-transport industry", "arpa": "Arpanet", + "biz": "business", "com": "commercial", + "coop": "cooperatives", "edu": "educational", "gov": "government", + "info": "unrestricted `info'", + "int": "international", "mil": "military", + "museum": "museums", + "name": "`name' (for registration by individuals)", "net": "networking", "org": "non-commercial", - "int": "international", + "pro": "professionals", # This isn't in the same class as those above, but is included here # because `uk' is the common practice country code for the United Kingdom. # AFAICT, the official `gb' code is routinely ignored! @@ -525,7 +522,7 @@ countries = { "ve": "Venezuela", "vn": "Viet Nam", "vg": "Virgin Islands, British", - "vi": "Virgin Islands, U.s.", + "vi": "Virgin Islands, U.S.", "wf": "Wallis and Futuna", "eh": "Western Sahara", "ye": "Yemen", |