summary | refs | log | tree | commit | diff | stats
path: root/Tools/world
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org> 2002-06-07 15:48:52 (GMT)
committer: Barry Warsaw <barry@python.org> 2002-06-07 15:48:52 (GMT)
commit: aef8371acb087411cde39d2e278ee8f779308740 (patch)
tree: 7035441a3aaf0f3a38071a664107be84b73ad939 /Tools/world
parent: 9e9d4f8ed8d467d0558251f43c5decc754712d53 (diff)
download: cpython-aef8371acb087411cde39d2e278ee8f779308740.zip
cpython-aef8371acb087411cde39d2e278ee8f779308740.tar.gz
cpython-aef8371acb087411cde39d2e278ee8f779308740.tar.bz2
Added the 7 new top level domains, and reworded the nameorgs output.
Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries' not `country'. Also added no-case-fold for the string "U.S." since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)
Diffstat (limited to 'Tools/world')
-rwxr-xr-x Tools/world/world 223
1 files changed, 110 insertions, 113 deletions
diff --git a/Tools/world/world b/Tools/world/world
index 3dc83b1..1c2d980 100755
--- a/Tools/world/world
+++ b/Tools/world/world
@@ -3,7 +3,7 @@
"""world -- Print mappings between country names and DNS country codes.
Contact: Barry Warsaw
-Email: bwarsaw@python.org
+Email: barry@python.org
Version: %(__version__)s
This script will take a list of Internet addresses and print out where in the
@@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms:
host.domain.xx -- any Internet host or network name
somebody@where.xx -- an Internet email address
-If no match is found, the address is interpreted as a regular expression [*]
-and a reverse lookup is attempted. This script will search the country names
-and print a list of matching entries. You can force reverse mappings with the
+If no match is found, the address is interpreted as a regular expression and a
+reverse lookup is attempted. This script will search the country names and
+print a list of matching entries. You can force reverse mappings with the
`-r' flag (see below).
For example:
@@ -34,10 +34,6 @@ For example:
tz: Tanzania, United Republic of
gb: United Kingdom
-
- [*] Note that regular expressions must conform to Python 1.5's re.py module
- syntax. The comparison is done with the search() method.
-
Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
authoritative source of country code mappings is:
@@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
When used in conjunction with the `-p' option, output is in the form
of a Python dictionary, and country names are normalized
w.r.t. capitalization. This makes it appropriate for cutting and
- pasting back into this file.
+ pasting back into this file. Output is always to standard out.
--reverse
-r
@@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
-h
--help
Print this message.
-
"""
__version__ = '$Revision$'
import sys
import getopt
-try:
- import re
-except ImportError:
- print sys.argv[0], 'requires Python 1.5'
- sys.exit(1)
+import re
PROGRAM = sys.argv[0]
@@ -110,22 +101,18 @@ def usage(code, msg=''):
def resolve(rawaddr):
parts = rawaddr.split('.')
if not len(parts):
- # no top level domain found, bounce it to the next step
- return rawaddr
+ # no top level domain found, bounce it to the next step
+ return rawaddr
addr = parts[-1]
if nameorgs.has_key(addr):
- if nameorgs[addr][0].lower() in 'aeiou':
- ana = 'an'
- else:
- ana = 'a'
- print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
- return None
+ print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
+ return None
elif countries.has_key(addr):
- print rawaddr, 'originated from', countries[addr]
- return None
+ print rawaddr, 'originated from', countries[addr]
+ return None
else:
- # Not resolved, bounce it to the next step
- return rawaddr
+ # Not resolved, bounce it to the next step
+ return rawaddr
@@ -133,82 +120,83 @@ def reverse(regexp):
matches = []
cre = re.compile(regexp, re.IGNORECASE)
for code, country in all.items():
- mo = cre.search(country)
- if mo:
- matches.append(code)
+ mo = cre.search(country)
+ if mo:
+ matches.append(code)
# print results
if not matches:
- # not resolved, bounce it to the next step
- return regexp
+ # not resolved, bounce it to the next step
+ return regexp
if len(matches) == 1:
- code = matches[0]
- print regexp, "matches code `%s', %s" % (code, all[code])
+ code = matches[0]
+ print regexp, "matches code `%s', %s" % (code, all[code])
else:
- print regexp, 'matches %d countries:' % len(matches)
- for code in matches:
- print " %s: %s" % (code, all[code])
+ print regexp, 'matches %d countries:' % len(matches)
+ for code in matches:
+ print " %s: %s" % (code, all[code])
return None
def parse(file, normalize):
try:
- fp = open(file)
+ fp = open(file)
except IOError, (err, msg):
- print msg, ':', file
+ print msg, ':', file
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
scanning = 0
if normalize:
- print 'country = {'
+ print 'countries = {'
while 1:
- line = fp.readline()
- if line == '':
- break # EOF
- if scanning:
- mo = cre.match(line)
- if not mo:
- line = line.strip()
- if not line:
- continue
- elif line[0] == '-':
- break
- else:
- print 'Could not parse line:', line
- continue
- country, code = mo.group(1, 2)
- if normalize:
- words = country.split()
- for i in range(len(words)):
- w = words[i]
- # XXX special cases
- if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
- words[i] = w.lower()
- elif w == 'THE' and i != 1:
- words[i] = w.lower()
- elif len(w) > 3 and w[1] == "'":
- words[i] = w[0:3].upper() + w[3:].lower()
- elif w == '(U.S.)':
- pass
- elif w[0] == '(' and w != '(local':
- words[i] = '(' + w[1:].capitalize()
- elif w.find('-') != -1:
- words[i] = '-'.join([s.capitalize() for s in w.split('-')])
- else:
- words[i] = w.capitalize()
- code = code.lower()
- country = ' '.join(words)
- print ' "%s": "%s",' % (code, country)
- else:
- print code, country
-
- elif line[0] == '-':
- scanning = 1
+ line = fp.readline()
+ if line == '':
+ break # EOF
+ if scanning:
+ mo = cre.match(line)
+ if not mo:
+ line = line.strip()
+ if not line:
+ continue
+ elif line[0] == '-':
+ break
+ else:
+ print 'Could not parse line:', line
+ continue
+ country, code = mo.group(1, 2)
+ if normalize:
+ words = country.split()
+ for i in range(len(words)):
+ w = words[i]
+ # XXX special cases
+ if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
+ words[i] = w.lower()
+ elif w == 'THE' and i <> 1:
+ words[i] = w.lower()
+ elif len(w) > 3 and w[1] == "'":
+ words[i] = w[0:3].upper() + w[3:].lower()
+ elif w in ('(U.S.)', 'U.S.'):
+ pass
+ elif w[0] == '(' and w <> '(local':
+ words[i] = '(' + w[1:].capitalize()
+ elif w.find('-') <> -1:
+ words[i] = '-'.join(
+ [s.capitalize() for s in w.split('-')])
+ else:
+ words[i] = w.capitalize()
+ code = code.lower()
+ country = ' '.join(words)
+ print ' "%s": "%s",' % (code, country)
+ else:
+ print code, country
+
+ elif line[0] == '-':
+ scanning = 1
if normalize:
- print ' }'
+ print ' }'
def main():
@@ -228,53 +216,62 @@ def main():
usage(1, msg)
for opt, arg in opts:
- if opt in ('-h', '--help'):
- help = 1
- elif opt in ('-d', '--dump'):
- dump = 1
- elif opt in ('-p', '--parse'):
- parsefile = arg
- elif opt in ('-o', '--output'):
- normalize = 1
- elif opt in ('-r', '--reverse'):
- forcerev = 1
+ if opt in ('-h', '--help'):
+ help = 1
+ elif opt in ('-d', '--dump'):
+ dump = 1
+ elif opt in ('-p', '--parse'):
+ parsefile = arg
+ elif opt in ('-o', '--outputdict'):
+ normalize = 1
+ elif opt in ('-r', '--reverse'):
+ forcerev = 1
if help:
- usage(status)
+ usage(status)
if dump:
- print 'Non-geographic domains:'
- codes = nameorgs.keys()
- codes.sort()
- for code in codes:
- print ' %4s:' % code, nameorgs[code]
-
- print '\nCountry coded domains:'
- codes = countries.keys()
- codes.sort()
- for code in codes:
- print ' %2s:' % code, countries[code]
+ print 'Non-geographic domains:'
+ codes = nameorgs.keys()
+ codes.sort()
+ for code in codes:
+ print ' %4s:' % code, nameorgs[code]
+
+ print '\nCountry coded domains:'
+ codes = countries.keys()
+ codes.sort()
+ for code in codes:
+ print ' %2s:' % code, countries[code]
elif parsefile:
- parse(parsefile, normalize)
+ parse(parsefile, normalize)
else:
- if not forcerev:
- args = filter(None, map(resolve, args))
- args = filter(None, map(reverse, args))
- for arg in args:
- print 'Where in the world is %s?' % arg
+ if not forcerev:
+ args = filter(None, map(resolve, args))
+ args = filter(None, map(reverse, args))
+ for arg in args:
+ print 'Where in the world is %s?' % arg
# The mappings
nameorgs = {
+ # New top level domains as described by ICANN
+ # http://www.icann.org/tlds/
+ "aero": "air-transport industry",
"arpa": "Arpanet",
+ "biz": "business",
"com": "commercial",
+ "coop": "cooperatives",
"edu": "educational",
"gov": "government",
+ "info": "unrestricted `info'",
+ "int": "international",
"mil": "military",
+ "museum": "museums",
+ "name": "`name' (for registration by individuals)",
"net": "networking",
"org": "non-commercial",
- "int": "international",
+ "pro": "professionals",
# This isn't in the same class as those above, but is included here
# because `uk' is the common practice country code for the United Kingdom.
# AFAICT, the official `gb' code is routinely ignored!
@@ -525,7 +522,7 @@ countries = {
"ve": "Venezuela",
"vn": "Viet Nam",
"vg": "Virgin Islands, British",
- "vi": "Virgin Islands, U.s.",
+ "vi": "Virgin Islands, U.S.",
"wf": "Wallis and Futuna",
"eh": "Western Sahara",
"ye": "Yemen",