blob: f4b6096ebe3f25ee9176aa9b4e5f6373b476f3fb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
"""A helper to download input files needed by assorted encoding tests.
fetch_data_files.py [directory]
Files are downloaded to directory `directory`. If a directory isn't given,
it defaults to the current directory (.).
"""
# Whitespace-separated list of the data-file URLs to download.  The script
# entry point passes this string to main(), which splits it with str.split()
# and fetches each entry in turn.
DATA_URLS = """
http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT
http://people.freebsd.org/~perky/i18n/EUC-CN.TXT
http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT
http://people.freebsd.org/~perky/i18n/EUC-JP.TXT
http://people.freebsd.org/~perky/i18n/EUC-KR.TXT
http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
http://www.unicode.org/Public/3.2-Update/NormalizationTest-3.2.0.txt
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT
"""
# Adapted from test_support.open_urlresource() in Python 2.5.
# Fetch the file given by `url` off the web, and store it in directory
# `directory`. The file name is extracted from the last URL component.
# If the file already exists, it's not fetched again.
def fetch_file_from_url(url, directory):
    """Download `url` into `directory` unless the file is already there.

    The local file name is the last '/'-separated component of the URL's
    path.  If a file with that name already exists in `directory`, nothing
    is downloaded.

    The data is first written to a temporary ``<name>.part`` file and only
    renamed to its final name once the transfer has finished, so an
    interrupted download never leaves a truncated file that a later run
    would mistake for a complete one.

    Parameters:
        url: URL of the file to fetch.
        directory: existing directory in which to store the file.

    Raises:
        ValueError: if the URL path has no final component (for example
            it ends with '/'), so no file name can be derived from it.
        Any exception raised by ``urlretrieve`` or ``os.rename`` is
        propagated after the temporary file has been removed.
    """
    import os
    import os.path
    try:  # Python 3 module layout
        from urllib.parse import urlparse
        from urllib.request import urlretrieve
    except ImportError:  # Python 2 module layout
        from urlparse import urlparse
        from urllib import urlretrieve

    filename = urlparse(url)[2].split('/')[-1]  # '/': it's a URL!
    if not filename:
        # An empty name would make `target` the directory itself, which
        # always "exists" and would be skipped without any warning.
        raise ValueError("cannot derive a file name from URL %r" % (url,))

    target = os.path.join(directory, filename)
    if os.path.exists(target):
        print("\tskipping %r -- already exists" % target)
        return

    print("\tfetching %s ..." % url)
    partial = target + ".part"
    try:
        urlretrieve(url, partial)
        os.rename(partial, target)
    finally:
        # After a successful rename `partial` is gone and this is a no-op;
        # after a failure it discards the incomplete download.
        if os.path.exists(partial):
            os.remove(partial)
def main(urls, directory):
    """Fetch every URL listed in `urls` into `directory`.

    Parameters:
        urls: a single string of URLs separated by whitespace.
        directory: directory handed to fetch_file_from_url() for each URL.
    """
    # Single-argument parenthesised print produces the same output on
    # Python 2 and Python 3, whereas the print statement is Python 2 only.
    print("Downloading data files to %r" % directory)
    for url in urls.split():
        fetch_file_from_url(url, directory)
if __name__ == "__main__":
    import sys

    # Everything after the script name is a candidate target directory;
    # at most one may be given, and none means the current directory.
    extra_args = sys.argv[1:]
    if len(extra_args) > 1:
        raise ValueError("no more than one argument allowed")
    if extra_args:
        directory = extra_args[0]
    else:
        directory = "."
    main(DATA_URLS, directory)
|