summary | refs | log | tree | commit | diff | stats
path: root/Tools/webchecker/websucker.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/webchecker/websucker.py')
-rwxr-xr-x Tools/webchecker/websucker.py 123
1 files changed, 0 insertions, 123 deletions
diff --git a/Tools/webchecker/websucker.py b/Tools/webchecker/websucker.py
deleted file mode 100755
index 4657b52..0000000
--- a/Tools/webchecker/websucker.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#! /usr/bin/env python3
-
-"""A variant on webchecker that creates a mirror copy of a remote site."""
-
-__version__ = "$Revision$"
-
-import os
-import sys
-import getopt
-import urllib.parse
-
-import webchecker
-
# Reduce an expanded revision keyword to its bare number.  A string that
# starts with '$' and splits into exactly three whitespace-separated tokens
# (for example "$Revision: 1.5 $") is replaced by its middle token; anything
# else, including the unexpanded "$Revision$", is left untouched.
if __version__[0] == '$':
    _v = __version__.split()
    __version__ = _v[1] if len(_v) == 3 else __version__
-
def main():
    """Command-line entry point: parse the flags, add the roots, run.

    Recognised flags are ``-q`` (reset verbosity to 0) and ``-v`` (raise
    verbosity by one; may be repeated).  Every remaining argument is
    registered as a root URL on a fresh Sucker before it is run.

    Returns 2 when the command line cannot be parsed and None otherwise.
    """
    try:
        options, roots = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as err:
        print(err)
        print("usage:", sys.argv[0], "[-qv] ... [rooturl] ...")
        return 2

    # Start from webchecker's default and let the flags adjust it in the
    # order they were given on the command line.
    verbosity = webchecker.VERBOSE
    for flag, _value in options:
        if flag == "-q":
            verbosity = 0
        elif flag == "-v":
            verbosity += 1

    sucker = Sucker()
    sucker.setflags(verbose=verbosity)
    # Identify this tool (and its version) to the servers being mirrored.
    sucker.urlopener.addheaders = [
        ('User-agent', 'websucker/%s' % __version__),
    ]
    for root in roots:
        print("Adding root", root)
        sucker.addroot(root)
    print("Run...")
    sucker.run()
-
class Sucker(webchecker.Checker):
    """Checker variant that keeps a local copy of every page it fetches.

    Each URL is mapped by savefilename() to a relative path of the form
    ``<host>/<path>``.  A page already present at that path is read back
    from disk; otherwise it is fetched through openpage() and saved.
    """

    # NOTE(review): both flags are presumably consulted by
    # webchecker.Checker; checkext = 0 looks like "do not check external
    # links" -- confirm against the base class.
    checkext = 0
    # Name-anchor checking is suppressed (see the note below).
    nonames = 1

    # SAM 11/13/99: in general, URLs are now URL pairs.
    # Since we've suppressed name anchor checking,
    # we can ignore the second dimension.

    def readhtml(self, url_pair):
        """Return ``(text, url)`` for the page named by *url_pair*.

        Only ``url_pair[0]`` is used to locate the page.  *text* is the
        page content when checkforhtml() accepts the page and None
        otherwise (also None when openpage() yields nothing).  *url* is
        the requested URL, or the one reported by the response's
        geturl() when that differs.

        A freshly fetched page is written to disk whether or not it turns
        out to be HTML.
        """
        url = url_pair[0]
        text = None
        path = self.savefilename(url)
        try:
            cached = open(path, "rb")
        except IOError:
            # No usable local copy: fetch the page and mirror it.
            page = self.openpage(url_pair)
            if page:
                try:
                    info = page.info()
                    final_url = page.geturl()
                    if final_url != url:
                        # Store the page under the URL it was actually
                        # served from.
                        url = final_url
                        path = self.savefilename(url)
                    text = page.read()
                finally:
                    # Release the response even if reading it fails.
                    page.close()
                self.savefile(text, path)
                if not self.checkforhtml(info, url):
                    text = None
        else:
            # A local copy exists; there are no headers for it, so
            # checkforhtml() is given an empty mapping.
            with cached:
                if self.checkforhtml({}, url):
                    text = cached.read()
        return text, url

    def savefile(self, text, path):
        """Write *text* to *path*, creating parent directories as needed.

        An I/O error while opening or writing the file is reported through
        self.message() rather than raised.  Errors raised while creating
        the parent directories are not caught here.
        """
        makedirs(os.path.dirname(path))
        try:
            # The with-statement closes the file even when write() fails.
            with open(path, "wb") as out:
                out.write(text)
        except IOError as err:
            self.message("didn't save %s: %s", path, str(err))
        else:
            self.message("saved %s", path)

    def savefilename(self, url):
        """Return the relative local path under which *url* is stored.

        The path is the lower-cased host name (user info and port removed)
        joined with the URL path.  ``index.html`` is appended when the URL
        path is empty or ends with a slash, and ``/`` is translated to the
        platform separator.
        """
        # NOTE(review): the urllib.parse.split* helpers are deprecated
        # since Python 3.8.  Moving to urlsplit() needs care to keep the
        # generated file names identical.
        # NOTE(review): ".." segments in the URL path are not neutralised
        # here, so a URL could name a file outside the host directory.
        _scheme, rest = urllib.parse.splittype(url)
        host, path = urllib.parse.splithost(rest)
        path = path.lstrip("/")
        _user, host = urllib.parse.splituser(host)
        host, _port = urllib.parse.splitnport(host)
        host = host.lower()
        if not path or path.endswith("/"):
            path += "index.html"
        if os.sep != "/":
            path = path.replace("/", os.sep)
        return os.path.join(host, path)
-
def makedirs(dir):
    """Create directory *dir* together with any missing ancestors.

    Unlike os.makedirs(), a regular file that occupies the name of a
    needed directory is not an error: the file is moved to
    ``<dir>/index.html`` so that the directory can take its place.  That
    conversion is best effort; if it fails, the function returns quietly
    and the caller's later attempt to open a file will fail instead.

    An empty *dir* is a no-op.  A trailing path separator is ignored.
    Errors from creating a new directory propagate as OSError.
    """
    if not dir:
        return
    if os.path.exists(dir):
        if not os.path.isdir(dir):
            # A previously saved page sits where a directory is needed:
            # step it aside, create the directory, then move the page in
            # as the directory's index.html.
            backup = dir + ".bak"
            try:
                os.rename(dir, backup)
                os.mkdir(dir)
                os.rename(backup, os.path.join(dir, "index.html"))
            except OSError:
                # Deliberately best effort (see docstring).
                pass
        return
    head, tail = os.path.split(dir)
    if not tail:
        if head and head != dir:
            # Trailing separator ("a/b/" splits into ("a/b", "")):
            # creating the head is all that is required.
            makedirs(head)
            return
        # A nonexistent root; there is nothing sensible to create.
        print("Huh? Don't know how to make dir", dir)
        return
    makedirs(head)
    os.mkdir(dir, 0o777)
-
if __name__ == '__main__':
    # main() returns 2 on a usage error and None otherwise; a falsy
    # result becomes exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)