summaryrefslogtreecommitdiffstats
path: root/Tools/webchecker/websucker.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/webchecker/websucker.py')
-rwxr-xr-xTools/webchecker/websucker.py110
1 files changed, 55 insertions, 55 deletions
diff --git a/Tools/webchecker/websucker.py b/Tools/webchecker/websucker.py
index 6169446..852df07 100755
--- a/Tools/webchecker/websucker.py
+++ b/Tools/webchecker/websucker.py
@@ -16,29 +16,29 @@ import webchecker
if __version__[0] == '$':
_v = string.split(__version__)
if len(_v) == 3:
- __version__ = _v[1]
+ __version__ = _v[1]
def main():
verbose = webchecker.VERBOSE
try:
- opts, args = getopt.getopt(sys.argv[1:], "qv")
+ opts, args = getopt.getopt(sys.argv[1:], "qv")
except getopt.error, msg:
- print msg
- print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..."
- return 2
+ print msg
+ print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..."
+ return 2
for o, a in opts:
- if o == "-q":
- verbose = 0
- if o == "-v":
- verbose = verbose + 1
+ if o == "-q":
+ verbose = 0
+ if o == "-v":
+ verbose = verbose + 1
c = Sucker()
c.setflags(verbose=verbose)
c.urlopener.addheaders = [
- ('User-agent', 'websucker/%s' % __version__),
- ]
+ ('User-agent', 'websucker/%s' % __version__),
+ ]
for arg in args:
- print "Adding root", arg
- c.addroot(arg)
+ print "Adding root", arg
+ c.addroot(arg)
print "Run..."
c.run()
@@ -47,57 +47,57 @@ class Sucker(webchecker.Checker):
checkext = 0
def readhtml(self, url):
- text = None
- path = self.savefilename(url)
- try:
- f = open(path, "rb")
- except IOError:
- f = self.openpage(url)
- if f:
- info = f.info()
- nurl = f.geturl()
- if nurl != url:
- url = nurl
- path = self.savefilename(url)
- text = f.read()
- f.close()
- self.savefile(text, path)
- if not self.checkforhtml(info, url):
- text = None
- else:
- if self.checkforhtml({}, url):
- text = f.read()
- f.close()
- return text, url
+ text = None
+ path = self.savefilename(url)
+ try:
+ f = open(path, "rb")
+ except IOError:
+ f = self.openpage(url)
+ if f:
+ info = f.info()
+ nurl = f.geturl()
+ if nurl != url:
+ url = nurl
+ path = self.savefilename(url)
+ text = f.read()
+ f.close()
+ self.savefile(text, path)
+ if not self.checkforhtml(info, url):
+ text = None
+ else:
+ if self.checkforhtml({}, url):
+ text = f.read()
+ f.close()
+ return text, url
def savefile(self, text, path):
- dir, base = os.path.split(path)
- makedirs(dir)
- f = open(path, "wb")
- f.write(text)
- f.close()
- print "saved", path
+ dir, base = os.path.split(path)
+ makedirs(dir)
+ f = open(path, "wb")
+ f.write(text)
+ f.close()
+ print "saved", path
def savefilename(self, url):
- type, rest = urllib.splittype(url)
- host, path = urllib.splithost(rest)
- while path[:1] == "/": path = path[1:]
- user, host = urllib.splituser(host)
- host, port = urllib.splitnport(host)
- host = string.lower(host)
- path = os.path.join(host, path)
- if path[-1] == "/": path = path + "index.html"
- if os.sep != "/":
- path = string.join(string.split(path, "/"), os.sep)
- return path
+ type, rest = urllib.splittype(url)
+ host, path = urllib.splithost(rest)
+ while path[:1] == "/": path = path[1:]
+ user, host = urllib.splituser(host)
+ host, port = urllib.splitnport(host)
+ host = string.lower(host)
+ path = os.path.join(host, path)
+ if path[-1] == "/": path = path + "index.html"
+ if os.sep != "/":
+ path = string.join(string.split(path, "/"), os.sep)
+ return path
def makedirs(dir):
if not dir or os.path.exists(dir):
- return
+ return
head, tail = os.path.split(dir)
if not tail:
- print "Huh? Don't know how to make dir", dir
- return
+ print "Huh? Don't know how to make dir", dir
+ return
makedirs(head)
os.mkdir(dir, 0777)