author     Martin v. Löwis <martin@v.loewis.de>    2002-03-18 10:41:20 (GMT)
committer  Martin v. Löwis <martin@v.loewis.de>    2002-03-18 10:41:20 (GMT)
commit     d22368ffb368198320d29518264a64a87b4f9b03 (patch)
tree       6bd55dbad7691f212bdc390bc6a683d39eacd9fe /Lib/robotparser.py
parent     73e618734df1f50ce3ff1c093f5a823d04d74ee1 (diff)
Patch #499513: use readline() instead of readlines(). Removed the
unnecessary redirection limit code which is already in FancyURLopener.
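For illustration only (this snippet is not part of the commit): the readline()-based pattern adopted by the patch consumes a file-like object one line at a time and strips each line as it is read, instead of materializing the whole body with readlines(). The local file name below is a hypothetical stand-in for the opened robots.txt response object.

    # Hedged sketch of the pattern the patch introduces; "robots.txt" is a
    # hypothetical local file standing in for the URL response.
    f = open("robots.txt")
    lines = []
    line = f.readline()
    while line:                      # readline() returns '' at end of file
        lines.append(line.strip())   # strip once here, so parse() no longer has to
        line = f.readline()
    f.close()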
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r--   Lib/robotparser.py   22
1 files changed, 6 insertions, 16 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index bfc0739..aace3a4 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -4,7 +4,7 @@
 
     You can choose between two licenses when using this package:
     1) GNU GPLv2
-    2) PYTHON 2.0 OPEN SOURCE LICENSE
+    2) PSF license for Python 2.2
 
     The robots.txt Exclusion Protocol is implemented as specified in
     http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
@@ -42,7 +42,11 @@ class RobotFileParser:
     def read(self):
         opener = URLopener()
         f = opener.open(self.url)
-        lines = f.readlines()
+        lines = []
+        line = f.readline()
+        while line:
+            lines.append(line.strip())
+            line = f.readline()
         self.errcode = opener.errcode
         if self.errcode == 401 or self.errcode == 403:
             self.disallow_all = 1
@@ -63,7 +67,6 @@ class RobotFileParser:
         entry = Entry()
 
         for line in lines:
-            line = line.strip()
             linenumber = linenumber + 1
             if not line:
                 if state==1:
@@ -209,25 +212,12 @@ class URLopener(urllib.FancyURLopener):
     def __init__(self, *args):
         apply(urllib.FancyURLopener.__init__, (self,) + args)
         self.errcode = 200
-        self.tries = 0
-        self.maxtries = 10
 
     def http_error_default(self, url, fp, errcode, errmsg, headers):
         self.errcode = errcode
         return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
                                                          errmsg, headers)
 
-    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
-        self.tries += 1
-        if self.tries >= self.maxtries:
-            return self.http_error_default(url, fp, 500,
-                                           "Internal Server Error: Redirect Recursion",
-                                           headers)
-        result = urllib.FancyURLopener.http_error_302(self, url, fp, errcode,
-                                                      errmsg, headers, data)
-        self.tries = 0
-        return result
-
 def _check(a,b):
     if not b:
         ac = "access denied"
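As a minimal usage sketch (not from the commit; the URL and user-agent string are illustrative), the patched read() method can be exercised through the module's public API, written in the Python 2 idiom the module used at the time:

    import robotparser

    rp = robotparser.RobotFileParser()
    rp.set_url("http://www.example.com/robots.txt")   # illustrative URL
    rp.read()   # fetches robots.txt; after this patch it is consumed via readline()
    if rp.can_fetch("ExampleBot", "http://www.example.com/tmp/"):
        print "fetching is allowed"
    else:
        print "fetching is disallowed"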