author     Martin v. Löwis <martin@v.loewis.de>    2002-03-18 10:41:20 (GMT)
committer  Martin v. Löwis <martin@v.loewis.de>    2002-03-18 10:41:20 (GMT)
commit     d22368ffb368198320d29518264a64a87b4f9b03 (patch)
tree       6bd55dbad7691f212bdc390bc6a683d39eacd9fe /Lib/robotparser.py
parent     73e618734df1f50ce3ff1c093f5a823d04d74ee1 (diff)
Patch #499513: use readline() instead of readlines(). Removed the
unnecessary redirection limit code which is already in FancyURLopener.
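For illustration only (this snippet is not part of the commit): the readline()-based pattern adopted by the patch consumes a file-like object one line at a time and strips each line as it is read, instead of materializing the whole body with readlines(). The local file name below is a hypothetical stand-in for the opened robots.txt response object.

    # Hedged sketch of the pattern the patch introduces; "robots.txt" is a
    # hypothetical local file standing in for the URL response.
    f = open("robots.txt")
    lines = []
    line = f.readline()
    while line:                      # readline() returns '' at end of file
        lines.append(line.strip())   # strip once here, so parse() no longer has to
        line = f.readline()
    f.close()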
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r--   Lib/robotparser.py   22
1 files changed, 6 insertions, 16 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index bfc0739..aace3a4 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -4,7 +4,7 @@
 
     You can choose between two licenses when using this package:
     1) GNU GPLv2
-    2) PYTHON 2.0 OPEN SOURCE LICENSE
+    2) PSF license for Python 2.2
 
     The robots.txt Exclusion Protocol is implemented as specified in
     http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
@@ -42,7 +42,11 @@ class RobotFileParser:
     def read(self):
         opener = URLopener()
         f = opener.open(self.url)
-        lines = f.readlines()
+        lines = []
+        line = f.readline()
+        while line:
+            lines.append(line.strip())
+            line = f.readline()
         self.errcode = opener.errcode
         if self.errcode == 401 or self.errcode == 403:
             self.disallow_all = 1
@@ -63,7 +67,6 @@ class RobotFileParser:
         entry = Entry()
 
         for line in lines:
-            line = line.strip()
             linenumber = linenumber + 1
             if not line:
                 if state==1:
@@ -209,25 +212,12 @@ class URLopener(urllib.FancyURLopener):
     def __init__(self, *args):
         apply(urllib.FancyURLopener.__init__, (self,) + args)
         self.errcode = 200
-        self.tries = 0
-        self.maxtries = 10
 
     def http_error_default(self, url, fp, errcode, errmsg, headers):
         self.errcode = errcode
         return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
                                                          errmsg, headers)
 
-    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
-        self.tries += 1
-        if self.tries >= self.maxtries:
-            return self.http_error_default(url, fp, 500,
-                                           "Internal Server Error: Redirect Recursion",
-                                           headers)
-        result = urllib.FancyURLopener.http_error_302(self, url, fp, errcode,
-                                                      errmsg, headers, data)
-        self.tries = 0
-        return result
-
 def _check(a,b):
     if not b:
         ac = "access denied"
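As a minimal usage sketch (not from the commit; the URL and user-agent string are illustrative), the patched read() method can be exercised through the module's public API, written in the Python 2 idiom the module used at the time:

    import robotparser

    rp = robotparser.RobotFileParser()
    rp.set_url("http://www.example.com/robots.txt")   # illustrative URL
    rp.read()   # fetches robots.txt; after this patch it is consumed via readline()
    if rp.can_fetch("ExampleBot", "http://www.example.com/tmp/"):
        print "fetching is allowed"
    else:
        print "fetching is disallowed"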