summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2010-08-01 22:00:39 (GMT)
committer: Georg Brandl <georg@python.org> 2010-08-01 22:00:39 (GMT)
commit: 0ba1f01adfe9132340324203e10671cf9b419b1c (patch)
tree: e45d45f9f40fb5d6a649d9c5849baba18ec40fdf
parent: 86edb140570f0e1761e6444d78d2a027d58dd240 (diff)
download: cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.zip
cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.tar.gz
cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.tar.bz2
Merged revisions 83449 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/release27-maint

................
  r83449 | georg.brandl | 2010-08-01 22:59:03 +0200 (So, 01 Aug 2010) | 9 lines

  Merged revisions 83238 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/branches/py3k

  ........
    r83238 | georg.brandl | 2010-07-29 19:55:01 +0200 (Do, 29 Jul 2010) | 1 line

    #4108: the first default entry (User-agent: *) wins.
  ........
................
-rw-r--r--  Lib/robotparser.py            6
-rw-r--r--  Lib/test/test_robotparser.py  14
-rw-r--r--  Misc/NEWS                     3
3 files changed, 21 insertions, 2 deletions
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 447563f..726854b 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -68,7 +68,9 @@ class RobotFileParser:
def _add_entry(self, entry):
if "*" in entry.useragents:
# the default entry is considered last
- self.default_entry = entry
+ if self.default_entry is None:
+ # the first default entry wins
+ self.default_entry = entry
else:
self.entries.append(entry)
@@ -120,7 +122,7 @@ class RobotFileParser:
entry.rulelines.append(RuleLine(line[1], True))
state = 2
if state == 2:
- self.entries.append(entry)
+ self._add_entry(entry)
def can_fetch(self, useragent, url):
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 431b8ff..650b603 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -202,6 +202,20 @@ bad = ['/folder1/anotherfile.html']
RobotTest(13, doc, good, bad, agent="googlebot")
+# 14. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(14, doc, good, bad)
+
class TestCase(unittest.TestCase):
def runTest(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index b0e66a3..37b143b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -35,6 +35,9 @@ Core and Builtins
when turned into an exception: in this case the exception simply
gets ignored.
+- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
+ entries, consider the first one.
+
- Issue #9354: Provide getsockopt() in asyncore's file_wrapper.
- In the unicode/str.format(), raise a ValueError when indexes to arguments are