diff options
| author | Georg Brandl <georg@python.org> | 2010-08-01 22:00:39 (GMT) |
|---|---|---|
| committer | Georg Brandl <georg@python.org> | 2010-08-01 22:00:39 (GMT) |
| commit | 0ba1f01adfe9132340324203e10671cf9b419b1c (patch) | |
| tree | e45d45f9f40fb5d6a649d9c5849baba18ec40fdf | |
| parent | 86edb140570f0e1761e6444d78d2a027d58dd240 (diff) | |
| download | cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.zip cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.tar.gz cpython-0ba1f01adfe9132340324203e10671cf9b419b1c.tar.bz2 | |
Merged revisions 83449 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/release27-maint
................
r83449 | georg.brandl | 2010-08-01 22:59:03 +0200 (So, 01 Aug 2010) | 9 lines
Merged revisions 83238 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r83238 | georg.brandl | 2010-07-29 19:55:01 +0200 (Do, 29 Jul 2010) | 1 line
#4108: the first default entry (User-agent: *) wins.
........
................
| -rw-r--r-- | Lib/robotparser.py | 6 | ||||
| -rw-r--r-- | Lib/test/test_robotparser.py | 14 | ||||
| -rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 21 insertions, 2 deletions
```diff
diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 447563f..726854b 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -68,7 +68,9 @@ class RobotFileParser:
     def _add_entry(self, entry):
         if "*" in entry.useragents:
             # the default entry is considered last
-            self.default_entry = entry
+            if self.default_entry is None:
+                # the first default entry wins
+                self.default_entry = entry
         else:
             self.entries.append(entry)
 
@@ -120,7 +122,7 @@ class RobotFileParser:
                         entry.rulelines.append(RuleLine(line[1], True))
                         state = 2
         if state == 2:
-            self.entries.append(entry)
+            self._add_entry(entry)
 
 
     def can_fetch(self, useragent, url):
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 431b8ff..650b603 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -202,6 +202,20 @@ bad = ['/folder1/anotherfile.html']
 
 RobotTest(13, doc, good, bad, agent="googlebot")
 
+# 14. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(14, doc, good, bad)
+
 class TestCase(unittest.TestCase):
     def runTest(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -35,6 +35,9 @@ Core and Builtins
   when turned into an exception: in this case the exception simply
   gets ignored.
 
+- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
+  entries, consider the first one.
+
 - Issue #9354: Provide getsockopt() in asyncore's file_wrapper.
 
 - In the unicode/str.format(), raise a ValueError when indexes to arguments are
```
