diff options
author | Georg Brandl <georg@python.org> | 2010-08-01 20:57:27 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2010-08-01 20:57:27 (GMT) |
commit | 745e86b3f81463ab1a18e63374f27a522a0a784f (patch) | |
tree | 7d176a591a23c53d5d80c7f900b8c4a998617e29 | |
parent | 5f6f6eb0bff52841ee18e5c6a96fe3600f44693d (diff) | |
download | cpython-745e86b3f81463ab1a18e63374f27a522a0a784f.zip cpython-745e86b3f81463ab1a18e63374f27a522a0a784f.tar.gz cpython-745e86b3f81463ab1a18e63374f27a522a0a784f.tar.bz2 |
Merged revisions 83238 via svnmerge from
svn+ssh://svn.python.org/python/branches/py3k
........
r83238 | georg.brandl | 2010-07-29 19:55:01 +0200 (Do, 29 Jul 2010) | 1 line
#4108: the first default entry (User-agent: *) wins.
........
-rw-r--r-- | Lib/test/test_robotparser.py | 14 | ||||
-rw-r--r-- | Lib/urllib/robotparser.py | 6 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 21 insertions, 2 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 9d30405..fd00706 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -216,6 +216,20 @@ bad = ['/some/path?name=value']

 RobotTest(14, doc, good, bad)

+# 15. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(15, doc, good, bad)
+
 class NetworkTestCase(unittest.TestCase):

diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 30baa05..75be4af 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -66,7 +66,9 @@ class RobotFileParser:
     def _add_entry(self, entry):
         if "*" in entry.useragents:
             # the default entry is considered last
-            self.default_entry = entry
+            if self.default_entry is None:
+                # the first default entry wins
+                self.default_entry = entry
         else:
             self.entries.append(entry)

@@ -118,7 +120,7 @@ class RobotFileParser:
                         entry.rulelines.append(RuleLine(line[1], True))
                         state = 2
         if state == 2:
-            self.entries.append(entry)
+            self._add_entry(entry)


     def can_fetch(self, useragent, url):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ C-API
 Library
 -------

+- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
+  entries, consider the first one.
+
 - Issue #8397: Raise an error when attempting to mix iteration and regular reads
   on a BZ2File object, rather than returning incorrect results.
