summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2010-07-29 17:55:01 (GMT)
committer Georg Brandl <georg@python.org> 2010-07-29 17:55:01 (GMT)
commit 0a0fc07d371a97c35a6fc7c5a9828af3930720a1 (patch)
tree 97cb861846812937705102c0ec9acf3f6f1ed4fa /Lib
parent 70120e202d97826a83e16472d43f643ed82a9275 (diff)
download cpython-0a0fc07d371a97c35a6fc7c5a9828af3930720a1.zip
cpython-0a0fc07d371a97c35a6fc7c5a9828af3930720a1.tar.gz
cpython-0a0fc07d371a97c35a6fc7c5a9828af3930720a1.tar.bz2
#4108: the first default entry (User-agent: *) wins.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_robotparser.py 14
-rw-r--r-- Lib/urllib/robotparser.py 6
2 files changed, 18 insertions, 2 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 9d30405..fd00706 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -216,6 +216,20 @@ bad = ['/some/path?name=value']
RobotTest(14, doc, good, bad)
+# 15. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(15, doc, good, bad)
+
class NetworkTestCase(unittest.TestCase):
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 30baa05..75be4af 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -66,7 +66,9 @@ class RobotFileParser:
def _add_entry(self, entry):
if "*" in entry.useragents:
# the default entry is considered last
- self.default_entry = entry
+ if self.default_entry is None:
+ # the first default entry wins
+ self.default_entry = entry
else:
self.entries.append(entry)
@@ -118,7 +120,7 @@ class RobotFileParser:
entry.rulelines.append(RuleLine(line[1], True))
state = 2
if state == 2:
- self.entries.append(entry)
+ self._add_entry(entry)
def can_fetch(self, useragent, url):