summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/shelve.py 12
-rw-r--r-- Lib/test/test_robotparser.py 69
-rw-r--r-- Lib/test/test_shelve.py 15
-rw-r--r-- Lib/urllib/robotparser.py 5
4 files changed, 100 insertions, 1 deletion
diff --git a/Lib/shelve.py b/Lib/shelve.py
index d651b9e..c8d9cf5 100644
--- a/Lib/shelve.py
+++ b/Lib/shelve.py
@@ -64,6 +64,16 @@ import warnings
__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
+class _ClosedDict(collections.MutableMapping):
+ 'Marker for a closed dict. Access attempts raise a ValueError.'
+
+ def closed(self, *args):
+ raise ValueError('invalid operation on closed shelf')
+ __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed
+
+ def __repr__(self):
+ return '<Closed Dictionary>'
+
class Shelf(collections.MutableMapping):
"""Base class for shelf implementations.
@@ -127,7 +137,7 @@ class Shelf(collections.MutableMapping):
self.dict.close()
except AttributeError:
pass
- self.dict = 0
+ self.dict = _ClosedDict()
def __del__(self):
if not hasattr(self, 'writeback'):
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 2101918..9c47e31 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -136,6 +136,75 @@ bad = [] # Bug report says "/" should be denied, but that is not in the RFC
RobotTest(7, doc, good, bad)
+# From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364
+
+# 8.
+doc = """
+User-agent: Googlebot
+Allow: /folder1/myfile.html
+Disallow: /folder1/
+"""
+
+good = ['/folder1/myfile.html']
+bad = ['/folder1/anotherfile.html']
+
+RobotTest(8, doc, good, bad, agent="Googlebot")
+
+# 9 and 10. This file is ordered incorrectly: "Googlebot" is a substring of
+# "Googlebot-Mobile", so both agents get the same result (test 10 behaves like test 9).
+doc = """
+User-agent: Googlebot
+Disallow: /
+
+User-agent: Googlebot-Mobile
+Allow: /
+"""
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(9, doc, good, bad, agent="Googlebot")
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(10, doc, good, bad, agent="Googlebot-Mobile")
+
+# 11. Get the order correct.
+doc = """
+User-agent: Googlebot-Mobile
+Allow: /
+
+User-agent: Googlebot
+Disallow: /
+"""
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(11, doc, good, bad, agent="Googlebot")
+
+good = ['/something.jpg']
+bad = []
+
+RobotTest(12, doc, good, bad, agent="Googlebot-Mobile")
+
+
+# 13. Google also got the order wrong in #8. You need to specify the
+# URLs from more specific to more general. (Same doc as #8; the agent name is lowercase here.)
+doc = """
+User-agent: Googlebot
+Allow: /folder1/myfile.html
+Disallow: /folder1/
+"""
+
+good = ['/folder1/myfile.html']
+bad = ['/folder1/anotherfile.html']
+
+RobotTest(13, doc, good, bad, agent="googlebot")
+
+
+
class NetworkTestCase(unittest.TestCase):
def testPasswordProtectedSite(self):
diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py
index b2ed87d..9699043 100644
--- a/Lib/test/test_shelve.py
+++ b/Lib/test/test_shelve.py
@@ -47,6 +47,21 @@ class TestCase(unittest.TestCase):
for f in glob.glob(self.fn+"*"):
support.unlink(f)
+ def test_close(self):
+ d1 = {}
+ s = shelve.Shelf(d1, protocol=2, writeback=False)
+ s['key1'] = [1,2,3,4]
+ self.assertEqual(s['key1'], [1,2,3,4])
+ self.assertEqual(len(s), 1)
+ s.close()
+ self.assertRaises(ValueError, len, s)
+ try:
+ s['key1']
+ except ValueError:
+ pass
+ else:
+ self.fail('Closed shelf should not find a key')
+
def test_ascii_file_shelf(self):
s = shelve.open(self.fn, protocol=0)
try:
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index c55fb50..bafb611 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -76,6 +76,10 @@ class RobotFileParser:
We allow that a user-agent: line is not preceded by
one or more blank lines.
"""
+ # states:
+ # 0: start state
+ # 1: saw user-agent line
+ # 2: saw an allow or disallow line
state = 0
entry = Entry()
@@ -112,6 +116,7 @@ class RobotFileParser:
elif line[0] == "allow":
if state != 0:
entry.rulelines.append(RuleLine(line[1], True))
+ state = 2
if state == 2:
self.entries.append(entry)