diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2017-11-23 23:57:58 (GMT) |
---|---|---|
committer | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2017-11-23 23:57:58 (GMT) |
commit | ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03 (patch) | |
tree | 977839c6cf63c20540af78240c219ccd39a190cc /Lib | |
parent | a645b23ffc76073a2eb4e77b88cb7648cfc6ef77 (diff) | |
download | cpython-ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03.zip cpython-ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03.tar.gz cpython-ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03.tar.bz2 |
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)
(cherry picked from commit 3df02dbc8e197053105f9dffeae40b04ec66766e)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_robotparser.py | 9 | ||||
-rw-r--r-- | Lib/urllib/robotparser.py | 9 |
2 files changed, 10 insertions, 8 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 0f64ba8..e47344c 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -2,7 +2,6 @@ import io
 import os
 import unittest
 import urllib.robotparser
-from collections import namedtuple
 from test import support
 from http.server import BaseHTTPRequestHandler, HTTPServer
 try:
@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest):
                         self.parser.crawl_delay(agent), self.crawl_delay
                     )
                 if self.request_rate:
+                    self.assertIsInstance(
+                        self.parser.request_rate(agent),
+                        urllib.robotparser.RequestRate
+                    )
                     self.assertEqual(
                         self.parser.request_rate(agent).requests,
                         self.request_rate.requests
@@ -111,7 +114,7 @@ Disallow: /a%2fb.html
 Disallow: /%7ejoe/index.html
     """
     agent = 'figtree'
-    request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+    request_rate = urllib.robotparser.RequestRate(9, 30)
     crawl_delay = 3
     good = [('figtree', '/foo.html')]
     bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -240,7 +243,7 @@ Crawl-delay: 1
 Request-rate: 3/15
 Disallow: /cyberworld/map/
     """
-    request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+    request_rate = urllib.robotparser.RequestRate(3, 15)
     crawl_delay = 1
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html']
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 9dab4c1..daac29c 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -16,6 +16,9 @@ import urllib.request
 
 __all__ = ["RobotFileParser"]
 
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -136,11 +139,7 @@ class RobotFileParser:
                         # check if all values are sane
                         if (len(numbers) == 2 and numbers[0].strip().isdigit()
                             and numbers[1].strip().isdigit()):
-                            req_rate = collections.namedtuple('req_rate',
-                                                              'requests seconds')
-                            entry.req_rate = req_rate
-                            entry.req_rate.requests = int(numbers[0])
-                            entry.req_rate.seconds = int(numbers[1])
+                            entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
                         state = 2
         if state == 2:
             self._add_entry(entry)