diff options
author | Berker Peksag <berker.peksag@gmail.com> | 2017-11-23 23:40:26 (GMT) |
---|---|---|
committer | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2017-11-23 23:40:26 (GMT) |
commit | 3df02dbc8e197053105f9dffeae40b04ec66766e (patch) | |
tree | 81997c8cf3814de49d129bc0d79f4df84af659e9 /Lib | |
parent | 0858495a50e19defde786a4ec050ec182e920f46 (diff) | |
download | cpython-3df02dbc8e197053105f9dffeae40b04ec66766e.zip cpython-3df02dbc8e197053105f9dffeae40b04ec66766e.tar.gz cpython-3df02dbc8e197053105f9dffeae40b04ec66766e.tar.bz2 |
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_robotparser.py | 9 | ||||
-rw-r--r-- | Lib/urllib/robotparser.py | 9 |
2 files changed, 10 insertions, 8 deletions
```diff
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 5c1a571..75198b7 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -3,7 +3,6 @@ import os
 import threading
 import unittest
 import urllib.robotparser
-from collections import namedtuple
 from test import support
 from http.server import BaseHTTPRequestHandler, HTTPServer
 
@@ -87,6 +86,10 @@ class BaseRequestRateTest(BaseRobotTest):
                         self.parser.crawl_delay(agent), self.crawl_delay
                     )
                 if self.request_rate:
+                    self.assertIsInstance(
+                        self.parser.request_rate(agent),
+                        urllib.robotparser.RequestRate
+                    )
                     self.assertEqual(
                         self.parser.request_rate(agent).requests,
                         self.request_rate.requests
@@ -108,7 +111,7 @@ Disallow: /a%2fb.html
 Disallow: /%7ejoe/index.html
     """
     agent = 'figtree'
-    request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+    request_rate = urllib.robotparser.RequestRate(9, 30)
     crawl_delay = 3
     good = [('figtree', '/foo.html')]
     bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -237,7 +240,7 @@ Crawl-delay: 1
 Request-rate: 3/15
 Disallow: /cyberworld/map/
     """
-    request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+    request_rate = urllib.robotparser.RequestRate(3, 15)
     crawl_delay = 1
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html']
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 9dab4c1..daac29c 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -16,6 +16,9 @@ import urllib.request
 
 __all__ = ["RobotFileParser"]
 
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -136,11 +139,7 @@ class RobotFileParser:
                         # check if all values are sane
                         if (len(numbers) == 2 and numbers[0].strip().isdigit()
                             and numbers[1].strip().isdigit()):
-                            req_rate = collections.namedtuple('req_rate',
-                                                              'requests seconds')
-                            entry.req_rate = req_rate
-                            entry.req_rate.requests = int(numbers[0])
-                            entry.req_rate.seconds = int(numbers[1])
+                            entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
                         state = 2
         if state == 2:
             self._add_entry(entry)
```