summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
authorBerker Peksag <berker.peksag@gmail.com>2017-11-23 23:40:26 (GMT)
committerRaymond Hettinger <rhettinger@users.noreply.github.com>2017-11-23 23:40:26 (GMT)
commit3df02dbc8e197053105f9dffeae40b04ec66766e (patch)
tree81997c8cf3814de49d129bc0d79f4df84af659e9 /Lib
parent0858495a50e19defde786a4ec050ec182e920f46 (diff)
downloadcpython-3df02dbc8e197053105f9dffeae40b04ec66766e.zip
cpython-3df02dbc8e197053105f9dffeae40b04ec66766e.tar.gz
cpython-3df02dbc8e197053105f9dffeae40b04ec66766e.tar.bz2
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529)
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/test/test_robotparser.py  9
-rw-r--r--  Lib/urllib/robotparser.py  9
2 files changed, 10 insertions, 8 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 5c1a571..75198b7 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -3,7 +3,6 @@ import os
import threading
import unittest
import urllib.robotparser
-from collections import namedtuple
from test import support
from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -87,6 +86,10 @@ class BaseRequestRateTest(BaseRobotTest):
self.parser.crawl_delay(agent), self.crawl_delay
)
if self.request_rate:
+ self.assertIsInstance(
+ self.parser.request_rate(agent),
+ urllib.robotparser.RequestRate
+ )
self.assertEqual(
self.parser.request_rate(agent).requests,
self.request_rate.requests
@@ -108,7 +111,7 @@ Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
"""
agent = 'figtree'
- request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+ request_rate = urllib.robotparser.RequestRate(9, 30)
crawl_delay = 3
good = [('figtree', '/foo.html')]
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -237,7 +240,7 @@ Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
"""
- request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+ request_rate = urllib.robotparser.RequestRate(3, 15)
crawl_delay = 1
good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html']
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 9dab4c1..daac29c 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -16,6 +16,9 @@ import urllib.request
__all__ = ["RobotFileParser"]
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
class RobotFileParser:
""" This class provides a set of methods to read, parse and answer
questions about a single robots.txt file.
@@ -136,11 +139,7 @@ class RobotFileParser:
# check if all values are sane
if (len(numbers) == 2 and numbers[0].strip().isdigit()
and numbers[1].strip().isdigit()):
- req_rate = collections.namedtuple('req_rate',
- 'requests seconds')
- entry.req_rate = req_rate
- entry.req_rate.requests = int(numbers[0])
- entry.req_rate.seconds = int(numbers[1])
+ entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
state = 2
if state == 2:
self._add_entry(entry)