From 2c4810efa2421c1a3e0042888b71193a917b39c5 Mon Sep 17 00:00:00 2001
From: Senthil Kumaran <senthil@uthcode.com>
Date: Wed, 29 May 2013 05:58:47 -0700
Subject: #17403: robotparser normalizes the URLs before adding them to the
 ruleline. This helps in handling certain types of invalid URLs in a
 conservative manner.

---
 Lib/robotparser.py           |  1 +
 Lib/test/test_robotparser.py | 12 ++++++++++++
 Misc/NEWS                    |  4 ++++
 3 files changed, 17 insertions(+)

diff --git a/Lib/robotparser.py b/Lib/robotparser.py
index 1722863..ad3be94 100644
--- a/Lib/robotparser.py
+++ b/Lib/robotparser.py
@@ -160,6 +160,7 @@ class RuleLine:
         if path == '' and not allowance:
             # an empty value means allow all
             allowance = True
+        path = urlparse.urlunparse(urlparse.urlparse(path))
         self.path = urllib.quote(path)
         self.allowance = allowance
 
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index b3d4a46..651301b 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -228,6 +228,18 @@ bad = ['/some/path']
 
 RobotTest(15, doc, good, bad)
 
+# 16. Empty query (issue #17403). Normalizing the url first.
+doc = """
+User-agent: *
+Allow: /some/path?
+Disallow: /another/path?
+"""
+
+good = ['/some/path?']
+bad = ['/another/path?']
+
+RobotTest(16, doc, good, bad)
+
 
 class NetworkTestCase(unittest.TestCase):
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 00448c4..7e901f7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,6 +15,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #17403: robotparser normalizes the URLs before adding them to the
+  ruleline. This helps in handling certain types of invalid URLs in a
+  conservative manner. Patch contributed by Mher Movsisyan.
+
 - Implement inequality on weakref.WeakSet.
 
 - Issue #17981: Closed socket on error in SysLogHandler.
-- 
cgit v0.12