summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrett Cannon <bcannon@gmail.com>2004-10-06 02:11:37 (GMT)
committerBrett Cannon <bcannon@gmail.com>2004-10-06 02:11:37 (GMT)
commit4f35c71543f5dc0023590676c21b18b1a3a5986f (patch)
tree64e2553be58da706224e0df4856e9d9156d4245b
parent579b3e2416cd403b634e2b41a946719630679fbc (diff)
downloadcpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.zip
cpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.tar.gz
cpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.tar.bz2
Locale data that contains regex metacharacters is now properly escaped.
Closes bug #1039270.
-rw-r--r--Lib/_strptime.py6
-rw-r--r--Lib/test/test_strptime.py13
-rw-r--r--Misc/NEWS5
3 files changed, 22 insertions, 2 deletions
diff --git a/Lib/_strptime.py b/Lib/_strptime.py
index 22455ae..d93139e 100644
--- a/Lib/_strptime.py
+++ b/Lib/_strptime.py
@@ -15,6 +15,7 @@ import locale
import calendar
from re import compile as re_compile
from re import IGNORECASE
+from re import escape as re_escape
from datetime import date as datetime_date
try:
from thread import allocate_lock as _thread_allocate_lock
@@ -232,7 +233,7 @@ class TimeRE(dict):
return ''
to_convert = to_convert[:]
to_convert.sort(key=len, reverse=True)
- regex = '|'.join(to_convert)
+ regex = '|'.join(re_escape(stuff) for stuff in to_convert)
regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex
@@ -245,7 +246,8 @@ class TimeRE(dict):
"""
processed_format = ''
# The sub() call escapes all characters that might be misconstrued
- # as regex syntax.
+ # as regex syntax. Cannot use re.escape since we have to deal with
+ # format directives (%m, etc.).
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
format = regex_chars.sub(r"\\\1", format)
whitespace_replacement = re_compile('\s+')
diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py
index 5aaa9a7..17f339b 100644
--- a/Lib/test/test_strptime.py
+++ b/Lib/test/test_strptime.py
@@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase):
found = compiled_re.match("\w+ 10")
self.failUnless(found, "Escaping failed of format '\w+ 10'")
+    def test_locale_data_w_regex_metacharacters(self):
+        # Locale data containing regex metacharacters (bug #1039270) must be
+        # escaped.  Each timezone set is built from a tuple so the name stays
+        # one string; frozenset("...") would split it into characters.
+        locale_time = _strptime.LocaleTime()
+        locale_time.timezone = (frozenset(("utc", "gmt",
+                                            "Tokyo (standard time)")),
+                                frozenset(("Tokyo (daylight time)",)))
+        time_re = _strptime.TimeRE(locale_time)
+        self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"),
+                        "locale data that contains regex metacharacters is not"
+                        " properly escaped")
+
class StrptimeTests(unittest.TestCase):
"""Tests for _strptime.strptime."""
diff --git a/Misc/NEWS b/Misc/NEWS
index 7927036..81db100 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,11 @@ Extension modules
Library
-------
+- time.strptime() now properly escapes regex metacharacters in timezone names
+  and all other locale-specific strings.  Parsing previously failed under
+  Japanese Windows when the timezone was specified as "Tokyo (standard time)".
+  Closes bug #1039270.
+
- Updates for the email package:
+ All deprecated APIs that in email 2.x issued warnings have been removed:
_encoder argument to the MIMEText constructor, Message.add_payload(),