diff options
author | Brett Cannon <bcannon@gmail.com> | 2004-10-06 02:11:37 (GMT) |
---|---|---|
committer | Brett Cannon <bcannon@gmail.com> | 2004-10-06 02:11:37 (GMT) |
commit | 4f35c71543f5dc0023590676c21b18b1a3a5986f (patch) | |
tree | 64e2553be58da706224e0df4856e9d9156d4245b | |
parent | 579b3e2416cd403b634e2b41a946719630679fbc (diff) | |
download | cpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.zip cpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.tar.gz cpython-4f35c71543f5dc0023590676c21b18b1a3a5986f.tar.bz2 |
Locale data that contains regex metacharacters is now properly escaped.
Closes bug #1039270.
-rw-r--r-- | Lib/_strptime.py | 6 | ||||
-rw-r--r-- | Lib/test/test_strptime.py | 13 | ||||
-rw-r--r-- | Misc/NEWS | 5 |
3 files changed, 22 insertions, 2 deletions
diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 22455ae..d93139e 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -15,6 +15,7 @@ import locale import calendar from re import compile as re_compile from re import IGNORECASE +from re import escape as re_escape from datetime import date as datetime_date try: from thread import allocate_lock as _thread_allocate_lock @@ -232,7 +233,7 @@ class TimeRE(dict): return '' to_convert = to_convert[:] to_convert.sort(key=len, reverse=True) - regex = '|'.join(to_convert) + regex = '|'.join(re_escape(stuff) for stuff in to_convert) regex = '(?P<%s>%s' % (directive, regex) return '%s)' % regex @@ -245,7 +246,8 @@ class TimeRE(dict): """ processed_format = '' # The sub() call escapes all characters that might be misconstrued - # as regex syntax. + # as regex syntax. Cannot use re.escape since we have to deal with + # format directives (%m, etc.). regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") format = regex_chars.sub(r"\\\1", format) whitespace_replacement = re_compile('\s+') diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 5aaa9a7..17f339b 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase): found = compiled_re.match("\w+ 10") self.failUnless(found, "Escaping failed of format '\w+ 10'") + def test_locale_data_w_regex_metacharacters(self): + # Check that if locale data contains regex metacharacters they are + # escaped properly. + # Discovered by bug #1039270 . 
+ locale_time = _strptime.LocaleTime() + locale_time.timezone = (frozenset(("utc", "gmt", + "Tokyo (standard time)")), + frozenset("Tokyo (daylight time)")) + time_re = _strptime.TimeRE(locale_time) + self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"), + "locale data that contains regex metacharacters is not" + " properly escaped") + class StrptimeTests(unittest.TestCase): """Tests for _strptime.strptime.""" @@ -34,6 +34,11 @@ Extension modules Library ------- +- time.strptime() now properly escapes timezones and all other locale-specific + strings for regex-specific symbols. Was breaking under Japanese Windows when + the timezone was specified as "Tokyo (standard time)". + Closes bug #1039270. + - Updates for the email package: + All deprecated APIs that in email 2.x issued warnings have been removed: _encoder argument to the MIMEText constructor, Message.add_payload(), |