diff options
author | Tim Peters <tim.peters@gmail.com> | 2003-01-19 04:40:44 (GMT) |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2003-01-19 04:40:44 (GMT) |
commit | 80cebc16aa800ec5740e4b9c8b6508bae4cca434 (patch) | |
tree | 5c615159c90774cf9b787120762da86a902532cb /Lib | |
parent | 6550051691d604c728ed56e4acf90dc6535981f9 (diff) | |
download | cpython-80cebc16aa800ec5740e4b9c8b6508bae4cca434.zip cpython-80cebc16aa800ec5740e4b9c8b6508bae4cca434.tar.gz cpython-80cebc16aa800ec5740e4b9c8b6508bae4cca434.tar.bz2 |
SF patch 670194: Performance enhancement for _strptime.py.
From Brett Cannon. Mostly speedups via caching format string ->
compiled regexp.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_strptime.py | 56 |
1 file changed, 36 insertions, 20 deletions
diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 1694456..0863426 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -24,7 +24,6 @@ import locale import calendar from re import compile as re_compile from re import IGNORECASE -from string import whitespace as whitespace_string __author__ = "Brett Cannon" __email__ = "drifty@bigfoot.com" @@ -33,6 +32,17 @@ __all__ = ['strptime'] RegexpType = type(re_compile('')) +def _getlang(): + # Figure out what the current language is set to. + current_lang = locale.getlocale(locale.LC_TIME)[0] + if current_lang: + return current_lang + else: + current_lang = locale.getdefaultlocale()[0] + if current_lang: + return current_lang + else: + return '' class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -285,19 +295,9 @@ class LocaleTime(object): self.__timezone = self.__pad(time.tzname, 0) def __calc_lang(self): - # Set self.__lang by using locale.getlocale() or - # locale.getdefaultlocale(). If both turn up empty, set the attribute - # to ''. This is to stop calls to this method and to make sure - # strptime() can produce an re object correctly. - current_lang = locale.getlocale(locale.LC_TIME)[0] - if current_lang: - self.__lang = current_lang - else: - current_lang = locale.getdefaultlocale()[0] - if current_lang: - self.__lang = current_lang - else: - self.__lang = '' + # Set self.__lang by using __getlang(). 
+ self.__lang = _getlang() + class TimeRE(dict): @@ -382,8 +382,8 @@ class TimeRE(dict): def pattern(self, format): """Return re pattern for the format string.""" processed_format = '' - for whitespace in whitespace_string: - format = format.replace(whitespace, r'\s*') + whitespace_replacement = re_compile('\s+') + format = whitespace_replacement.sub('\s*', format) while format.find('%') != -1: directive_index = format.index('%')+1 processed_format = "%s%s%s" % (processed_format, @@ -394,15 +394,31 @@ class TimeRE(dict): def compile(self, format): """Return a compiled re object for the format string.""" - format = "(?#%s)%s" % (self.locale_time.lang,format) return re_compile(self.pattern(format), IGNORECASE) +# Cached TimeRE; probably only need one instance ever so cache it for performance +_locale_cache = TimeRE() +# Cached regex objects; same reason as for TimeRE cache +_regex_cache = dict() def strptime(data_string, format="%a %b %d %H:%M:%S %Y"): """Return a time struct based on the input data and the format string.""" - locale_time = LocaleTime() - compiled_re = TimeRE(locale_time).compile(format) - found = compiled_re.match(data_string) + global _locale_cache + global _regex_cache + locale_time = _locale_cache.locale_time + # If the language changes, caches are invalidated, so clear them + if locale_time.lang != _getlang(): + _locale_cache = TimeRE() + _regex_cache.clear() + format_regex = _regex_cache.get(format) + if not format_regex: + # Limit regex cache size to prevent major bloating of the module; + # The value 5 is arbitrary + if len(_regex_cache) > 5: + _regex_cache.clear() + format_regex = _locale_cache.compile(format) + _regex_cache[format] = format_regex + found = format_regex.match(data_string) if not found: raise ValueError("time data did not match format") year = 1900 |