7 files changed, 329 insertions, 93 deletions
diff --git a/Doc/lib/libstring.tex b/Doc/lib/libstring.tex
index 48d7fc4..2824aeb 100644
--- a/Doc/lib/libstring.tex
+++ b/Doc/lib/libstring.tex
@@ -4,11 +4,23 @@
 \declaremodule{standard}{string}
 \modulesynopsis{Common string operations.}
 
+The \module{string} package contains a number of useful constants and classes,
+as well as some deprecated legacy functions that are also available as methods
+on strings.  See the module \refmodule{re}\refstmodindex{re} for string
+functions based on regular expressions.
 
-This module defines some constants useful for checking character
-classes and some useful string functions.  See the module
-\refmodule{re}\refstmodindex{re} for string functions based on regular
-expressions.
+In general, all of these objects are exposed directly in the \module{string}
+package so users need only import the \module{string} package to begin using
+these constants, classes, and functions.
+
+\begin{notice}
+Starting with Python 2.4, the traditional \module{string} module was turned
+into a package; however, backward compatibility with existing code has been
+retained.  Code using the \module{string} module that worked prior to Python
+2.4 should continue to work unchanged.
+\end{notice}
+
+\subsection{String constants}
 
 The constants defined in this module are:
 
@@ -86,11 +98,113 @@ The constants defined in this module are:
   is undefined.
 \end{datadesc}
 
+\subsection{Template strings}
+
+Templates are Unicode strings that can be used to provide string substitutions
+as described in \pep{292}.  There is a \class{Template} class that is a
+subclass of \class{unicode}, overriding the default \method{__mod__()} method.
+Instead of the normal \samp{\%}-based substitutions, Template strings support
+\samp{\$}-based substitutions, using the following rules:
+
+\begin{itemize}
+\item \samp{\$\$} is an escape; it is replaced with a single \samp{\$}.
+
+\item \samp{\$identifier} names a substitution placeholder matching a mapping
+       key of ``identifier''.  By default, ``identifier'' must spell a Python
+       identifier.  The first non-identifier character after the \samp{\$}
+       character terminates this placeholder specification.
+
+\item \samp{\$\{identifier\}} is equivalent to \samp{\$identifier}.  It is
+      required when valid identifier characters follow the placeholder but are
+      not part of the placeholder, e.g.\ \samp{\$\{noun\}ification}.
+\end{itemize}
+
+Any other appearance of \samp{\$} in the string will result in a
+\exception{ValueError} being raised.
+
+Template strings are used just like normal strings, in that the modulus
+operator is used to interpolate a dictionary of values into a Template string,
+e.g.:
+
+\begin{verbatim}
+>>> from string import Template
+>>> s = Template('$who likes $what')
+>>> print s % dict(who='tim', what='kung pao')
+tim likes kung pao
+>>> Template('Give $who $100') % dict(who='tim')
+Traceback (most recent call last):
+[...]
+ValueError: Invalid placeholder at index 10
+\end{verbatim}
+
+There is also a \class{SafeTemplate} class, derived from \class{Template},
+which acts the same as \class{Template}, except that if placeholders are
+missing in the interpolation dictionary, no \exception{KeyError} will be
+raised.  Instead the original placeholder (with or without the braces, as
+appropriate) will be used:
+
+\begin{verbatim}
+>>> from string import SafeTemplate
+>>> s = SafeTemplate('$who likes $what for ${meal}')
+>>> print s % dict(who='tim')
+tim likes $what for ${meal}
+\end{verbatim}
+
+The values in the mapping will automatically be converted to Unicode strings,
+using the built-in \function{unicode()} function, which will be called without
+optional arguments \var{encoding} or \var{errors}.
+
+Advanced usage: you can derive subclasses of \class{Template} or
+\class{SafeTemplate} to use application-specific placeholder rules.  To do
+this, you override the class attribute \member{pattern}; the value must be a
+compiled regular expression object with four named capturing groups.  The
+capturing groups correspond to the rules given above, along with the invalid
+placeholder rule:
+
+\begin{itemize}
+\item \var{escaped} -- This group matches the escape sequence, i.e.\ \samp{\$\$}
+      in the default pattern.
+\item \var{named} -- This group matches the unbraced placeholder name; it
+      should not include the \samp{\$} in the capturing group.
+\item \var{braced} -- This group matches the brace delimited placeholder name;
+      it should not include either the \samp{\$} or braces in the capturing
+      group.
+\item \var{bogus} -- This group matches any other \samp{\$}.  It usually just
+      matches a single \samp{\$} and should appear last.
+\end{itemize}
+
+\subsection{String functions}
+
+The following functions are available to operate on string and Unicode
+objects.  They are not available as string methods.
+
+\begin{funcdesc}{capwords}{s}
+  Split the argument into words using \function{split()}, capitalize
+  each word using \function{capitalize()}, and join the capitalized
+  words using \function{join()}.  Note that this replaces runs of
+  whitespace characters by a single space, and removes leading and
+  trailing whitespace.
+\end{funcdesc}
+
+\begin{funcdesc}{maketrans}{from, to}
+  Return a translation table suitable for passing to
+  \function{translate()} or \function{regex.compile()}, that will map
+  each character in \var{from} into the character at the same position
+  in \var{to}; \var{from} and \var{to} must have the same length.
+
+  \warning{Don't use strings derived from \constant{lowercase}
+  and \constant{uppercase} as arguments; in some locales, these don't have
+  the same length.  For case conversions, always use
+  \function{lower()} and \function{upper()}.}
+\end{funcdesc}
 
-Many of the functions provided by this module are also defined as
-methods of string and Unicode objects; see ``String Methods'' (section
-\ref{string-methods}) for more information on those.
-The functions defined in this module are:
+\subsection{Deprecated string functions}
+
+The following functions are also defined as methods of string and
+Unicode objects; see ``String Methods'' (section
+\ref{string-methods}) for more information on those.  You should consider
+these functions as deprecated, although they will not be removed until Python
+3.0.  The functions defined in this module are:
 
 \begin{funcdesc}{atof}{s}
   \deprecated{2.0}{Use the \function{float()} built-in function.}
@@ -138,14 +252,6 @@ The functions defined in this module are:
   Return a copy of \var{word} with only its first character capitalized.
 \end{funcdesc}
 
-\begin{funcdesc}{capwords}{s}
-  Split the argument into words using \function{split()}, capitalize
-  each word using \function{capitalize()}, and join the capitalized
-  words using \function{join()}.  Note that this replaces runs of
-  whitespace characters by a single space, and removes leading and
-  trailing whitespace.
-\end{funcdesc}
-
 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
   Expand tabs in a string, i.e.\ replace them by one or more spaces,
   depending on the current column and the given tab size.  The column
@@ -188,18 +294,6 @@ The functions defined in this module are:
   lower case.
 \end{funcdesc}
 
-\begin{funcdesc}{maketrans}{from, to}
-  Return a translation table suitable for passing to
-  \function{translate()} or \function{regex.compile()}, that will map
-  each character in \var{from} into the character at the same position
-  in \var{to}; \var{from} and \var{to} must have the same length.
-
-  \warning{Don't use strings derived from \constant{lowercase}
-  and \constant{uppercase} as arguments; in some locales, these don't have
-  the same length.  For case conversions, always use
-  \function{lower()} and \function{upper()}.}
-\end{funcdesc}
-
 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
   Return a list of the words of the string \var{s}.  If the optional
   second argument \var{sep} is absent or \code{None}, the words are
diff --git a/Lib/sre.py b/Lib/sre.py
index bb4bc16..8bf0fad 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -105,9 +105,6 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
 
 __version__ = "2.2.1"
 
-# this module works under 1.5.2 and later.  don't use string methods
-import string
-
 # flags
 I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
 L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
@@ -201,7 +198,7 @@ def escape(pattern):
                 s[i] = "\\000"
             else:
                 s[i] = "\\" + c
-    return _join(s, pattern)
+    return pattern[:0].join(s)
 
 # --------------------------------------------------------------------
 # internals
@@ -213,10 +210,6 @@ _pattern_type = type(sre_compile.compile("", 0))
 
 _MAXCACHE = 100
 
-def _join(seq, sep):
-    # internal: join into string having the same type as sep
-    return string.join(seq, sep[:0])
-
 def _compile(*key):
     # internal: compile pattern
     cachekey = (type(key[0]),) + key
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 002b195..1863f48 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -217,12 +217,11 @@ SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
 SRE_INFO_CHARSET = 4 # pattern starts with character from given set
 
 if __name__ == "__main__":
-    import string
     def dump(f, d, prefix):
         items = d.items()
         items.sort(key=lambda a: a[1])
         for k, v in items:
-            f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
     f = open("sre_constants.h", "w")
     f.write("""\
 /*
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 94d526d..5c4298a 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -12,8 +12,7 @@
 
 # XXX: show string offset and offending character for all errors
 
-# this module works under 1.5.2 and later.  don't use string methods
-import string, sys
+import sys
 
 from sre_constants import *
 
@@ -63,13 +62,6 @@ FLAGS = {
     "u": SRE_FLAG_UNICODE,
 }
 
-# figure out best way to convert hex/octal numbers to integers
-try:
-    int("10", 8)
-    atoi = int # 2.0 and later
-except TypeError:
-    atoi = string.atoi # 1.5.2
-
 class Pattern:
     # master pattern object.  keeps track of global attributes
     def __init__(self):
@@ -233,7 +225,7 @@ def isname(name):
 def _group(escape, groups):
     # check if the escape string represents a valid group
     try:
-        gid = atoi(escape[1:])
+        gid = int(escape[1:])
         if gid and gid < groups:
             return gid
     except ValueError:
@@ -256,13 +248,13 @@ def _class_escape(source, escape):
             escape = escape[2:]
             if len(escape) != 2:
                 raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, atoi(escape, 16) & 0xff
+            return LITERAL, int(escape, 16) & 0xff
         elif escape[1:2] in OCTDIGITS:
             # octal escape (up to three digits)
             while source.next in OCTDIGITS and len(escape) < 5:
                 escape = escape + source.get()
             escape = escape[1:]
-            return LITERAL, atoi(escape, 8) & 0xff
+            return LITERAL, int(escape, 8) & 0xff
         if len(escape) == 2:
             return LITERAL, ord(escape[1])
     except ValueError:
@@ -284,12 +276,12 @@ def _escape(source, escape, state):
                 escape = escape + source.get()
             if len(escape) != 4:
                 raise ValueError
-            return LITERAL, atoi(escape[2:], 16) & 0xff
+            return LITERAL, int(escape[2:], 16) & 0xff
         elif escape[1:2] == "0":
             # octal escape
             while source.next in OCTDIGITS and len(escape) < 4:
                 escape = escape + source.get()
-            return LITERAL, atoi(escape[1:], 8) & 0xff
+            return LITERAL, int(escape[1:], 8) & 0xff
         elif escape[1:2] in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
@@ -298,7 +290,7 @@ def _escape(source, escape, state):
                     source.next in OCTDIGITS):
                     # got three octal digits; this is an octal escape
                     escape = escape + source.get()
-                    return LITERAL, atoi(escape[1:], 8) & 0xff
+                    return LITERAL, int(escape[1:], 8) & 0xff
             # got at least one decimal digit; this is a group reference
             group = _group(escape, state.groups)
             if group:
@@ -503,9 +495,9 @@ def _parse(source, state):
                     source.seek(here)
                     continue
                 if lo:
-                    min = atoi(lo)
+                    min = int(lo)
                 if hi:
-                    max = atoi(hi)
+                    max = int(hi)
                 if max < min:
                     raise error, "bad repeat interval"
             else:
@@ -617,7 +609,7 @@ def _parse(source, state):
                             raise error, "unknown group name"
                     else:
                         try:
-                            condgroup = atoi(condname)
+                            condgroup = int(condname)
                         except ValueError:
                             raise error, "bad character in group name"
                 else:
@@ -730,7 +722,7 @@ def parse_template(source, pattern):
                 if not name:
                     raise error, "bad group name"
                 try:
-                    index = atoi(name)
+                    index = int(name)
                 except ValueError:
                     if not isname(name):
                         raise error, "bad character in group name"
@@ -754,7 +746,7 @@ def parse_template(source, pattern):
                         break
                 if not code:
                     this = this[1:]
-                    code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
+                    code = LITERAL, makechar(int(this[-6:], 8) & 0xff)
                 if code[0] is LITERAL:
                     literal(code[1])
                 else:
@@ -793,4 +785,4 @@ def expand_template(template, match):
                 raise IndexError
     except IndexError:
         raise error, "empty group"
-    return string.join(literals, sep)
+    return sep.join(literals)
diff --git a/Lib/string.py b/Lib/string.py
index bc10c20..d166f38 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -35,10 +35,116 @@ printable = digits + letters + punctuation + whitespace
 
 # Case conversion helpers
 # Use str to convert Unicode literal in case of -U
+# Note that Cookie.py bogusly uses _idmap :(
 l = map(chr, xrange(256))
 _idmap = str('').join(l)
 del l
 
+# Functions which aren't available as string methods.
+
+# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
+# See also regsub.capwords().
+def capwords(s, sep=None):
+    """capwords(s, [sep]) -> string
+
+    Split the argument into words using split, capitalize each
+    word using capitalize, and join the capitalized words using
+    join. Note that this replaces runs of whitespace characters by
+    a single space.
+
+    """
+    return (sep or ' ').join([x.capitalize() for x in s.split(sep)])
+
+
+# Construct a translation string
+_idmapL = None
+def maketrans(fromstr, tostr):
+    """maketrans(frm, to) -> string
+
+    Return a translation table (a string of 256 bytes long)
+    suitable for use in string.translate.  The strings frm and to
+    must be of the same length.
+
+    """
+    if len(fromstr) != len(tostr):
+        raise ValueError, "maketrans arguments must have same length"
+    global _idmapL
+    if not _idmapL:
+        _idmapL = map(None, _idmap)
+    L = _idmapL[:]
+    fromstr = map(ord, fromstr)
+    for i in range(len(fromstr)):
+        L[fromstr[i]] = tostr[i]
+    return ''.join(L)
+
+
+
+import re as _re
+
+class Template(unicode):
+    """A string class for supporting $-substitutions."""
+    __slots__ = []
+
+    # Search for $$, $identifier, ${identifier}, and any bare $'s
+    pattern = _re.compile(r"""
+# Match exactly two $'s -- this is the escape sequence
+(?P<escaped>\${2})|
+# Match a $ followed by a Python identifier
+\$(?P<named>[_a-z][_a-z0-9]*)|
+# Match a $ followed by a brace delimited identifier
+\${(?P<braced>[_a-z][_a-z0-9]*)}|
+# Match any other $'s
+(?P<bogus>\$)
+""", _re.IGNORECASE | _re.VERBOSE)
+
+    def __mod__(self, mapping):
+        def convert(mo):
+            groups = mo.groupdict()
+            if groups.get('escaped') is not None:
+                return '$'
+            if groups.get('bogus') is not None:
+                raise ValueError('Invalid placeholder at index %d' %
+                                 mo.start('bogus'))
+            val = mapping[groups.get('named') or groups.get('braced')]
+            return unicode(val)
+        return self.pattern.sub(convert, self)
+
+
+class SafeTemplate(Template):
+    """A string class for supporting $-substitutions.
+
+    This class is 'safe' in the sense that you will never get KeyErrors if
+    there are placeholders missing from the interpolation dictionary.  In that
+    case, you will get the original placeholder in the value string.
+    """
+    __slots__ = []
+
+    def __mod__(self, mapping):
+        def convert(mo):
+            groups = mo.groupdict()
+            if groups.get('escaped') is not None:
+                return '$'
+            if groups.get('bogus') is not None:
+                raise ValueError('Invalid placeholder at index %d' %
+                                 mo.start('bogus'))
+            named = groups.get('named')
+            if named is not None:
+                try:
+                    return unicode(mapping[named])
+                except KeyError:
+                    return '$' + named
+            braced = groups.get('braced')
+            try:
+                return unicode(mapping[braced])
+            except KeyError:
+                return '${' + braced + '}'
+        return self.pattern.sub(convert, self)
+
+
+
+# NOTE: Everything below here is deprecated.  Use string methods instead.
+# This stuff will go away in Python 3.0.
+
 # Backward compatible names for exceptions
 index_error = ValueError
 atoi_error = ValueError
@@ -336,40 +442,6 @@ def capitalize(s):
     """
     return s.capitalize()
 
-# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
-# See also regsub.capwords().
-def capwords(s, sep=None):
-    """capwords(s, [sep]) -> string
-
-    Split the argument into words using split, capitalize each
-    word using capitalize, and join the capitalized words using
-    join. Note that this replaces runs of whitespace characters by
-    a single space.
-
-    """
-    return join(map(capitalize, s.split(sep)), sep or ' ')
-
-# Construct a translation string
-_idmapL = None
-def maketrans(fromstr, tostr):
-    """maketrans(frm, to) -> string
-
-    Return a translation table (a string of 256 bytes long)
-    suitable for use in string.translate.  The strings frm and to
-    must be of the same length.
-
-    """
-    if len(fromstr) != len(tostr):
-        raise ValueError, "maketrans arguments must have same length"
-    global _idmapL
-    if not _idmapL:
-        _idmapL = map(None, _idmap)
-    L = _idmapL[:]
-    fromstr = map(ord, fromstr)
-    for i in range(len(fromstr)):
-        L[fromstr[i]] = tostr[i]
-    return join(L, "")
-
 # Substring replacement (global)
 def replace(s, old, new, maxsplit=-1):
     """replace (str, old, new[, maxsplit]) -> string
diff --git a/Lib/test/test_pep292.py b/Lib/test/test_pep292.py
new file mode 100644
index 0000000..7eff309
--- /dev/null
+++ b/Lib/test/test_pep292.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2004 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
+# License: http://www.opensource.org/licenses/PythonSoftFoundation.php
+
+import unittest
+from string import Template, SafeTemplate
+
+class TestTemplate(unittest.TestCase):
+
+    def test_regular_templates(self):
+        s = Template('$who likes to eat a bag of $what worth $$100')
+        self.assertEqual(s % dict(who='tim', what='ham'),
+                         'tim likes to eat a bag of ham worth $100')
+        self.assertRaises(KeyError, lambda s, d: s % d, s, dict(who='tim'))
+
+    def test_regular_templates_with_braces(self):
+        s = Template('$who likes ${what} for ${meal}')
+        self.assertEqual(s % dict(who='tim', what='ham', meal='dinner'),
+                         'tim likes ham for dinner')
+        self.assertRaises(KeyError, lambda s, d: s % d,
+                          s, dict(who='tim', what='ham'))
+
+    def test_escapes(self):
+        eq = self.assertEqual
+        s = Template('$who likes to eat a bag of $$what worth $$100')
+        eq(s % dict(who='tim', what='ham'),
+           'tim likes to eat a bag of $what worth $100')
+        s = Template('$who likes $$')
+        eq(s % dict(who='tim', what='ham'), 'tim likes $')
+
+    def test_percents(self):
+        s = Template('%(foo)s $foo ${foo}')
+        self.assertEqual(s % dict(foo='baz'), '%(foo)s baz baz')
+        s = SafeTemplate('%(foo)s $foo ${foo}')
+        self.assertEqual(s % dict(foo='baz'), '%(foo)s baz baz')
+
+    def test_stringification(self):
+        s = Template('tim has eaten $count bags of ham today')
+        self.assertEqual(s % dict(count=7),
+                         'tim has eaten 7 bags of ham today')
+        s = SafeTemplate('tim has eaten $count bags of ham today')
+        self.assertEqual(s % dict(count=7),
+                         'tim has eaten 7 bags of ham today')
+        s = SafeTemplate('tim has eaten ${count} bags of ham today')
+        self.assertEqual(s % dict(count=7),
+                         'tim has eaten 7 bags of ham today')
+
+    def test_SafeTemplate(self):
+        eq = self.assertEqual
+        s = SafeTemplate('$who likes ${what} for ${meal}')
+        eq(s % dict(who='tim'),
+           'tim likes ${what} for ${meal}')
+        eq(s % dict(what='ham'),
+           '$who likes ham for ${meal}')
+        eq(s % dict(what='ham', meal='dinner'),
+           '$who likes ham for dinner')
+        eq(s % dict(who='tim', what='ham'),
+           'tim likes ham for ${meal}')
+        eq(s % dict(who='tim', what='ham', meal='dinner'),
+           'tim likes ham for dinner')
+
+    def test_invalid_placeholders(self):
+        raises = self.assertRaises
+        s = Template('$who likes $')
+        raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+        s = Template('$who likes ${what)')
+        raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+        s = Template('$who likes $100')
+        raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(TestTemplate))
+    return suite
+
+
+def test_main():
+    from test import test_support
+    test_support.run_suite(suite())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 56c687f..be445c9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -57,6 +57,8 @@ Extension modules
 Library
 -------
 
+- PEP 292 classes Template and SafeTemplate are added to the string module.
+
 - tarfile now generates GNU tar files by default.
 
 - HTTPResponse has now a getheaders method.