summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/lib/libstring.tex150
-rw-r--r--Lib/sre.py9
-rw-r--r--Lib/sre_constants.py3
-rw-r--r--Lib/sre_parse.py34
-rw-r--r--Lib/string.py140
-rw-r--r--Lib/test/test_pep292.py84
-rw-r--r--Misc/NEWS2
7 files changed, 329 insertions, 93 deletions
diff --git a/Doc/lib/libstring.tex b/Doc/lib/libstring.tex
index 48d7fc4..2824aeb 100644
--- a/Doc/lib/libstring.tex
+++ b/Doc/lib/libstring.tex
@@ -4,11 +4,23 @@
\declaremodule{standard}{string}
\modulesynopsis{Common string operations.}
+The \module{string} package contains a number of useful constants and classes,
+as well as some deprecated legacy functions that are also available as methods
+on strings. See the module \refmodule{re}\refstmodindex{re} for string
+functions based on regular expressions.
-This module defines some constants useful for checking character
-classes and some useful string functions. See the module
-\refmodule{re}\refstmodindex{re} for string functions based on regular
-expressions.
+In general, all of these objects are exposed directly in the \module{string}
+package so users need only import the \module{string} package to begin using
+these constants, classes, and functions.
+
+\begin{notice}
+Starting with Python 2.4, the traditional \module{string} module was turned
+into a package; however, backward compatibility with existing code has been
+retained. Code using the \module{string} module that worked prior to Python
+2.4 should continue to work unchanged.
+\end{notice}
+
+\subsection{String constants}
The constants defined in this module are:
@@ -86,11 +98,113 @@ The constants defined in this module are:
is undefined.
\end{datadesc}
+\subsection{Template strings}
+
+Templates are Unicode strings that can be used to provide string substitutions
+as described in \pep{292}. There is a \class{Template} class that is a
+subclass of \class{unicode}, overriding the default \method{__mod__()} method.
+Instead of the normal \samp{\%}-based substitutions, Template strings support
+\samp{\$}-based substitutions, using the following rules:
+
+\begin{itemize}
+\item \samp{\$\$} is an escape; it is replaced with a single \samp{\$}.
+
+\item \samp{\$identifier} names a substitution placeholder matching a mapping
+ key of "identifier". By default, "identifier" must spell a Python
+ identifier. The first non-identifier character after the \samp{\$}
+ character terminates this placeholder specification.
+
+\item \samp{\$\{identifier\}} is equivalent to \samp{\$identifier}. It is
+ required when valid identifier characters follow the placeholder but are
+ not part of the placeholder, e.g. "\$\{noun\}ification".
+\end{itemize}
+
+Any other appearance of \samp{\$} in the string will result in a
+\exception{ValueError} being raised.
+
+Template strings are used just like normal strings, in that the modulus
+operator is used to interpolate a dictionary of values into a Template string,
+e.g.:
+
+\begin{verbatim}
+>>> from string import Template
+>>> s = Template('$who likes $what')
+>>> print s % dict(who='tim', what='kung pao')
+tim likes kung pao
+>>> Template('Give $who $100') % dict(who='tim')
+Traceback (most recent call last):
+[...]
+ValueError: Invalid placeholder at index 10
+\end{verbatim}
+
+There is also a \class{SafeTemplate} class, derived from \class{Template},
+which acts the same as \class{Template}, except that if placeholders are
+missing in the interpolation dictionary, no \exception{KeyError} will be
+raised. Instead the original placeholder (with or without the braces, as
+appropriate) will be used:
+
+\begin{verbatim}
+>>> from string import SafeTemplate
+>>> s = SafeTemplate('$who likes $what for ${meal}')
+>>> print s % dict(who='tim')
+tim likes $what for ${meal}
+\end{verbatim}
+
+The values in the mapping will automatically be converted to Unicode strings,
+using the built-in \function{unicode()} function, which will be called without
+optional arguments \var{encoding} or \var{errors}.
+
+Advanced usage: you can derive subclasses of \class{Template} or
+\class{SafeTemplate} to use application-specific placeholder rules. To do
+this, you override the class attribute \member{pattern}; the value must be a
+compiled regular expression object with four named capturing groups. The
+capturing groups correspond to the rules given above, along with the invalid
+placeholder rule:
+
+\begin{itemize}
+\item \var{escaped} -- This group matches the escape sequence, i.e. \samp{\$\$}
+ in the default pattern.
+\item \var{named} -- This group matches the unbraced placeholder name; it
+ should not include the \samp{\$} in the capturing group.
+\item \var{braced} -- This group matches the brace delimited placeholder name;
+ it should not include either the \samp{\$} or braces in the capturing
+ group.
+\item \var{bogus} -- This group matches any other \samp{\$}. It usually just
+ matches a single \samp{\$} and should appear last.
+\end{itemize}
+
+\subsection{String functions}
+
+The following functions are available to operate on string and Unicode
+objects. They are not available as string methods.
+
+\begin{funcdesc}{capwords}{s}
+ Split the argument into words using \function{split()}, capitalize
+ each word using \function{capitalize()}, and join the capitalized
+ words using \function{join()}. Note that this replaces runs of
+ whitespace characters by a single space, and removes leading and
+ trailing whitespace.
+\end{funcdesc}
+
+\begin{funcdesc}{maketrans}{from, to}
+ Return a translation table suitable for passing to
+ \function{translate()} or \function{regex.compile()}, that will map
+ each character in \var{from} into the character at the same position
+ in \var{to}; \var{from} and \var{to} must have the same length.
+
+ \warning{Don't use strings derived from \constant{lowercase}
+ and \constant{uppercase} as arguments; in some locales, these don't have
+ the same length. For case conversions, always use
+ \function{lower()} and \function{upper()}.}
+\end{funcdesc}
-Many of the functions provided by this module are also defined as
-methods of string and Unicode objects; see ``String Methods'' (section
-\ref{string-methods}) for more information on those.
-The functions defined in this module are:
+\subsection{Deprecated string functions}
+
+The following functions are also defined as methods of string and
+Unicode objects; see ``String Methods'' (section
+\ref{string-methods}) for more information on those. You should consider
+these functions as deprecated, although they will not be removed until Python
+3.0. The functions defined in this module are:
\begin{funcdesc}{atof}{s}
\deprecated{2.0}{Use the \function{float()} built-in function.}
@@ -138,14 +252,6 @@ The functions defined in this module are:
Return a copy of \var{word} with only its first character capitalized.
\end{funcdesc}
-\begin{funcdesc}{capwords}{s}
- Split the argument into words using \function{split()}, capitalize
- each word using \function{capitalize()}, and join the capitalized
- words using \function{join()}. Note that this replaces runs of
- whitespace characters by a single space, and removes leading and
- trailing whitespace.
-\end{funcdesc}
-
\begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
Expand tabs in a string, i.e.\ replace them by one or more spaces,
depending on the current column and the given tab size. The column
@@ -188,18 +294,6 @@ The functions defined in this module are:
lower case.
\end{funcdesc}
-\begin{funcdesc}{maketrans}{from, to}
- Return a translation table suitable for passing to
- \function{translate()} or \function{regex.compile()}, that will map
- each character in \var{from} into the character at the same position
- in \var{to}; \var{from} and \var{to} must have the same length.
-
- \warning{Don't use strings derived from \constant{lowercase}
- and \constant{uppercase} as arguments; in some locales, these don't have
- the same length. For case conversions, always use
- \function{lower()} and \function{upper()}.}
-\end{funcdesc}
-
\begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
Return a list of the words of the string \var{s}. If the optional
second argument \var{sep} is absent or \code{None}, the words are
diff --git a/Lib/sre.py b/Lib/sre.py
index bb4bc16..8bf0fad 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -105,9 +105,6 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
__version__ = "2.2.1"
-# this module works under 1.5.2 and later. don't use string methods
-import string
-
# flags
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
@@ -201,7 +198,7 @@ def escape(pattern):
s[i] = "\\000"
else:
s[i] = "\\" + c
- return _join(s, pattern)
+ return pattern[:0].join(s)
# --------------------------------------------------------------------
# internals
@@ -213,10 +210,6 @@ _pattern_type = type(sre_compile.compile("", 0))
_MAXCACHE = 100
-def _join(seq, sep):
- # internal: join into string having the same type as sep
- return string.join(seq, sep[:0])
-
def _compile(*key):
# internal: compile pattern
cachekey = (type(key[0]),) + key
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 002b195..1863f48 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -217,12 +217,11 @@ SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
if __name__ == "__main__":
- import string
def dump(f, d, prefix):
items = d.items()
items.sort(key=lambda a: a[1])
for k, v in items:
- f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+ f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
f = open("sre_constants.h", "w")
f.write("""\
/*
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 94d526d..5c4298a 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -12,8 +12,7 @@
# XXX: show string offset and offending character for all errors
-# this module works under 1.5.2 and later. don't use string methods
-import string, sys
+import sys
from sre_constants import *
@@ -63,13 +62,6 @@ FLAGS = {
"u": SRE_FLAG_UNICODE,
}
-# figure out best way to convert hex/octal numbers to integers
-try:
- int("10", 8)
- atoi = int # 2.0 and later
-except TypeError:
- atoi = string.atoi # 1.5.2
-
class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
@@ -233,7 +225,7 @@ def isname(name):
def _group(escape, groups):
# check if the escape string represents a valid group
try:
- gid = atoi(escape[1:])
+ gid = int(escape[1:])
if gid and gid < groups:
return gid
except ValueError:
@@ -256,13 +248,13 @@ def _class_escape(source, escape):
escape = escape[2:]
if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape)
- return LITERAL, atoi(escape, 16) & 0xff
+ return LITERAL, int(escape, 16) & 0xff
elif escape[1:2] in OCTDIGITS:
# octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) < 5:
escape = escape + source.get()
escape = escape[1:]
- return LITERAL, atoi(escape, 8) & 0xff
+ return LITERAL, int(escape, 8) & 0xff
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
@@ -284,12 +276,12 @@ def _escape(source, escape, state):
escape = escape + source.get()
if len(escape) != 4:
raise ValueError
- return LITERAL, atoi(escape[2:], 16) & 0xff
+ return LITERAL, int(escape[2:], 16) & 0xff
elif escape[1:2] == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
- return LITERAL, atoi(escape[1:], 8) & 0xff
+ return LITERAL, int(escape[1:], 8) & 0xff
elif escape[1:2] in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
@@ -298,7 +290,7 @@ def _escape(source, escape, state):
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
- return LITERAL, atoi(escape[1:], 8) & 0xff
+ return LITERAL, int(escape[1:], 8) & 0xff
# got at least one decimal digit; this is a group reference
group = _group(escape, state.groups)
if group:
@@ -503,9 +495,9 @@ def _parse(source, state):
source.seek(here)
continue
if lo:
- min = atoi(lo)
+ min = int(lo)
if hi:
- max = atoi(hi)
+ max = int(hi)
if max < min:
raise error, "bad repeat interval"
else:
@@ -617,7 +609,7 @@ def _parse(source, state):
raise error, "unknown group name"
else:
try:
- condgroup = atoi(condname)
+ condgroup = int(condname)
except ValueError:
raise error, "bad character in group name"
else:
@@ -730,7 +722,7 @@ def parse_template(source, pattern):
if not name:
raise error, "bad group name"
try:
- index = atoi(name)
+ index = int(name)
except ValueError:
if not isname(name):
raise error, "bad character in group name"
@@ -754,7 +746,7 @@ def parse_template(source, pattern):
break
if not code:
this = this[1:]
- code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
+ code = LITERAL, makechar(int(this[-6:], 8) & 0xff)
if code[0] is LITERAL:
literal(code[1])
else:
@@ -793,4 +785,4 @@ def expand_template(template, match):
raise IndexError
except IndexError:
raise error, "empty group"
- return string.join(literals, sep)
+ return sep.join(literals)
diff --git a/Lib/string.py b/Lib/string.py
index bc10c20..d166f38 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -35,10 +35,116 @@ printable = digits + letters + punctuation + whitespace
# Case conversion helpers
# Use str to convert Unicode literal in case of -U
+# Note that Cookie.py bogusly uses _idmap :(
l = map(chr, xrange(256))
_idmap = str('').join(l)
del l
+# Functions which aren't available as string methods.
+
+# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
+# See also regsub.capwords().
+def capwords(s, sep=None):
+ """capwords(s, [sep]) -> string
+
+ Split the argument into words using split, capitalize each
+ word using capitalize, and join the capitalized words using
+ join. Note that this replaces runs of whitespace characters by
+ a single space.
+
+ """
+ return (sep or ' ').join([x.capitalize() for x in s.split(sep)])
+
+
+# Construct a translation string
+_idmapL = None
+def maketrans(fromstr, tostr):
+ """maketrans(frm, to) -> string
+
+ Return a translation table (a string of 256 bytes long)
+ suitable for use in string.translate. The strings frm and to
+ must be of the same length.
+
+ """
+ if len(fromstr) != len(tostr):
+ raise ValueError, "maketrans arguments must have same length"
+ global _idmapL
+ if not _idmapL:
+ _idmapL = map(None, _idmap)
+ L = _idmapL[:]
+ fromstr = map(ord, fromstr)
+ for i in range(len(fromstr)):
+ L[fromstr[i]] = tostr[i]
+ return ''.join(L)
+
+
+
+import re as _re
+
+class Template(unicode):
+ """A string class for supporting $-substitutions."""
+ __slots__ = []
+
+ # Search for $$, $identifier, ${identifier}, and any bare $'s
+ pattern = _re.compile(r"""
+# Match exactly two $'s -- this is the escape sequence
+(?P<escaped>\${2})|
+# Match a $ followed by a Python identifier
+\$(?P<named>[_a-z][_a-z0-9]*)|
+# Match a $ followed by a brace delimited identifier
+\${(?P<braced>[_a-z][_a-z0-9]*)}|
+# Match any other $'s
+(?P<bogus>\$)
+""", _re.IGNORECASE | _re.VERBOSE)
+
+ def __mod__(self, mapping):
+ def convert(mo):
+ groups = mo.groupdict()
+ if groups.get('escaped') is not None:
+ return '$'
+ if groups.get('bogus') is not None:
+ raise ValueError('Invalid placeholder at index %d' %
+ mo.start('bogus'))
+ val = mapping[groups.get('named') or groups.get('braced')]
+ return unicode(val)
+ return self.pattern.sub(convert, self)
+
+
+class SafeTemplate(Template):
+ """A string class for supporting $-substitutions.
+
+ This class is 'safe' in the sense that you will never get KeyErrors if
+ there are placeholders missing from the interpolation dictionary. In that
+ case, you will get the original placeholder in the value string.
+ """
+ __slots__ = []
+
+ def __mod__(self, mapping):
+ def convert(mo):
+ groups = mo.groupdict()
+ if groups.get('escaped') is not None:
+ return '$'
+ if groups.get('bogus') is not None:
+ raise ValueError('Invalid placeholder at index %d' %
+ mo.start('bogus'))
+ named = groups.get('named')
+ if named is not None:
+ try:
+ return unicode(mapping[named])
+ except KeyError:
+ return '$' + named
+ braced = groups.get('braced')
+ try:
+ return unicode(mapping[braced])
+ except KeyError:
+ return '${' + braced + '}'
+ return self.pattern.sub(convert, self)
+
+
+
+# NOTE: Everything below here is deprecated. Use string methods instead.
+# This stuff will go away in Python 3.0.
+
# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
@@ -336,40 +442,6 @@ def capitalize(s):
"""
return s.capitalize()
-# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
-# See also regsub.capwords().
-def capwords(s, sep=None):
- """capwords(s, [sep]) -> string
-
- Split the argument into words using split, capitalize each
- word using capitalize, and join the capitalized words using
- join. Note that this replaces runs of whitespace characters by
- a single space.
-
- """
- return join(map(capitalize, s.split(sep)), sep or ' ')
-
-# Construct a translation string
-_idmapL = None
-def maketrans(fromstr, tostr):
- """maketrans(frm, to) -> string
-
- Return a translation table (a string of 256 bytes long)
- suitable for use in string.translate. The strings frm and to
- must be of the same length.
-
- """
- if len(fromstr) != len(tostr):
- raise ValueError, "maketrans arguments must have same length"
- global _idmapL
- if not _idmapL:
- _idmapL = map(None, _idmap)
- L = _idmapL[:]
- fromstr = map(ord, fromstr)
- for i in range(len(fromstr)):
- L[fromstr[i]] = tostr[i]
- return join(L, "")
-
# Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
"""replace (str, old, new[, maxsplit]) -> string
diff --git a/Lib/test/test_pep292.py b/Lib/test/test_pep292.py
new file mode 100644
index 0000000..7eff309
--- /dev/null
+++ b/Lib/test/test_pep292.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2004 Python Software Foundation
+# Author: barry@python.org (Barry Warsaw)
+# License: http://www.opensource.org/licenses/PythonSoftFoundation.php
+
+import unittest
+from string import Template, SafeTemplate
+
+class TestTemplate(unittest.TestCase):
+
+ def test_regular_templates(self):
+ s = Template('$who likes to eat a bag of $what worth $$100')
+ self.assertEqual(s % dict(who='tim', what='ham'),
+ 'tim likes to eat a bag of ham worth $100')
+ self.assertRaises(KeyError, lambda s, d: s % d, s, dict(who='tim'))
+
+ def test_regular_templates_with_braces(self):
+ s = Template('$who likes ${what} for ${meal}')
+ self.assertEqual(s % dict(who='tim', what='ham', meal='dinner'),
+ 'tim likes ham for dinner')
+ self.assertRaises(KeyError, lambda s, d: s % d,
+ s, dict(who='tim', what='ham'))
+
+ def test_escapes(self):
+ eq = self.assertEqual
+ s = Template('$who likes to eat a bag of $$what worth $$100')
+ eq(s % dict(who='tim', what='ham'),
+ 'tim likes to eat a bag of $what worth $100')
+ s = Template('$who likes $$')
+ eq(s % dict(who='tim', what='ham'), 'tim likes $')
+
+ def test_percents(self):
+ s = Template('%(foo)s $foo ${foo}')
+ self.assertEqual(s % dict(foo='baz'), '%(foo)s baz baz')
+ s = SafeTemplate('%(foo)s $foo ${foo}')
+ self.assertEqual(s % dict(foo='baz'), '%(foo)s baz baz')
+
+ def test_stringification(self):
+ s = Template('tim has eaten $count bags of ham today')
+ self.assertEqual(s % dict(count=7),
+ 'tim has eaten 7 bags of ham today')
+ s = SafeTemplate('tim has eaten $count bags of ham today')
+ self.assertEqual(s % dict(count=7),
+ 'tim has eaten 7 bags of ham today')
+ s = SafeTemplate('tim has eaten ${count} bags of ham today')
+ self.assertEqual(s % dict(count=7),
+ 'tim has eaten 7 bags of ham today')
+
+ def test_SafeTemplate(self):
+ eq = self.assertEqual
+ s = SafeTemplate('$who likes ${what} for ${meal}')
+ eq(s % dict(who='tim'),
+ 'tim likes ${what} for ${meal}')
+ eq(s % dict(what='ham'),
+ '$who likes ham for ${meal}')
+ eq(s % dict(what='ham', meal='dinner'),
+ '$who likes ham for dinner')
+ eq(s % dict(who='tim', what='ham'),
+ 'tim likes ham for ${meal}')
+ eq(s % dict(who='tim', what='ham', meal='dinner'),
+ 'tim likes ham for dinner')
+
+ def test_invalid_placeholders(self):
+ raises = self.assertRaises
+ s = Template('$who likes $')
+ raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+ s = Template('$who likes ${what)')
+ raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+ s = Template('$who likes $100')
+ raises(ValueError, lambda s, d: s % d, s, dict(who='tim'))
+
+
+def suite():
+ suite = unittest.TestSuite()
+ suite.addTest(unittest.makeSuite(TestTemplate))
+ return suite
+
+
+def test_main():
+ from test import test_support
+ test_support.run_suite(suite())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 56c687f..be445c9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -57,6 +57,8 @@ Extension modules
Library
-------
+- PEP 292 classes Template and SafeTemplate are added to the string module.
+
- tarfile now generates GNU tar files by default.
- HTTPResponse has now a getheaders method.