summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Andrew Kuchling <amk@amk.ca> 2013-06-16 17:02:55 (GMT)
committer Andrew Kuchling <amk@amk.ca> 2013-06-16 17:02:55 (GMT)
commit f567727abcba9d3b56fd2d0254050eff23535d1a (patch)
tree 479246982709288ca4e8da7cd7e7718b0c4bd1cc
parent 39295e7a55d03b9ef31c0d0dd27d129b1ad5a695 (diff)
parent c7b6c50f29fac4971e7271ac649ee3b7ef3deac7 (diff)
download cpython-f567727abcba9d3b56fd2d0254050eff23535d1a.zip
cpython-f567727abcba9d3b56fd2d0254050eff23535d1a.tar.gz
cpython-f567727abcba9d3b56fd2d0254050eff23535d1a.tar.bz2
Merge with 3.3
-rw-r--r-- Doc/library/codecs.rst 6
-rw-r--r-- Doc/library/functions.rst 40
-rw-r--r-- Lib/codecs.py 1
-rw-r--r-- Modules/_io/_iomodule.c 4
-rw-r--r-- Modules/_io/textio.c 5
5 files changed, 41 insertions, 15 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 0d38253..e80fc3a 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -78,7 +78,11 @@ It defines the following functions:
reference (for encoding only)
* ``'backslashreplace'``: replace with backslashed escape sequences (for
encoding only)
- * ``'surrogateescape'``: replace with surrogate U+DCxx, see :pep:`383`
+ * ``'surrogateescape'``: on decoding, replace each undecodable byte with an
+ individual surrogate code point ranging from U+DC80 to U+DCFF. These
+ surrogate code points will then be turned back into the same bytes when the
+ ``surrogateescape`` error handler is used when encoding the data.
+ (See :pep:`383` for more.)
as well as any other error handling name defined via :func:`register_error`.
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 65b591b..43164af 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -905,16 +905,36 @@ are always available. They are listed here in alphabetical order.
the list of supported encodings.
*errors* is an optional string that specifies how encoding and decoding
- errors are to be handled--this cannot be used in binary mode. Pass
- ``'strict'`` to raise a :exc:`ValueError` exception if there is an encoding
- error (the default of ``None`` has the same effect), or pass ``'ignore'`` to
- ignore errors. (Note that ignoring encoding errors can lead to data loss.)
- ``'replace'`` causes a replacement marker (such as ``'?'``) to be inserted
- where there is malformed data. When writing, ``'xmlcharrefreplace'``
- (replace with the appropriate XML character reference) or
- ``'backslashreplace'`` (replace with backslashed escape sequences) can be
- used. Any other error handling name that has been registered with
- :func:`codecs.register_error` is also valid.
+ errors are to be handled--this cannot be used in binary mode.
+ A variety of standard error handlers are available, though any
+ error handling name that has been registered with
+ :func:`codecs.register_error` is also valid. The standard names
+ are:
+
+ * ``'strict'`` raises a :exc:`ValueError` exception if there is
+ an encoding error. The default value of ``None`` has the same
+ effect.
+
+ * ``'ignore'`` ignores errors. Note that ignoring encoding errors
+ can lead to data loss.
+
+ * ``'replace'`` causes a replacement marker (such as ``'?'``) to be inserted
+ where there is malformed data.
+
+ * ``'surrogateescape'`` will represent any incorrect bytes as
+ individual surrogate code points ranging from U+DC80 to
+ U+DCFF. These surrogate code points will then be turned back into
+ the same bytes when the ``surrogateescape`` error handler is used
+ when writing data. This is useful for processing files in an
+ unknown encoding.
+
+ * ``'xmlcharrefreplace'`` is only supported when writing to a file.
+ Characters not supported by the encoding are replaced with the
+ appropriate XML character reference ``&#nnn;``.
+
+ * ``'backslashreplace'`` (also only supported when writing)
+ replaces unsupported characters with Python's backslashed escape
+ sequences.
.. index::
single: universal newlines; open() built-in function
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 48d4c9c..6a6eb90 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -105,6 +105,7 @@ class Codec:
Python will use the official U+FFFD REPLACEMENT
CHARACTER for the builtin Unicode codecs on
decoding and '?' on encoding.
+ 'surrogateescape' - replace with surrogate code points U+DCnn.
'xmlcharrefreplace' - Replace with the appropriate XML
character reference (only for encoding).
'backslashreplace' - Replace with backslashed escape sequences
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index b5cd176..4a7e758 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -168,8 +168,8 @@ PyDoc_STRVAR(open_doc,
"'strict' to raise a ValueError exception if there is an encoding error\n"
"(the default of None has the same effect), or pass 'ignore' to ignore\n"
"errors. (Note that ignoring encoding errors can lead to data loss.)\n"
-"See the documentation for codecs.register for a list of the permitted\n"
-"encoding error strings.\n"
+"See the documentation for codecs.register or run 'help(codecs.Codec)'\n"
+"for a list of the permitted encoding error strings.\n"
"\n"
"newline controls how universal newlines works (it only applies to text\n"
"mode). It can be None, '', '\\n', '\\r', and '\\r\\n'. It works as\n"
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 8bd9ba1..4d0009d 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -642,8 +642,9 @@ PyDoc_STRVAR(textiowrapper_doc,
"encoding gives the name of the encoding that the stream will be\n"
"decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
"\n"
- "errors determines the strictness of encoding and decoding (see the\n"
- "codecs.register) and defaults to \"strict\".\n"
+ "errors determines the strictness of encoding and decoding (see\n"
+ "help(codecs.Codec) or the documentation for codecs.register) and\n"
+ "defaults to \"strict\".\n"
"\n"
"newline controls how line endings are handled. It can be None, '',\n"
"'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5diff/testh5diff.sh.in b/tools/h5diff/testh5diff.sh.in
index 0dfdaa2..0df9cb8 100644
--- a/tools/h5diff/testh5diff.sh.in
+++ b/tools/h5diff/testh5diff.sh.in
@@ -374,7 +374,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5DIFF_TESTFILES
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5dump/testh5dump.sh.in b/tools/h5dump/testh5dump.sh.in
index ef23d67..9108f44 100644
--- a/tools/h5dump/testh5dump.sh.in
+++ b/tools/h5dump/testh5dump.sh.in
@@ -423,7 +423,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5DUMP_TESTFILES
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5dump/testh5dumppbits.sh.in b/tools/h5dump/testh5dumppbits.sh.in
index 4211c63..446020a 100644
--- a/tools/h5dump/testh5dumppbits.sh.in
+++ b/tools/h5dump/testh5dumppbits.sh.in
@@ -190,7 +190,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5DUMP_TESTFILES/pbits
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5dump/testh5dumpvds.sh.in b/tools/h5dump/testh5dumpvds.sh.in
index 850c03a..16411f5 100644
--- a/tools/h5dump/testh5dumpvds.sh.in
+++ b/tools/h5dump/testh5dumpvds.sh.in
@@ -169,7 +169,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5DUMP_TESTFILES/vds
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5dump/testh5dumpxml.sh.in b/tools/h5dump/testh5dumpxml.sh.in
index 1efde85..33a67c0 100644
--- a/tools/h5dump/testh5dumpxml.sh.in
+++ b/tools/h5dump/testh5dumpxml.sh.in
@@ -221,7 +221,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5DUMP_TESTFILES
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5import/h5importtestutil.sh.in b/tools/h5import/h5importtestutil.sh.in
index f2694a0..08e0c3f 100644
--- a/tools/h5import/h5importtestutil.sh.in
+++ b/tools/h5import/h5importtestutil.sh.in
@@ -161,7 +161,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5IMPORT_TESTFILES
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5jam/testh5jam.sh.in b/tools/h5jam/testh5jam.sh.in
index 1664d6b..d8c9274 100644
--- a/tools/h5jam/testh5jam.sh.in
+++ b/tools/h5jam/testh5jam.sh.in
@@ -126,7 +126,7 @@ CLEAN_TESTFILES_AND_TESTDIR()
# skip rm if srcdir is same as destdir
# this occurs when build/test performed in source dir and
# make cp fail
- SDIR=`$DIRNAME $tstfile`
+ SDIR=$SRC_H5JAM_TESTFILES
INODE_SDIR=`$LS -i -d $SDIR | $AWK -F' ' '{print $1}'`
INODE_DDIR=`$LS -i -d $TESTDIR | $AWK -F' ' '{print $1}'`
if [ "$INODE_SDIR" != "$INODE_DDIR" ]; then
diff --git a/tools/h5ls/testh5ls.sh.in b/tools/h5ls/testh5ls.sh.in